In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import astropy.coordinates as coord
import astropy.units as u
from astropy.io import ascii


In [2]:
# Load the pickled `bounds` object. The context manager guarantees the file
# handle is closed (the bare `pickle.load(open(...))` form leaks it).
# NOTE: pickle can execute arbitrary code on load; only read trusted files.
with open('./pickle/bounds.pk','rb') as f:
    bounds = pickle.load(f)

In [3]:
# Master catalogue table; later cells read its ID and CLASS columns.
master_cat = pd.read_csv(filepath_or_buffer='./catdata/master_catalog_jan_2023.csv')

In [4]:
# Pickled catalogue chunks, one file per contiguous range of field numbers.
# Each file is named cat<first>_<last>.pk after the inclusive range it covers.
cat_files = [
    f'cat{first}_{last}.pk'
    for first, last in (
        (1, 50), (51, 100), (101, 150), (151, 200), (201, 235),
        (236, 257), (258, 279), (280, 320), (321, 360), (361, 406),
    )
]

In [21]:
# Load the pickled match table (iterated later as {field: list of matches}).
# The context manager guarantees the file handle is closed.
# NOTE: pickle can execute arbitrary code on load; only read trusted files.
with open('./pickle/matches_delta006_crowding300.pk','rb') as f:
    object_matches = pickle.load(f)

In [10]:
# Load the previously saved training data from disk.
# The context manager guarantees the file handle is closed.
# NOTE: pickle can execute arbitrary code on load; only read trusted files.
with open('./pickle/training_data_d006_c300.pk','rb') as f:
    train = pickle.load(f)

In [6]:
def load_cat(field):
    """Load the pickled catalogue chunk that covers ``field``.

    The catalogue is split across the files named in the module-level
    ``cat_files`` list; each file covers a contiguous range of field numbers.
    The file whose inclusive upper bound is the first one ``>= field`` is
    unpickled and returned.

    Parameters
    ----------
    field : int
        Field number. Any value ``<= 50`` selects the first file; the largest
        supported value is 406.

    Returns
    -------
    object
        Whatever object was pickled in the selected file (callers in this
        notebook index it as ``catalogue[field]``).

    Raises
    ------
    ValueError
        If ``field`` is larger than the last covered field (406). Previously
        this case surfaced as an ``UnboundLocalError``.
    """
    # Inclusive upper field bound of each entry in cat_files, in the same order.
    upper_bounds = (50, 100, 150, 200, 235, 257, 279, 320, 360, 406)

    for index, upper in enumerate(upper_bounds):
        if field <= upper:
            to_load = cat_files[index]
            break
    else:
        raise ValueError(
            f'No catalogue file covers field {field!r}; '
            f'the largest supported field is {upper_bounds[-1]}'
        )

    print(f'Loading {to_load} ...')
    # Use a context manager so the file handle is always closed.
    # NOTE: pickle can execute arbitrary code on load; only read trusted files.
    with open(f'./pickle/{to_load}','rb') as f:
        catalogue = pickle.load(f)

    return catalogue

## Generate Training Data

In [56]:
def generate_training_data(matches):
    """Build the labelled training table from catalogue <-> master-catalogue matches.

    For every match, the object's ID and CLASS are read from the module-level
    ``master_cat`` frame and its photometry/position from the per-field
    catalogue returned by ``load_cat``. Each object ID is used at most once
    (the first time it is encountered), and only classes 1, 4 and 6 are kept.

    Parameters
    ----------
    matches : dict
        Maps a field ID to an iterable of matches. Each match ``m`` is indexed
        as ``m[0]`` (key of the source within ``cat[field]``) and ``m[1]``
        (row label in ``master_cat``).

    Returns
    -------
    pandas.DataFrame
        One row per retained object with columns ``obj_id, class, i, g, di,
        dg, ra, dec, field, pdidx``. ``class`` is one of ``'gc'``,
        ``'galaxy'`` or ``'star'``; ``pdidx`` is ``m[0]``. The columns are
        present even when no rows are produced.
    """
    cat = load_cat(1)

    columns = ['obj_id','class','i','g','di','dg','ra','dec','field','pdidx']
    # Numeric CLASS code in master_cat -> label stored in the training table.
    class_labels = {1: 'gc', 4: 'galaxy', 6: 'star'}
    values = []
    seen_ids = set()  # set membership keeps the duplicate check O(1)

    for field, working_field in matches.items(): # iterate through each field ID
        print(f'Working on {field}')
        if field not in cat: # current chunk does not hold this field: load the right one
            cat = load_cat(field)
        for m in working_field: # iterate through each match and grab values from catalogues
            master_row = master_cat.loc[m[1]]  # look the row up once, not per attribute
            obj_id = master_row.ID
            class_ = master_row.CLASS

            # Skip objects already encountered (matches can be duplicated). The
            # ID is recorded before the class filter, so an object rejected for
            # its class is not reconsidered later.
            if obj_id in seen_ids:
                continue
            seen_ids.add(obj_id)

            class_str = class_labels.get(class_)
            if class_str is None: # skip non-gc/gal/stars
                continue

            row = cat[field][m[0]]
            values.append([obj_id, class_str, row['i'], row['g'], row['di'], row['dg'],
                           row['RA'], row['Dec'], field, m[0]])

    # Passing `columns` explicitly keeps the schema intact even when `values`
    # is empty (the dict(zip(columns, zip(*values))) form yields no columns).
    training_data_df = pd.DataFrame(values, columns=columns)
    return training_data_df

In [57]:
# Build the training table from every matched field (prints progress per field).
training_data = generate_training_data(matches=object_matches)

Loading cat1_50.pk ...
Working on 5
Working on 11
Working on 13
Working on 14
Working on 20
Working on 22
Working on 24
Working on 25
Working on 26
Working on 30
Working on 33
Working on 35
Working on 37
Working on 43
Working on 52
Loading cat51_100.pk ...
Working on 58
Working on 62
Working on 70
Working on 80
Working on 81
Working on 82
Working on 84
Working on 85
Working on 91
Working on 92
Working on 94
Working on 95
Working on 97
Working on 102
Loading cat101_150.pk ...
Working on 111
Working on 114
Working on 124
Working on 125
Working on 126
Working on 128
Working on 138
Working on 139
Working on 142
Working on 146
Working on 148
Working on 153
Loading cat151_200.pk ...
Working on 162
Working on 165
Working on 167
Working on 169
Working on 173
Working on 180
Working on 183
Working on 184
Working on 185
Working on 186
Working on 187
Working on 188
Working on 189
Working on 196
Working on 201
Loading cat201_235.pk ...
Working on 204
Working on 205
Working on 207
Working on 208
Wor

In [58]:
# Persist the freshly generated training table so later sessions can reload it.
with open('./pickle/training_data_d006_c300.pk', mode='wb') as f:
    pickle.dump(training_data, f)

### Analyse Matches

In [28]:
# Disabled: restrict the matches to a few fields for a quick test run.
#subset_matches = {k: object_matches[k] for k in (5,11,13,14)}

In [48]:
# Distinct class labels present in the training table.
# NOTE(review): the saved output shows numeric codes, while generate_training_data
# emits 'gc'/'galaxy'/'star' -- the output looks stale; re-run to confirm.
{label for label in training_data['class']}

{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}

In [104]:
# Toy inputs for checking the zip/transposition idiom: three [digit, ten] rows
# and the two column names.
a = [[digit, 10 * digit] for digit in (1, 2, 3)]
c = 'dig ten'.split()

In [117]:
# Pair each column name with a *row* of `a` (no transposition); zip stops at the shorter input.
d = {name: row for name, row in zip(c, a)}

In [129]:
# Transpose the rows with zip(*a) first, so each column name maps to its tuple of values.
{name: column for name, column in zip(c, zip(*a))}

{'dig': (1, 2, 3), 'ten': (10, 20, 30)}