In [1]:
import resspect
import pandas as pd
from resspect import request_TOM_data
from resspect import fit_TOM, fit
from resspect import submit_queries_to_TOM
from resspect import time_domain_loop
from resspect.tom_client import TomClient
from resspect import time_domain_loop
from resspect import TimeDomainConfiguration
import os
import re
import numpy as np

In [2]:
###🔲 Need to import this from RESSPECT
###🔲 Need to put this updated version in RESSPECT 
def update_pool_stash(day: int):
    """Merge one night's hot-transient features into the cumulative pool stash.

    Reads the new night from ``TOM_days_storage/TOM_hot_features_day_<day>.csv``
    and the previous stash from
    ``TOM_days_storage/TOM_compiled_features_day<day-1>.csv`` (for ``day == 0``
    there is no previous stash, so the night file itself is used), keeps the
    most recent row for every ``id`` and writes the result to
    ``TOM_days_storage/TOM_compiled_features_day<day>.csv``.

    Parameters
    ----------
    day : int
        Index of the night being merged.

    Raises
    ------
    FileNotFoundError
        If either input file does not exist.
    KeyError
        If the merged table has no ``id`` column.
    """
    # TODO: check if the directory exists to contain features and if it doesn't, make it!
    outdir = 'TOM_days_storage'

    # TODO: should we store old features somewhere? makes it easier to add training objs
    # (would want to add current MJD, maybe first MJD, and peak MJD)
    new_night_path = outdir + '/TOM_hot_features_day_' + str(day) + '.csv'
    if day == 0:
        current_stash_path = new_night_path
    else:
        current_stash_path = outdir + '/TOM_compiled_features_day' + str(day - 1) + '.csv'

    current_stash_df = pd.read_csv(current_stash_path)
    new_night_df = pd.read_csv(new_night_path)

    # The new night is concatenated last so that keep='last' prefers its rows.
    compiled_df = pd.concat([current_stash_df, new_night_df]).drop_duplicates('id', keep='last')

    # Write with to_csv rather than splitting DataFrame.to_string() on whitespace:
    # to_string() rounds floats to the display precision and cannot represent
    # values that contain spaces.
    output_path = outdir + '/TOM_compiled_features_day' + str(day) + '.csv'
    compiled_df.to_csv(output_path, index=False)

In [3]:
#🔲 Need to remove no longer hot SN, will help speed things up
#🔲 in the mean time remove after 15 days
def remove_from_pool_stash(day):
    """Drop objects that were added to the pool 15 or more days ago.

    Reads ``TOM_days_storage/TOM_hot_features_day_<day>.csv``, keeps the rows
    whose ``date_added`` is greater than ``day - 15`` and writes them to
    ``TOM_days_storage/TOM_compiled_features_day<day>.csv``.

    Parameters
    ----------
    day : int
        Index of the current night.

    Raises
    ------
    FileNotFoundError
        If the input file does not exist.
    KeyError
        If the input file has no ``date_added`` column.
    """
    # TODO: check if the directory exists to contain features and if it doesn't, make it!
    outdir = 'TOM_days_storage'
    # NOTE(review): the input is the per-night "hot" file while the output is the
    # compiled stash for the same day -- confirm this is the intended pairing.
    current_stash_path = outdir + '/TOM_hot_features_day_' + str(day) + '.csv'

    current_stash_df = pd.read_csv(current_stash_path)

    # Interim rule until "no longer hot" objects can be identified: age out after 15 days.
    recent_obj_df = current_stash_df[current_stash_df["date_added"] > day - 15]

    # to_csv keeps full float precision and values containing whitespace intact.
    output_path = outdir + '/TOM_compiled_features_day' + str(day) + '.csv'
    recent_obj_df.to_csv(output_path, index=False)

In [4]:
# Host-galaxy columns copied from each object into the feature extractor:
# three shape/separation columns, then one magnitude and one magnitude-error
# column per band letter in 'ugrizy'.
additional_info = (
    ['hostgal_snsep', 'hostgal_ellipticity', 'hostgal_sqradius']
    + ['hostgal_mag_' + band for band in 'ugrizy']
    + ['hostgal_magerr_' + band for band in 'ugrizy']
)

from laiss_resspect_classifier.elasticc2_laiss_feature_extractor import Elasticc2LaissFeatureExtractor

def validate_objects(objects_to_test):
    """Return the objects for which the LAISS feature extractor reports success.

    A single ``Elasticc2LaissFeatureExtractor`` is reused for all objects. For
    each object its photometry (as a DataFrame), its ``objectid`` and the
    ``additional_info`` columns are loaded into the extractor, and the object
    is kept when ``fit_all()`` returns a truthy value.

    Parameters
    ----------
    objects_to_test : iterable of dict
        Each dict must provide ``'photometry'``, ``'objectid'`` and every key
        listed in the module-level ``additional_info``.

    Returns
    -------
    list of dict
        The accepted objects, in input order.
    """
    extractor = Elasticc2LaissFeatureExtractor()

    def _fits(candidate):
        # Load one candidate into the shared extractor and try the fit.
        extractor.photometry = pd.DataFrame(candidate['photometry'])
        extractor.id = candidate['objectid']
        extractor.additional_info = {key: candidate[key] for key in additional_info}
        return extractor.fit_all()

    return [candidate for candidate in objects_to_test if _fits(candidate)]

In [5]:
def get_phot(obj_df, tom=None):
    """Fetch forced photometry and host-galaxy columns for a table of objects.

    Two SQL queries are sent through the TOM ``db/runsqlquery/`` endpoint: one
    for all forced-source photometry of the requested objects and one for
    their host-galaxy columns. The rows are then regrouped per object.

    Parameters
    ----------
    obj_df : pandas.DataFrame
        One row per object with columns ``diaobject_id``, ``gentype``,
        ``zcmb``, ``ra`` and ``dec``.
    tom : optional
        An object with a ``post(endpoint, json=...)`` method. When omitted a
        ``TomClient`` is created with the notebook's default credentials.

    Returns
    -------
    list of dict
        One dict per row of ``obj_df`` (same order) with keys ``objectid``,
        ``sncode``, ``redshift``, ``ra``, ``dec``, ``photometry`` (a dict of
        lists keyed by ``mag``, ``magerr``, ``band``, ``mjd``, ``flux``,
        ``fluxerr``) plus every host-galaxy column returned by the TOM.
        An empty ``obj_df`` yields an empty list without contacting the TOM.

    Raises
    ------
    KeyError
        If the TOM returns no host-galaxy row for one of the objects.
    """
    if len(obj_df) == 0:
        # Nothing to look up; "IN ()" would also be invalid SQL.
        return []

    if tom is None:
        # NOTE(review): the username and password-file path are hardcoded.
        tom = TomClient(url = "https://desc-tom-2.lbl.gov", username = 'awoldag', passwordfile = '../../password.txt')

    # int() makes sure only integer literals are interpolated into the SQL text.
    id_list = ', '.join(str(int(obj_id)) for obj_id in obj_df['diaobject_id'].tolist())

    # get all of the photometry at once
    res = tom.post('db/runsqlquery/',
                   json={'query': 'SELECT diaobject_id, filtername, midpointtai, psflux, psfluxerr'
                                  ' FROM elasticc2_ppdbdiaforcedsource'
                                  ' WHERE diaobject_id IN (%s) ORDER BY diaobject_id, filtername, midpointtai;' % id_list,
                         'subdict': {}})
    all_phot_df = pd.DataFrame(res.json()['rows'])

    # Magnitudes from the arbitrary flux scale (zero point 27.5). Non-positive
    # fluxes give NaN/inf magnitudes; the numpy warnings for those are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        all_phot_df['mag'] = -2.5*np.log10(all_phot_df['psflux']) + 27.5
        all_phot_df['magerr'] = 2.5/np.log(10) * all_phot_df['psfluxerr']/all_phot_df['psflux']

    # TODO: ask that these host features be included when querying for hot supernovae
    host_res = tom.post('db/runsqlquery/',
                        json={'query': 'SELECT diaobject_id, hostgal_mag_u, hostgal_mag_g, hostgal_mag_r, hostgal_mag_i, hostgal_mag_z, hostgal_mag_Y, hostgal_magerr_u, hostgal_magerr_g, hostgal_magerr_r, hostgal_magerr_i, hostgal_magerr_z, hostgal_magerr_Y, hostgal_snsep, hostgal_ellipticity, hostgal_sqradius'
                                       ' FROM elasticc2_ppdbdiaobject'
                                       ' WHERE diaobject_id IN (%s) ORDER BY diaobject_id;' % id_list,
                              'subdict': {}})

    # Index both result sets by object id. The host rows come back sorted by
    # diaobject_id, which is generally not the row order of obj_df, so they
    # must be matched by id rather than by position.
    host_by_id = {int(row['diaobject_id']): row for row in host_res.json()['rows']}
    phot_by_id = {int(obj_id): rows for obj_id, rows in all_phot_df.groupby('diaobject_id')}
    no_phot = all_phot_df.iloc[0:0]

    renames = {'filtername': 'band', 'midpointtai': 'mjd', 'psflux': 'flux', 'psfluxerr': 'fluxerr'}
    phot_columns = ['mag', 'magerr', 'band', 'mjd', 'flux', 'fluxerr']

    # format into a list of dicts
    data = []
    for _, obj in obj_df.iterrows():
        objectid = int(obj['diaobject_id'])
        if objectid not in host_by_id:
            raise KeyError('no host-galaxy row returned for diaobject_id %d' % objectid)

        phot = phot_by_id.get(objectid, no_phot).rename(columns=renames)

        phot_d = {
            'objectid': objectid,
            'sncode': int(obj['gentype']),
            'redshift': obj['zcmb'],
            'ra': obj['ra'],
            'dec': obj['dec'],
            'photometry': phot[phot_columns].to_dict(orient='list'),
        }
        phot_d.update({key: value for key, value in host_by_id[objectid].items()
                       if key != 'diaobject_id'})
        data.append(phot_d)

    return data

In [6]:
def get_phot_orig(obj_df):
    """Fetch forced photometry (fluxes only) for a table of objects.

    Uses the module-level ``tom`` client, which must already exist when this
    function is called. Unlike ``get_phot`` no magnitudes are computed and no
    host-galaxy columns are attached.

    Parameters
    ----------
    obj_df : pandas.DataFrame
        One row per object with columns ``diaobject_id``, ``gentype``,
        ``zcmb``, ``ra`` and ``dec``.

    Returns
    -------
    list of dict
        One dict per row of ``obj_df`` with keys ``objectid``, ``sncode``,
        ``redshift``, ``ra``, ``dec`` and ``photometry`` (a dict of lists keyed
        by ``band``, ``mjd``, ``flux``, ``fluxerr``).
    """
    # One query for the photometry of every requested object.
    id_csv = ', '.join(str(id) for id in obj_df['diaobject_id'].tolist())
    query = ('SELECT diaobject_id, filtername, midpointtai, psflux, psfluxerr'
             ' FROM elasticc2_ppdbdiaforcedsource'
             ' WHERE diaobject_id IN (%s) ORDER BY diaobject_id, filtername, midpointtai;' % id_csv)
    response = tom.post('db/runsqlquery/', json={'query': query, 'subdict': {}})
    phot_table = pd.DataFrame(response.json()['rows'])

    # TOM column name -> name used in the returned photometry dict.
    renames = {
        'filtername': 'band',
        'midpointtai': 'mjd',
        'psflux': 'flux',
        'psfluxerr': 'fluxerr',
    }

    records = []
    for _, row in obj_df.iterrows():
        matched = phot_table[phot_table['diaobject_id'] == row['diaobject_id']]
        records.append({
            'objectid': int(row['diaobject_id']),
            'sncode': int(row['gentype']),
            'redshift': row['zcmb'],
            'ra': row['ra'],
            'dec': row['dec'],
            'photometry': matched[list(renames)].rename(columns=renames).to_dict(orient='list'),
        })

    return records

In [7]:
# MAKE INITIAL TRAINING SET
# Query the truth table for a small labelled sample (peakmjd between 61300 and
# 61309), extract features and write them to TOM_days_storage/TOM_training_features.
objs = []

# NOTE(review): the username and password-file path are hardcoded here.
tom = TomClient(url = "https://desc-tom-2.lbl.gov", username = 'awoldag', passwordfile = '../../password.txt')

# Up to 10 objects with gentype=10 (labelled 'Ia' further down).
res = tom.post('db/runsqlquery/',
                        json={ 'query': 'SELECT diaobject_id, gentype, zcmb, peakmjd,' 
                              ' peakmag_g, ra, dec FROM elasticc2_diaobjecttruth WHERE peakmjd>61300 and peakmjd<61309 and gentype=10 limit 10;', 
                             'subdict': {}} )
objs.extend(res.json()['rows'])
# Up to 5 objects for each of the remaining gentypes (labelled 'other' further down).
gentypes = [20,21,25,26,27,12,40,42,59]
for gentype in gentypes:
    res = tom.post('db/runsqlquery/',
                            json={ 'query': 'SELECT diaobject_id, gentype, zcmb, peakmjd,' 
                                f' peakmag_g, ra, dec FROM elasticc2_diaobjecttruth WHERE peakmjd>61300 and peakmjd<61309 and gentype={gentype} limit 5;', 
                                'subdict': {}} )
    objs.extend(res.json()['rows'])

# Attach photometry and host columns, then keep only objects the extractor can fit.
training_objs = get_phot(pd.DataFrame(objs))
good_objs = validate_objects(training_objs)

outdir = 'TOM_days_storage'

if not os.path.exists(outdir):
    os.makedirs(outdir)

feature_extraction_method = 'laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor'
# NOTE(review): one_code receives the non-Ia gentypes only (10 is not in the list);
# confirm this matches what resspect.fit expects for this argument.
fit(
    good_objs,
    output_features_file = outdir+'/TOM_training_features',
    feature_extractor = feature_extraction_method,
    filters='ZTF',
    additional_info=additional_info,
    one_code=gentypes,
)
# Re-open the features file to tag the sample and add a binary type label
# (sncode 10 -> 'Ia', anything else -> 'other'), then overwrite it in place.
data = pd.read_csv('TOM_days_storage/TOM_training_features',index_col=False)
data['orig_sample'] = 'train'
data["type"] = np.where(data["sncode"] == 10, 'Ia', 'other')
data.to_csv('TOM_days_storage/TOM_training_features',index=False)

Not enough obs for 123240715. pass!



INFO:root:Starting laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor fit...
INFO:root:Features have been saved to: TOM_days_storage/TOM_training_features


In [8]:
# MAKE TEST SET
# Same recipe as the training cell, for objects with peakmjd between 61310 and
# 61339; features are written to TOM_days_storage/TOM_testing_features.
objs = []

# NOTE(review): the username and password-file path are hardcoded here.
tom = TomClient(url = "https://desc-tom-2.lbl.gov", username = 'awoldag', passwordfile = '../../password.txt')

# Up to 1000 objects with gentype=10.
res = tom.post('db/runsqlquery/',
                        json={ 'query': 'SELECT diaobject_id, gentype, zcmb, peakmjd,' 
                              ' peakmag_g, ra, dec FROM elasticc2_diaobjecttruth WHERE peakmjd>61310 and peakmjd<61339 and gentype=10 limit 1000;', 
                             'subdict': {}} )
objs.extend(res.json()['rows'])

# Up to 100 objects for each of the remaining gentypes.
gentypes = [20,21,25,26,27,12,40,42,59]
for gentype in gentypes:
      res = tom.post('db/runsqlquery/',
                              json={ 'query': 'SELECT diaobject_id, gentype, zcmb, peakmjd,' 
                                    f' peakmag_g, ra, dec FROM elasticc2_diaobjecttruth WHERE peakmjd>61310 and peakmjd<61339 and gentype = {gentype} limit 100;', 
                              'subdict': {}} )
      objs.extend(res.json()['rows'])

# Attach photometry and host columns, then keep only objects the extractor can fit.
test_objs = get_phot(pd.DataFrame(objs))
good_objs = validate_objects(test_objs)

outdir = 'TOM_days_storage'

if not os.path.exists(outdir):
    os.makedirs(outdir)

feature_extraction_method = 'laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor'
fit(
    good_objs,
    output_features_file = outdir+'/TOM_testing_features',
    feature_extractor = feature_extraction_method,
    filters='ZTF',
    additional_info=additional_info,
    one_code=gentypes,
)
# Re-open the features file to tag the sample and add the binary type label.
data = pd.read_csv('TOM_days_storage/TOM_testing_features',index_col=False)
# NOTE(review): the test features are tagged orig_sample='train', same as the
# training cell -- looks like a copy-paste leftover; confirm whether 'test' is expected.
data['orig_sample'] = 'train'
data["type"] = np.where(data["sncode"] == 10, 'Ia', 'other')
data.to_csv('TOM_days_storage/TOM_testing_features',index=False)


Not enough obs for 100502727. pass!

Not enough obs for 33733016. pass!

Not enough obs for 64472383. pass!

Not enough obs for 87617224. pass!

Not enough obs for 152063486. pass!

Not enough obs for 4372763. pass!

Not enough obs for 19887951. pass!

Not enough obs for 109897362. pass!

Not enough obs for 18002946. pass!

Not enough obs for 34126804. pass!

Not enough obs for 102390903. pass!

Not enough obs for 31324487. pass!

Not enough obs for 107621693. pass!

Not enough obs for 75052665. pass!

Not enough obs for 129794122. pass!

Not enough obs for 51185706. pass!

Not enough obs for 159131060. pass!

Not enough obs for 153015355. pass!

Not enough obs for 154313091. pass!

Not enough obs for 12966687. pass!

Not enough obs for 1041473. pass!

Not enough obs for 158241304. pass!

Not enough obs for 112705885. pass!

Not enough obs for 44701642. pass!

Not enough obs for 59852840. pass!

Not enough obs for 65980131. pass!

Not enough obs for 151770909. pass!

Not enough obs for

INFO:root:Starting laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor fit...
INFO:root:Features have been saved to: TOM_days_storage/TOM_testing_features


In [9]:
# MAKE VALIDATION SET
# Same recipe as the training cell, for objects with peakmjd after 61340 (no
# upper bound); features are written to TOM_days_storage/TOM_validation_features.
objs = []

# NOTE(review): the username and password-file path are hardcoded here.
tom = TomClient(url = "https://desc-tom-2.lbl.gov", username = 'awoldag', passwordfile = '../../password.txt')

# Up to 1000 objects with gentype=10.
res = tom.post('db/runsqlquery/',
                        json={ 'query': 'SELECT diaobject_id, gentype, zcmb, peakmjd,' 
                              ' peakmag_g, ra, dec FROM elasticc2_diaobjecttruth WHERE peakmjd>61340 and gentype=10 limit 1000;', 
                             'subdict': {}} )
objs.extend(res.json()['rows'])

# Up to 100 objects for each of the remaining gentypes.
gentypes = [20,21,25,26,27,12,40,42,59]
for gentype in gentypes:
    res = tom.post('db/runsqlquery/',
                            json={ 'query': 'SELECT diaobject_id, gentype, zcmb, peakmjd,' 
                                f' peakmag_g, ra, dec FROM elasticc2_diaobjecttruth WHERE peakmjd>61340 and gentype={gentype} limit 100;', 
                                'subdict': {}} )
    objs.extend(res.json()['rows'])

# Attach photometry and host columns, then keep only objects the extractor can fit.
val_objs = get_phot(pd.DataFrame(objs))
good_objs = validate_objects(val_objs)

outdir = 'TOM_days_storage'

if not os.path.exists(outdir):
    os.makedirs(outdir)

feature_extraction_method = 'laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor'
fit(
    good_objs,
    output_features_file = outdir+'/TOM_validation_features',
    feature_extractor = feature_extraction_method,
    filters='ZTF',
    additional_info=additional_info,
    one_code=gentypes,
)
# Re-open the features file to tag the sample and add the binary type label.
data = pd.read_csv('TOM_days_storage/TOM_validation_features',index_col=False)
# NOTE(review): the validation features are tagged orig_sample='train', same as the
# training cell -- looks like a copy-paste leftover; confirm whether 'validation' is expected.
data['orig_sample'] = 'train'
data["type"] = np.where(data["sncode"] == 10, 'Ia', 'other')
data.to_csv('TOM_days_storage/TOM_validation_features',index=False)

Not enough obs for 44800973. pass!

Not enough obs for 141328782. pass!

Not enough obs for 61302065. pass!

Not enough obs for 87291295. pass!

Not enough obs for 19117780. pass!

Not enough obs for 80604681. pass!

Not enough obs for 128592939. pass!

Not enough obs for 39828597. pass!

Not enough obs for 79426686. pass!

Not enough obs for 60472694. pass!

Not enough obs for 73972283. pass!

Not enough obs for 154941717. pass!

Not enough obs for 104754364. pass!

Not enough obs for 61547230. pass!

Not enough obs for 148885626. pass!

Not enough obs for 40839889. pass!

Not enough obs for 5650186. pass!

Not enough obs for 46540052. pass!

Not enough obs for 54402381. pass!

Not enough obs for 94323582. pass!

Not enough obs for 152848675. pass!

Not enough obs for 43378718. pass!

Not enough obs for 146319328. pass!

Not enough obs for 105173195. pass!

Not enough obs for 142460430. pass!

Not enough obs for 129186952. pass!

Not enough obs for 14487438. pass!

Not enough obs for 

INFO:root:Starting laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor fit...
INFO:root:Features have been saved to: TOM_days_storage/TOM_validation_features


In [12]:
def _extract_hot_features(night):
    """Fetch, validate and featurize the hot transients for one night.

    Requests the objects detected in the last day as of MJD ``60796 + night``,
    keeps those accepted by ``validate_objects``, runs the LAISS feature
    extractor through ``fit`` and tags every row of the resulting file
    ``TOM_days_storage/TOM_hot_features_day_<night>.csv`` with
    ``orig_sample = 'pool'``.
    """
    feature_extraction_method = 'laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor'

    outdir = 'TOM_days_storage'
    os.makedirs(outdir, exist_ok=True)
    file_name = outdir+'/TOM_hot_features_day_'+str(night)+'.csv'

    # request light curve data from the TOM
    # NOTE(review): the username and password-file path are hardcoded.
    data_dic = request_TOM_data(url = "https://desc-tom-2.lbl.gov", username='awoldag',
                                passwordfile='../../password.txt', detected_in_last_days = 1,
                                mjdnow = 60796+night,
                                cheat_gentypes = [20,21,25,26,27,12,40,42,59,10,30,31,32,35,36,37,11])
    good_dic = validate_objects(data_dic['diaobject'])

    # run that data through RESSPECT to get features
    # TODO: at some point, cut out objects that are not likely SN -- probably before RESSPECT
    fit(
        good_dic,
        output_features_file = file_name,
        feature_extractor = feature_extraction_method,
        filters='ZTF',
        additional_info=additional_info,
    )

    # Tag the freshly written rows as belonging to the query pool.
    # TODO: add the date added so that objects can be removed when they are too old
    data = pd.read_csv(file_name, index_col=False)
    data['orig_sample'] = 'pool'
    data.to_csv(file_name, index=False)


def run_one_night(day):
    """Build the pool feature files for ``day - 1`` and ``day``.

    For each of the two nights, in that order, the hot transients are
    requested from the TOM, validated, featurized and written to
    ``TOM_days_storage/TOM_hot_features_day_<night>.csv``.

    Parameters
    ----------
    day : int
        Index of the current night; MJD ``60796 + day`` is used as "now".

    Open items carried over from the original notebook:
      * TODO: check for new spec+classification for the training set before running the loop
      * TODO: implement an auto-check for simulated vs real data
      * TODO: make clear what is for real data vs sim (e.g. real data will want a date)
    """
    for night in (day - 1, day):
        _extract_hot_features(night)


In [13]:
# Build the pool feature files for day 50 (run_one_night also rebuilds day 49).
run_one_night(50)

Not enough obs for 1002404. pass!

Not enough obs for 1003512. pass!

Not enough obs for 1019009. pass!

Not enough obs for 1021179. pass!

Not enough obs for 1026842. pass!

Not enough obs for 1028303. pass!

Not enough obs for 1035697. pass!

Not enough obs for 1050528. pass!

Not enough obs for 1105973. pass!

Not enough obs for 1113532. pass!

Not enough obs for 1125508. pass!

Not enough obs for 1134770. pass!

Not enough obs for 1136228. pass!

Not enough obs for 1146088. pass!

Not enough obs for 1165184. pass!

Not enough obs for 1203931. pass!

Not enough obs for 1236407. pass!

Not enough obs for 1236491. pass!

Not enough obs for 1237363. pass!

Not enough obs for 1238294. pass!

Not enough obs for 1238968. pass!

Not enough obs for 1245276. pass!

Not enough obs for 1247881. pass!

Not enough obs for 1251123. pass!

Not enough obs for 1252493. pass!

Not enough obs for 1279764. pass!

Not enough obs for 1289503. pass!

Not enough obs for 1301477. pass!

Not enough obs for 1

INFO:root:Starting laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor fit...
INFO:root:Features have been saved to: TOM_days_storage/TOM_hot_features_day_49.csv


Not enough obs for 1000296. pass!

Not enough obs for 1005964. pass!

Not enough obs for 1014216. pass!

Not enough obs for 1019009. pass!

Not enough obs for 1021189. pass!

Not enough obs for 1026842. pass!

Not enough obs for 1035697. pass!

Not enough obs for 1040620. pass!

Not enough obs for 1041696. pass!

Not enough obs for 1042509. pass!

Not enough obs for 1046907. pass!

Not enough obs for 1059373. pass!

Not enough obs for 1076587. pass!

Not enough obs for 1081324. pass!

Not enough obs for 1091333. pass!

Not enough obs for 1094005. pass!

Not enough obs for 1098485. pass!

Not enough obs for 1100123. pass!

Not enough obs for 1101828. pass!

Not enough obs for 1103019. pass!

Not enough obs for 1105167. pass!

Not enough obs for 1105973. pass!

Not enough obs for 1108243. pass!

Not enough obs for 1111585. pass!

Not enough obs for 1113602. pass!

Not enough obs for 1122457. pass!

Not enough obs for 1128006. pass!

Not enough obs for 1136228. pass!

Not enough obs for 1

INFO:root:Starting laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor fit...
INFO:root:Features have been saved to: TOM_days_storage/TOM_hot_features_day_50.csv


In [14]:
def _spread_priorities(n_objects, n_levels=5):
    """Return ``n_objects`` priorities from 1..``n_levels`` in ascending order.

    Objects are split as evenly as possible over the levels; when the count is
    not a multiple of ``n_levels`` the lowest-numbered levels get one extra.
    """
    base, extra = divmod(n_objects, n_levels)
    priorities = []
    for level in range(1, n_levels + 1):
        priorities.extend([level] * (base + (1 if level <= extra else 0)))
    return priorities


def run_td_loop(day):
    """Run the RESSPECT time-domain loop around ``day`` and print the queries.

    The loop is configured for the days ``[day - 1, day + 1]`` using the
    features stored in ``TOM_days_storage/``. The queried object ids are then
    read back from the query file and printed together with a priority from
    1 to 5 for each.

    Parameters
    ----------
    day : int
        Index of the current night.
    """
    feature_extraction_method = 'laiss_resspect_classifier.elasticc2_laiss_feature_extractor.Elasticc2LaissFeatureExtractor'
    classifier = 'laiss_resspect_classifier.laiss_classifier.LaissRandomForest'

    days = [day-1, day+1]                           # first and last day of the survey
    strategy = 'CertaintySampling'                  # learning strategy
    batch = 5                                       # number of objects queried per night
    sep_files = True                                # train, test and validation samples
                                                    # are read from separate files
    path_to_features_dir = 'TOM_days_storage/'      # folder where the files for each day are stored

    # output results for metrics
    output_metrics_file = 'results/metrics_' + strategy + '_' + str('ini_train_set') + \
                          '_batch' + str(batch) + '.csv'

    # output query sample
    output_query_file = 'results/queried_' + strategy + '_' + str('ini_train_set') + \
                        '_batch' + str(batch) + '_day_' + str(day) + '.csv'

    # Both output files live under results/; make sure the folder exists.
    os.makedirs('results', exist_ok=True)

    # features from full light curves for the initial samples
    path_to_ini_files = {
        'train': 'TOM_days_storage/TOM_training_features',
        'test': 'TOM_days_storage/TOM_testing_features',
        'validation': 'TOM_days_storage/TOM_validation_features',
    }

    survey = 'ZTF'
    n_estimators = 1000                             # number of trees in the forest
    screen = False                                  # if True will print many things for debugging
    fname_pattern = ['TOM_hot_features_day_', '.csv']   # filename pattern of the per-day files
    queryable = False                               # if True, check brightness before considering
                                                    # an object queryable

    # run time domain loop
    time_domain_loop(TimeDomainConfiguration(days=days, output_metrics_file=output_metrics_file,
                     output_queried_file=output_query_file,
                     path_to_ini_files=path_to_ini_files,
                     path_to_features_dir=path_to_features_dir,
                     strategy=strategy, fname_pattern=fname_pattern, batch=batch,
                     classifier=classifier,
                     sep_files=sep_files,
                     survey=survey, queryable=queryable,
                     feature_extraction_method=feature_extraction_method),
                     screen=screen, n_estimators=n_estimators,
                     )

    # TODO: do we want higher entropy in our returned objects?
    # Read in RESSPECT requests to input to TOM format
    ids = [int(objectid) for objectid in pd.read_csv(output_query_file)['objectid']]
    priorities = _spread_priorities(len(ids))

    # send these queried objects to the TOM (submission is currently disabled)
    # submit_queries_to_TOM('awoldag', '../../password.txt', objectids = ids, priorities = priorities, requester = 'resspect')
    print(ids, priorities)


In [15]:
# Run the time-domain loop for day 50; prints the queried object ids and their priorities.
run_td_loop(50)

  0% (0 of 1) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--
100% (1 of 1) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01


[135887774, 117635023, 4949578, 129006369] [1, 2, 3, 4]


In [None]:
#Pull classified obj and add them to the training set
def get_classified(username, passwordfile=None, password=None, since = None):
    """Ask the TOM for the spectra it already has classifications for.

    Posts to ``elasticc2/getknownspectruminfo`` and returns the decoded JSON
    reply. ``since`` is forwarded in the request body only when it is given.

    Parameters
    ----------
    username : str
        TOM account name.
    passwordfile, password : optional
        Credentials forwarded to ``TomClient``.
    since : optional
        Forwarded to the TOM under the ``'since'`` key.

    Raises
    ------
    AssertionError
        If the HTTP status is not 200 or the reply's ``status`` is not ``"ok"``.
    """
    client = TomClient(url = "https://desc-tom-2.lbl.gov", username = username, password = password,
                       passwordfile = passwordfile)
    payload = {} if since is None else {'since': since}

    response = client.post('elasticc2/getknownspectruminfo', json=payload)

    assert response.status_code == 200
    assert response.json()['status'] == "ok"
    return response.json()

In [None]:
# Fetch the classified spectra known to the TOM, passing the timestamp below as `since`.
# NOTE(review): the username and an absolute, user-specific password-file path
# are hardcoded; this cell will not run on another machine as written.
classed_obj = get_classified('amandaw8', passwordfile='/Users/arw/secrets/TOM2', since = '11/22/2024 19:20:00')

In [None]:
# Collect the id and a binary label for every classified spectrum:
# classid 2222 is labelled 'Ia', every other classid 'other'.
objectids, classes = [], []
for obj in classed_obj['spectra']:
    objectids.append(obj['objectid'])
    classes.append('Ia' if obj['classid'] == 2222 else 'other')

In [None]:
def get_object_phot(username, passwordfile=None, password=None, obj_ids=None):
    """Request photometry for specific objects from the TOM.

    Posts ``{'obj_ids': obj_ids}`` to ``elasticc2/getobjphot`` and returns the
    decoded JSON reply.

    Parameters
    ----------
    username : str
        TOM account name.
    passwordfile, password : optional
        Credentials forwarded to ``TomClient``.
    obj_ids : list, optional
        Object ids to request. Omitting it sends an empty list.

    Raises
    ------
    AssertionError
        If the HTTP status is not 200 or the reply's ``status`` is not ``"ok"``.
    """
    tom = TomClient(url = "https://desc-tom-2.lbl.gov", username = username, password = password,
                    passwordfile = passwordfile)
    # A None default (instead of a shared mutable []) gives every call its own list.
    dic = {'obj_ids': [] if obj_ids is None else obj_ids}

    res = tom.post( 'elasticc2/getobjphot', json = dic)

    assert res.status_code == 200, 'elasticc2/getobjphot returned HTTP %s' % res.status_code
    reqs = res.json()
    assert reqs['status'] == "ok", 'elasticc2/getobjphot returned status %r' % (reqs['status'],)
    return reqs

In [None]:
def update_training_set(objectids, classes, day=None):
    """Extract features for newly classified objects (work in progress).

    Fetches the photometry of ``objectids`` from the TOM, runs ``fit_TOM`` on
    it and tags the resulting rows of
    ``TOM_train_features_storage/TOM_train_features_day_<day>.csv`` with
    ``orig_sample = 'train'``.

    Parameters
    ----------
    objectids : list
        Ids of the objects that received a classification.
    classes : list
        Labels for ``objectids``. Not used yet -- see the TODOs below.
    day : int
        Index of the current night, used in the output file name. Required;
        it is a keyword with a ``None`` default only so that the positional
        signature stays as it was.

    Raises
    ------
    ValueError
        If ``day`` is not given.
    """
    if day is None:
        raise ValueError("update_training_set needs `day` to name its output file")

    # TODO: figure out how to get a specific objid photometry; in the future
    # get this from our mongodb -- for now it is not known which files contain
    # which objects.
    # NOTE(review): the username and an absolute, user-specific password-file
    # path are hardcoded.
    data_dic = get_object_phot('amandaw8', passwordfile = '/Users/arw/secrets/tom2', obj_ids = objectids)

    # put the ^ dictionary into the right format to get features
    # NOTE(review): assumes the getobjphot reply has a 'diaobject' key -- confirm.
    data_dic = data_dic['diaobject']

    # then do something like fit_TOM to get the features from the object photometry
    outdir = 'TOM_train_features_storage'
    os.makedirs(outdir, exist_ok=True)
    file_name = outdir+'/TOM_train_features_day_'+str(day)+'.csv'

    fit_TOM(data_dic, output_features_file = file_name, feature_extractor = 'Malanchev')
    data = pd.read_csv(file_name, index_col=False)
    data['orig_sample'] = 'train'

    data.to_csv(file_name,index=False)

    # TODO: read this file in and concatenate it with the current training set
    # (this is where `classes` will be needed).

    # TODO: REMOVE classified objects from the pool set each night. Just double check that
    # all SN in hot transients DO NOT have same object ids as those in the training set
    # CHECK WITH ROB - can we make it so that gethotsne removes classified obj

    
    

In [None]:
import progressbar
import time

# Scratch cell: ten iterations of 0.1 s each, wrapped in progressbar.progressbar.
for i in progressbar.progressbar(range(10)):
    time.sleep(0.1)