In [1]:
from collections import OrderedDict, Counter
import time
import sys
import os
import antares_client
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

plt.style.use('fig_publication.mplstyle')
%config InlineBackend.figure_format = 'retina'  #for MacOS, make plots crisp

In [2]:
# Light-curve features taken from Kostya's lc_feature_extractor (106 available in total).
# The selection below holds 62 names: the same 31 statistics for the r band and for the g band
# (24 computed on magnitudes, 7 computed on fluxes).
# NOTE(review): the previous header quoted "the 82 from SNAD Miner paper"; only 62 entries are
# active here -- confirm which number is intended.
# Deliberately left out: eta_e, chi2, linear_fit_reduced_chi2, linear_trend (+sigma),
# maximum_slope, period_* / period_s_to_n_* and periodogram_* -- several of these were marked
# "cadence removal" in the original notes, and period_0 was marked "should be negated".
# The order is kept exactly as before (it may define downstream column order).  Note that the
# two bands are NOT ordered identically: percent_amplitude precedes the median_buffer_range
# pair in r but follows it in g.
feature_names_r_g = [
    # ---- r band, magnitude-based ----
    "feature_amplitude_magn_r",
    "feature_anderson_darling_normal_magn_r",
    "feature_beyond_1_std_magn_r",
    "feature_beyond_2_std_magn_r",
    "feature_cusum_magn_r",
    "feature_inter_percentile_range_2_magn_r",
    "feature_inter_percentile_range_10_magn_r",
    "feature_inter_percentile_range_25_magn_r",
    "feature_kurtosis_magn_r",
    "feature_linear_fit_slope_magn_r",
    "feature_linear_fit_slope_sigma_magn_r",
    "feature_magnitude_percentage_ratio_40_5_magn_r",
    "feature_magnitude_percentage_ratio_20_5_magn_r",
    "feature_mean_magn_r",
    "feature_median_absolute_deviation_magn_r",
    "feature_percent_amplitude_magn_r",
    "feature_median_buffer_range_percentage_10_magn_r",
    "feature_median_buffer_range_percentage_20_magn_r",
    "feature_percent_difference_magnitude_percentile_5_magn_r",
    "feature_percent_difference_magnitude_percentile_10_magn_r",
    "feature_skew_magn_r",
    "feature_standard_deviation_magn_r",
    "feature_stetson_k_magn_r",
    "feature_weighted_mean_magn_r",
    # ---- r band, flux-based ----
    "feature_anderson_darling_normal_flux_r",
    "feature_cusum_flux_r",
    "feature_excess_variance_flux_r",
    "feature_kurtosis_flux_r",
    "feature_mean_variance_flux_r",
    "feature_skew_flux_r",
    "feature_stetson_k_flux_r",
    # ---- g band, magnitude-based ----
    "feature_amplitude_magn_g",
    "feature_anderson_darling_normal_magn_g",
    "feature_beyond_1_std_magn_g",
    "feature_beyond_2_std_magn_g",
    "feature_cusum_magn_g",
    "feature_inter_percentile_range_2_magn_g",
    "feature_inter_percentile_range_10_magn_g",
    "feature_inter_percentile_range_25_magn_g",
    "feature_kurtosis_magn_g",
    "feature_linear_fit_slope_magn_g",
    "feature_linear_fit_slope_sigma_magn_g",
    "feature_magnitude_percentage_ratio_40_5_magn_g",
    "feature_magnitude_percentage_ratio_20_5_magn_g",
    "feature_mean_magn_g",
    "feature_median_absolute_deviation_magn_g",
    "feature_median_buffer_range_percentage_10_magn_g",
    "feature_median_buffer_range_percentage_20_magn_g",
    "feature_percent_amplitude_magn_g",
    "feature_percent_difference_magnitude_percentile_5_magn_g",
    "feature_percent_difference_magnitude_percentile_10_magn_g",
    "feature_skew_magn_g",
    "feature_standard_deviation_magn_g",
    "feature_stetson_k_magn_g",
    "feature_weighted_mean_magn_g",
    # ---- g band, flux-based ----
    "feature_anderson_darling_normal_flux_g",
    "feature_cusum_flux_g",
    "feature_excess_variance_flux_g",
    "feature_kurtosis_flux_g",
    "feature_mean_variance_flux_g",
    "feature_skew_flux_g",
    "feature_stetson_k_flux_g",
]
    
# Full column list: identifier / alert-summary columns first, then every light-curve feature
# from feature_names_r_g, then the trailing anomaly and locus columns.
# 'Unnamed: 0' is the column name pandas gives an unnamed first CSV column.
ztf_id_and_features_r_g = [
    'Unnamed: 0',
    'locus_id', 'ra', 'dec',
    'tags', 'catalogs', 'ztf_object_id', 'ztf_ssnamenr', 'num_alerts',
    'num_mag_values',
    'oldest_alert_id', 'oldest_alert_magnitude', 'oldest_alert_observation_time',
    'newest_alert_id', 'newest_alert_magnitude', 'newest_alert_observation_time',
    'brightest_alert_id', 'brightest_alert_magnitude', 'brightest_alert_observation_time',
    *feature_names_r_g,
    'horizons_targetname', 'anomaly', 'anom_score', 'anomaly_score',
    'anomaly_mask', 'anomaly_type', 'is_corrected', 'vpdf_extreme_version',
    'vpdf_extreme_faint', 'vpdf_extreme_bright', 'locus_gal_l', 'locus_gal_b',
]


# Host-galaxy features used alongside the light-curve features (58 names).
# For each of the five bands g, r, i, z, y (in that order) the same ten quantities are kept:
# the second moments (XX, XY, YY, R1, RH), the PSF / aperture / Kron fluxes, the Kron radius
# and ExtNSigma.  After those come the i-z colour ("try throwing in"), the per-band
# aperture-minus-Kron magnitude columns, '7DCD' and 'dist/DLR'.
# Every other column of the host table is deliberately excluded: identifiers, positions and
# epochs, PSF/aperture/Kron magnitudes and their errors, flux errors, info flags, PSF-shape
# diagnostics, sky / zero-point / plate-scale values, elongation and axis columns, the other
# colours (g-r, r-i, g-i, z-y) and their errors, NED fields, 'class', 'dist' and the
# Transient* columns.
# NOTE(review): column names look like Pan-STARRS (PS1) catalogue fields -- confirm the source.
feature_names_hostgal = (
    [
        f"{band}{quantity}"
        for band in "grizy"
        for quantity in (
            "momentXX", "momentXY", "momentYY", "momentR1", "momentRH",
            "PSFFlux", "ApFlux", "KronFlux", "KronRad", "ExtNSigma",
        )
    ]
    + ["i-z"]
    + [f"{band}ApMag_{band}KronMag" for band in "grizy"]
    + ["7DCD", "dist/DLR"]
)

# TNS-derived columns appended after the light-curve and host-galaxy features.
feature_names_tns = ['has_tns', 'tns_cls', 'spec_z', 'report_group']

# Combined feature lists: light curve first, then host galaxy, then (optionally) TNS columns.
lc_and_host_features = [*feature_names_r_g, *feature_names_hostgal]
lc_and_host_and_tns_features = [*lc_and_host_features, *feature_names_tns]

In [3]:
def plot_conf_matrix(y_test, y_pred, labels, title, kind, figsize=(6, 4)):
    """Draw an annotated, normalised confusion matrix as a heatmap.

    Each cell shows the normalised fraction (2 significant digits) with the raw count in
    parentheses underneath.  The colour scale is fixed to [0, 1].

    Parameters
    ----------
    y_test : array-like
        True class labels.
    y_pred : array-like
        Predicted class labels.
    labels : list
        Class labels, in the order they should appear on both axes.
    title : str
        Figure title.
    kind : {'completeness', 'purity'}
        'completeness' normalises over the true classes (``normalize='true'``);
        'purity' normalises over the predicted classes (``normalize='pred'``).
    figsize : tuple, optional
        Figure size passed to ``plt.figure``.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the two supported values.  (Previously an unknown kind fell
        through both branches and failed later with an unrelated NameError.)

    Notes
    -----
    NOTE(review): relies on ``confusion_matrix`` and ``sns`` being available in the notebook
    namespace; neither is imported in the notebook's first import cell -- confirm they are
    imported before this function is called.
    The figure is left open (no ``plt.show()``) so the caller can save or further tweak it.
    """
    normalize_by_kind = {'completeness': 'true', 'purity': 'pred'}
    # Validate first so a typo in `kind` fails immediately, before any figure is created.
    if kind not in normalize_by_kind:
        raise ValueError(
            f"kind must be one of {sorted(normalize_by_kind)}, got {kind!r}"
        )

    counts = confusion_matrix(y_test, y_pred, labels=labels, normalize=None)
    fractions = confusion_matrix(y_test, y_pred, labels=labels,
                                 normalize=normalize_by_kind[kind])
    # Per-cell annotation: fraction on the first line, raw count in parentheses on the second.
    annotations = np.vectorize(lambda c, r: f'{r:.2g}\n({c})')(counts, fractions)
    heatmap = pd.DataFrame(fractions, index=labels, columns=labels)

    plt.figure(figsize=figsize)
    # fmt='s' because the annotations are pre-formatted strings, not numbers.
    sns.heatmap(heatmap, annot=annotations, fmt='s', cmap='Blues', vmin=0, vmax=1, annot_kws={"fontsize":18})
    plt.title(title, fontsize=24)
    plt.ylabel('True class', fontsize=24)
    plt.xlabel('Predicted class', fontsize=24)

In [4]:
def plot_RFC_prob_vs_lc(dataframe, anom_thresh,
                        df_path="/Users/patrickaleo/Desktop/Illinois/LAISS-antares/repo/CVAE_dataframes"):
    """Plot each object's light curve above its classifier probabilities versus time.

    For every row of ``dataframe`` the per-epoch feature table
    ``{df_path}/{ztf_id}_timeseries.csv`` is read, rows containing NaN are dropped, and the
    notebook-level classifier ``clf`` is evaluated on the columns ``x_test.columns``.  The
    ANTARES light curve (g and R passbands) is drawn in the upper panel and both class
    probabilities (in percent) in the lower panel, sharing the time axis.  A vertical line in
    the upper panel marks the first epoch at which the second-class probability reaches
    ``anom_thresh``.

    An object is skipped, with a printed message, when its time-series file cannot be read,
    when the classifier call fails, or when the ANTARES time series cannot be obtained.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Indexed by ZTF object id and carrying a ``tns_cls`` column (printed per object).
    anom_thresh : float
        Probability threshold in percent.
    df_path : str, optional
        Directory holding the ``*_timeseries.csv`` files.  Defaults to the location that used
        to be hard-coded in this function, so existing calls behave as before.

    Returns
    -------
    None

    Notes
    -----
    Relies on ``clf``, ``x_test`` and ``antares_client`` from the notebook namespace.
    """
    # NOTE(review): these three lists are filled but never returned or read afterwards.
    num_anom_epochs_l = []
    anom_ztfid_l, tnscls_l = [], []

    for anom_ztfid, tnscls in zip(dataframe.index, dataframe.tns_cls):
        print(anom_ztfid, tnscls)
        try:
            timeseries_df = pd.read_csv(f'{df_path}/{anom_ztfid}_timeseries.csv')
        except Exception:
            print(f"couldn't feature space as func of time for {anom_ztfid}. Make anom epoch -9")
            num_anom_epochs_l.append(-9)
            continue

        timeseries_df = timeseries_df.dropna()
        anom_obj_df = timeseries_df[x_test.columns]

        try:
            pred_prob_anom = 100 * clf.predict_proba(anom_obj_df)
            flagged_pos = np.where(pred_prob_anom[:, 1] >= anom_thresh)[0]
        except Exception:
            print(f"{anom_ztfid} has some NaN host galaxy values from PS1 catalog. Skip!")
            continue
        num_anom_epochs = len(flagged_pos)
        num_anom_epochs_l.append(num_anom_epochs)

        if flagged_pos.size:
            # Rows of pred_prob_anom line up *positionally* with timeseries_df, whose index
            # still carries the pre-dropna labels, so the epoch must be fetched with .iloc.
            # (The previous .loc lookup plus row-count offset picked the wrong row, or failed,
            # whenever rows had been dropped.)
            anom_mjd = timeseries_df['mjd_cutoff'].iloc[flagged_pos[0]]
        else:
            print(f"Prediction doesn't exceed anom_threshold of {anom_thresh}% for {anom_ztfid}.")
            anom_mjd = None

        ztf_id_ref = anom_ztfid
        ref_info = antares_client.search.get_by_ztf_object_id(ztf_object_id=ztf_id_ref)

        try:
            df_ref = ref_info.timeseries.to_pandas()
        except Exception:
            # Also covers ref_info being None (no locus returned for this id).
            print(f"No ref_info.timeseries.to_pandas() for {anom_ztfid}. Continue...")
            continue

        print(num_anom_epochs)
        anom_ztfid_l.append(anom_ztfid)
        tnscls_l.append(tnscls)

        # Keep only detections with a measured magnitude, split by passband.
        has_mag = ~df_ref.ant_mag.isna()
        df_ref_g = df_ref[(df_ref.ant_passband == 'g') & has_mag]
        df_ref_r = df_ref[(df_ref.ant_passband == 'R') & has_mag]

        fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(7,10))
        ax1.invert_yaxis()  # magnitudes: brighter is up
        ax1.errorbar(x=df_ref_r.ant_mjd, y=df_ref_r.ant_mag, yerr=df_ref_r.ant_magerr, fmt='o', c='r',
                     label=f'REF: {ztf_id_ref}')
        ax1.errorbar(x=df_ref_g.ant_mjd, y=df_ref_g.ant_mag, yerr=df_ref_g.ant_magerr, fmt='o', c='g')
        if anom_mjd is not None:
            ax1.axvline(x=anom_mjd)
        ax1.set_ylabel('Mag')

        print(f'https://alerce.online/object/{anom_ztfid}', tnscls)
        ax2.plot(timeseries_df['mjd_cutoff'], pred_prob_anom[:, 0], label='p(Normal)')
        ax2.plot(timeseries_df['mjd_cutoff'], pred_prob_anom[:, 1], label='p(Other)')
        ax2.set_ylabel('Probability (%)')
        ax2.set_xlabel('mjd_cutoff')
        ax2.legend()
        plt.show()
        
def plot_RFC_prob_vs_lc_ztfid(anom_ztfid, anom_thresh, df_path):
    """Plot one object's light curve above its classifier probabilities versus time.

    Reads ``{df_path}/{anom_ztfid}_timeseries.csv``, drops rows containing NaN, evaluates the
    notebook-level classifier ``clf`` on the columns ``x_test.columns`` and draws the ANTARES
    light curve (g and R passbands) in the upper panel with both class probabilities (in
    percent) in the lower panel.  A vertical line in the upper panel marks the first epoch at
    which the second-class probability reaches ``anom_thresh``.

    Parameters
    ----------
    anom_ztfid : str
        ZTF object id.
    anom_thresh : float
        Probability threshold in percent.
    df_path : str
        Directory holding the ``*_timeseries.csv`` files.

    Returns
    -------
    None
        Also returns early, after printing a message, when the time-series file cannot be
        read, the classifier call fails, or ANTARES returns no locus for the id.

    Notes
    -----
    Relies on ``clf``, ``x_test`` and ``antares_client`` from the notebook namespace.
    """
    try:
        timeseries_df = pd.read_csv(f'{df_path}/{anom_ztfid}_timeseries.csv')
    except Exception:
        print(f"couldn't feature space as func of time for {anom_ztfid}. pass.")
        # Nothing to plot without the feature table (the old `pass` fell through to an
        # UnboundLocalError on the next line).
        return

    timeseries_df = timeseries_df.dropna()
    anom_obj_df = timeseries_df[x_test.columns]

    try:
        pred_prob_anom = 100 * clf.predict_proba(anom_obj_df)
        flagged_pos = np.where(pred_prob_anom[:, 1] >= anom_thresh)[0]
    except Exception:
        print(f"{anom_ztfid} has some NaN host galaxy values from PS1 catalog. Skip!")
        return

    if flagged_pos.size:
        # Rows of pred_prob_anom line up *positionally* with timeseries_df, whose index still
        # carries the pre-dropna labels, so the epoch must be fetched with .iloc, not .loc.
        anom_mjd = timeseries_df['mjd_cutoff'].iloc[flagged_pos[0]]
    else:
        print(f"Prediction doesn't exceed anom_threshold of {anom_thresh}% for {anom_ztfid}.")
        anom_mjd = None

    ztf_id_ref = anom_ztfid
    ref_info = antares_client.search.get_by_ztf_object_id(ztf_object_id=ztf_id_ref)
    if ref_info is None:
        # The lookup can come back empty (the notebook guards for this elsewhere too).
        print(f"No ANTARES locus found for {anom_ztfid}.")
        return

    df_ref = ref_info.timeseries.to_pandas()

    # Keep only detections with a measured magnitude, split by passband.
    has_mag = ~df_ref.ant_mag.isna()
    df_ref_g = df_ref[(df_ref.ant_passband == 'g') & has_mag]
    df_ref_r = df_ref[(df_ref.ant_passband == 'R') & has_mag]

    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(7,10))
    ax1.invert_yaxis()  # magnitudes: brighter is up
    ax1.errorbar(x=df_ref_r.ant_mjd, y=df_ref_r.ant_mag, yerr=df_ref_r.ant_magerr, fmt='o', c='r',
                 label=f'REF: {ztf_id_ref}')
    ax1.errorbar(x=df_ref_g.ant_mjd, y=df_ref_g.ant_mag, yerr=df_ref_g.ant_magerr, fmt='o', c='g')
    if anom_mjd is not None:
        ax1.axvline(x=anom_mjd)
    ax1.set_ylabel('Mag')  # same label as in plot_RFC_prob_vs_lc

    print(f'https://alerce.online/object/{anom_ztfid}')
    ax2.plot(timeseries_df['mjd_cutoff'], pred_prob_anom[:, 0], label='p(Normal)')
    ax2.plot(timeseries_df['mjd_cutoff'], pred_prob_anom[:, 1], label='p(Other)')
    ax2.set_xlabel('mjd_cutoff')
    ax2.set_ylabel('Probability (%)')
    ax2.legend()
    plt.show()

In [5]:
# Stitch the 56 per-chunk TNS search result tables into one table and save it.
dfs = []
for idx in range(1, 57):  # chunk files are numbered 1..56
    # index_col=0: the first CSV column of each chunk becomes its index.
    df = pd.read_csv(f'../tables/before_lc-feature-extractor_tns_search/{idx}_of_56.csv', index_col=0)
    dfs.append(df)
    
# Stack the chunks row-wise; reset_index(drop=True) discards the per-chunk index values and
# replaces them with a fresh 0..N-1 index.
all_tns_result_df = pd.concat(dfs, axis=0).reset_index(drop=True)
# index=False: the fresh integer index is not written to the combined file.
all_tns_result_df.to_csv('../tables/before_lc-feature-extractor_tns_search_2778spec.csv', index=False)
all_tns_result_df 

Unnamed: 0,Name,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
0,SN 2020aewj,17:41:43.138,+67:57:42.59,SN Ia,0.055000,,,ZTF,ZTF,ZTF,...,1,,19.4900,g-ZTF,2020-01-27 11:41:51.360,Goobar,,2021TNSTR.335....1G,2021TNSCR.341....1P,
1,SN 2020abjx,02:15:02.300,-08:37:43.70,SLSN-I,0.390000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,...,1,,20.7600,r-Sloan,2020-09-19 09:35:34.080,Yan,,2020TNSTR3637....1Y,2020TNSCR3639....1Y,
2,SN 2020zct,06:32:50.530,-12:23:00.56,SN Iax[02cx-like],0.140000,,,REFITT,ZTF,REFITT,...,1,,19.8499,g-PTF,2020-09-26 11:35:27.744,REFITT_BOT,,2020TNSTR3408....1W,2020TNSCR3413....1W,
3,SN 2020vdc,02:58:54.456,-10:33:53.12,SN II,0.034400,,,"ZTF, ATLAS, Pan-STARRS","ZTF, ATLAS, Pan-STARRS",ZTF,...,1,,19.5800,g-ZTF,2020-09-29 09:09:49.248,ZTF_AMPEL_COMPLETE,,2020TNSTR3060....1N,2020TNSCR3640....1Y,
4,SN 2020uyr,22:33:34.050,-20:46:01.13,SN Ia,0.067767,,,"ZTF, Pan-STARRS, ATLAS, GaiaAlerts","ZTF, Pan-STARRS, ATLAS, GaiaAlerts",ZTF,...,1,,20.1000,r-ZTF,2020-09-20 08:02:24.000,ZTF_Bot1,,2020TNSTR3038....1F,2020TNSCR3122....1D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2773,SN 2018aey,10:51:16.440,+22:28:39.40,SN Ia,0.077000,,0.0767,ZTF,ZTF,ZTF,...,1,2018-04-24,18.4000,r-ZTF,2018-03-05 06:08:38.000,ZTF_Bot1,,2018TNSTR.545....1L,2018TNSCR.547....1N,
2774,SN 2018aex,11:00:45.380,+22:17:14.96,SN II,0.023000,,0.0229,"ZTF, GaiaAlerts, ATLAS","ZTF, GaiaAlerts, ATLAS",ZTF,...,1,,18.1000,r-ZTF,2018-03-05 06:07:12.000,ZTF_Bot1,,2018TNSTR.545....1L,2018TNSCR.547....1N,
2775,SN 2018yt,08:59:35.590,+45:36:41.90,SN II,0.052000,,,ZTF,ZTF,ZTF,...,1,2018-04-24,18.9000,r-ZTF,2018-02-09 06:00:00.000,ZTF_Bot1,,2018TNSTR.544....1T,2018TNSCR.548....1T,
2776,SN 2018mg,10:56:53.580,+79:16:42.50,SN Ia,0.110000,,,ZTF,ZTF,ZTF,...,1,2018-02-07,20.1000,g-ZTF,2018-01-11 10:52:26.000,ZTF_Bot1,,2018TNSTR.172....1L,2018TNSCR.175....1F,


In [6]:
# Reload the combined table from disk.  index_col=0 turns the first CSV column into the index
# (the rendered output shows the index is named "Name"), so this frame is indexed differently
# from the integer-indexed one built in the previous cell.
all_tns_result_df = pd.read_csv('../tables/before_lc-feature-extractor_tns_search_2778spec.csv', index_col=0)
all_tns_result_df

Unnamed: 0_level_0,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,Associated Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SN 2020aewj,17:41:43.138,+67:57:42.59,SN Ia,0.055000,,,ZTF,ZTF,ZTF,ZTF,...,1,,19.4900,g-ZTF,2020-01-27 11:41:51.360,Goobar,,2021TNSTR.335....1G,2021TNSCR.341....1P,
SN 2020abjx,02:15:02.300,-08:37:43.70,SLSN-I,0.390000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,20.7600,r-Sloan,2020-09-19 09:35:34.080,Yan,,2020TNSTR3637....1Y,2020TNSCR3639....1Y,
SN 2020zct,06:32:50.530,-12:23:00.56,SN Iax[02cx-like],0.140000,,,REFITT,ZTF,REFITT,REFITT; ZTF,...,1,,19.8499,g-PTF,2020-09-26 11:35:27.744,REFITT_BOT,,2020TNSTR3408....1W,2020TNSCR3413....1W,
SN 2020vdc,02:58:54.456,-10:33:53.12,SN II,0.034400,,,"ZTF, ATLAS, Pan-STARRS","ZTF, ATLAS, Pan-STARRS",ZTF,ATLAS; Pan-STARRS; ZTF,...,1,,19.5800,g-ZTF,2020-09-29 09:09:49.248,ZTF_AMPEL_COMPLETE,,2020TNSTR3060....1N,2020TNSCR3640....1Y,
SN 2020uyr,22:33:34.050,-20:46:01.13,SN Ia,0.067767,,,"ZTF, Pan-STARRS, ATLAS, GaiaAlerts","ZTF, Pan-STARRS, ATLAS, GaiaAlerts",ZTF,ATLAS; GaiaAlerts; Pan-STARRS; ZTF,...,1,,20.1000,r-ZTF,2020-09-20 08:02:24.000,ZTF_Bot1,,2020TNSTR3038....1F,2020TNSCR3122....1D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SN 2018aey,10:51:16.440,+22:28:39.40,SN Ia,0.077000,,0.0767,ZTF,ZTF,ZTF,ZTF,...,1,2018-04-24,18.4000,r-ZTF,2018-03-05 06:08:38.000,ZTF_Bot1,,2018TNSTR.545....1L,2018TNSCR.547....1N,
SN 2018aex,11:00:45.380,+22:17:14.96,SN II,0.023000,,0.0229,"ZTF, GaiaAlerts, ATLAS","ZTF, GaiaAlerts, ATLAS",ZTF,ATLAS; GaiaAlerts; ZTF,...,1,,18.1000,r-ZTF,2018-03-05 06:07:12.000,ZTF_Bot1,,2018TNSTR.545....1L,2018TNSCR.547....1N,
SN 2018yt,08:59:35.590,+45:36:41.90,SN II,0.052000,,,ZTF,ZTF,ZTF,ZTF,...,1,2018-04-24,18.9000,r-ZTF,2018-02-09 06:00:00.000,ZTF_Bot1,,2018TNSTR.544....1T,2018TNSCR.548....1T,
SN 2018mg,10:56:53.580,+79:16:42.50,SN Ia,0.110000,,,ZTF,ZTF,ZTF,ZTF,...,1,2018-02-07,20.1000,g-ZTF,2018-01-11 10:52:26.000,ZTF_Bot1,,2018TNSTR.172....1L,2018TNSCR.175....1F,


In [7]:
# Tally how many objects fall into each "Obj. Type" classification.
Counter(all_tns_result_df["Obj. Type"])

Counter({'SN Ia': 1947,
         'SLSN-I': 36,
         'SN Iax[02cx-like]': 9,
         'SN II': 342,
         'SN Ic': 55,
         'SLSN-II': 31,
         'SN IIn': 79,
         'SN Ia-91T-like': 63,
         'SN IIb': 34,
         'SN Ib': 42,
         'SN IIP': 35,
         'SN Ic-BL': 27,
         'SN Ib/c': 10,
         'SN Ia-pec': 16,
         'SN Ia-CSM': 4,
         'SN Ia-91bg-like': 11,
         'TDE': 13,
         'SN Ibn': 11,
         'Afterglow': 1,
         'SN': 2,
         'SN Ia-Ca-rich': 1,
         'SN Icn': 1,
         'SN II-pec': 1,
         'ILRT': 1,
         'SN I': 1,
         'SN Ib-Ca-rich': 2,
         'SN IIL': 1,
         'LBV': 1,
         'LRN': 1})

In [8]:
# Discovery internal name per object (ZTF ids in the rows shown); NaN where none is recorded.
all_tns_result_df["Disc. Internal Name"]

Name
SN 2020aewj             NaN
SN 2020abjx             NaN
SN 2020zct     ZTF20acezhcf
SN 2020vdc     ZTF20achuhlt
SN 2020uyr     ZTF20achatyv
                   ...     
SN 2018aey     ZTF18aabsyqp
SN 2018aex     ZTF18aabssth
SN 2018yt      ZTF18aaayemw
SN 2018mg      ZTF18aaaoktp
SN 2018mf      ZTF18aaaoaeq
Name: Disc. Internal Name, Length: 2778, dtype: object

In [9]:
# Inspect the rows that have no discovery internal name.
all_tns_result_df[all_tns_result_df["Disc. Internal Name"].isna()]

Unnamed: 0_level_0,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,Associated Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SN 2020aewj,17:41:43.138,+67:57:42.59,SN Ia,0.055,,,ZTF,ZTF,ZTF,ZTF,...,1,,19.49,g-ZTF,2020-01-27 11:41:51.360,Goobar,,2021TNSTR.335....1G,2021TNSCR.341....1P,
SN 2020abjx,02:15:02.300,-08:37:43.70,SLSN-I,0.39,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,20.76,r-Sloan,2020-09-19 09:35:34.080,Yan,,2020TNSTR3637....1Y,2020TNSCR3639....1Y,
SN 2020ssf,23:38:40.872,+15:57:14.55,SN Ia,0.01335,NGC 7722,0.01335,"ZTF, Pan-STARRS","ZTF, Pan-STARRS",Global SN Project,Pan-STARRS; ZTF,...,1,,17.94,i-ZTF,2020-09-06 06:02:44.160,Ho,,2020TNSTR2738....1H,2020TNSCR2763....1H,
SN 2020dcn,13:45:22.580,+39:44:16.60,SN Ia,0.2177,SDSS J134522.58+394416.2,,ZUDS,ZTF,,ZTF,...,1,,21.2,r-ZTF,2020-01-28 00:00:00.000,Goldstein,,2020TNSTR.600....1G,2022TNSCR3049....1N,
SN 2019aafk,22:56:20.930,+45:12:45.20,SN IIn,0.162,,,ZTF,ZTF,ZTF,ZTF,...,1,,20.81,r-Sloan,2019-09-19 04:02:21.120,Yan,,2020TNSTR2673....1Y,2020TNSCR2676....1D,
SN 2019aafj,14:48:39.800,+30:40:40.80,SN IIn,0.113,,,ZTF,ZTF,ZTF,ZTF,...,1,,20.81,r-Sloan,2019-07-27 04:06:05.760,Yan,,2020TNSTR2672....1G,2020TNSCR2677....1Y,
SN 2019aafi,22:31:58.180,+43:50:22.10,SN IIn,0.117,,,ZTF,ZTF,ZTF,ZTF,...,1,,19.78,r-Sloan,2019-04-09 12:07:20.640,Yan,,2020TNSTR2672....1G,2020TNSCR2677....1Y,
SN 2019afa,14:57:27.816,+08:47:09.03,SN Ia,0.078,,,"ZTF, ATLAS","ZTF, ATLAS",ePESSTO,ATLAS; ZTF,...,1,,19.485,g-ZTF,2019-01-24 12:52:33.000,Nordin,,2019TNSTR.149....1N,2019TNSCR.159....1P,
SN 2018lua,17:05:54.580,+33:31:15.10,SN IIn,0.101,,,ZTF,ZTF,ZTF,ZTF,...,1,,20.34,r-Sloan,2018-04-14 11:38:24.000,Yan,,2020TNSTR2660....1G,2020TNSCR2665....1Y,
SN 2018hqh,10:40:33.462,+47:02:24.37,SN Ia,,,,ZTF,ZTF,ZTF,ZTF,...,1,,19.84,r-ZTF,2018-05-24 05:43:35.000,De,,2018TNSTR2024....1D,2018TNSCR2027....1D,


In [10]:
# Drop rows without a discovery internal name: that name is what gets passed to the ANTARES
# lookup as the ZTF object id in the next section.  Note this rebinds all_tns_result_df.
all_tns_result_df = all_tns_result_df[~all_tns_result_df["Disc. Internal Name"].isna()]
all_tns_result_df

Unnamed: 0_level_0,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,Associated Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SN 2020zct,06:32:50.530,-12:23:00.56,SN Iax[02cx-like],0.140000,,,REFITT,ZTF,REFITT,REFITT; ZTF,...,1,,19.8499,g-PTF,2020-09-26 11:35:27.744,REFITT_BOT,,2020TNSTR3408....1W,2020TNSCR3413....1W,
SN 2020vdc,02:58:54.456,-10:33:53.12,SN II,0.034400,,,"ZTF, ATLAS, Pan-STARRS","ZTF, ATLAS, Pan-STARRS",ZTF,ATLAS; Pan-STARRS; ZTF,...,1,,19.5800,g-ZTF,2020-09-29 09:09:49.248,ZTF_AMPEL_COMPLETE,,2020TNSTR3060....1N,2020TNSCR3640....1Y,
SN 2020uyr,22:33:34.050,-20:46:01.13,SN Ia,0.067767,,,"ZTF, Pan-STARRS, ATLAS, GaiaAlerts","ZTF, Pan-STARRS, ATLAS, GaiaAlerts",ZTF,ATLAS; GaiaAlerts; Pan-STARRS; ZTF,...,1,,20.1000,r-ZTF,2020-09-20 08:02:24.000,ZTF_Bot1,,2020TNSTR3038....1F,2020TNSCR3122....1D,
SN 2020uye,01:37:19.308,+04:48:07.01,SN Ia,0.060000,,,"ZTF, ATLAS, Pan-STARRS","ZTF, ATLAS, Pan-STARRS",ZTF,ATLAS; Pan-STARRS; ZTF,...,1,,20.4900,g-ZTF,2020-09-28 10:58:04.800,ZTF_Bot1,,2020TNSTR3030....1F,2020TNSCR3051....1D,
SN 2020uvj,06:37:26.333,+22:39:17.82,SN Ia,0.050000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,19.5600,g-ZTF,2020-10-01 10:16:19.200,ZTF_Bot1,,2020TNSTR3019....1F,2020TNSCR3138....1D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SN 2018aey,10:51:16.440,+22:28:39.40,SN Ia,0.077000,,0.0767,ZTF,ZTF,ZTF,ZTF,...,1,2018-04-24,18.4000,r-ZTF,2018-03-05 06:08:38.000,ZTF_Bot1,,2018TNSTR.545....1L,2018TNSCR.547....1N,
SN 2018aex,11:00:45.380,+22:17:14.96,SN II,0.023000,,0.0229,"ZTF, GaiaAlerts, ATLAS","ZTF, GaiaAlerts, ATLAS",ZTF,ATLAS; GaiaAlerts; ZTF,...,1,,18.1000,r-ZTF,2018-03-05 06:07:12.000,ZTF_Bot1,,2018TNSTR.545....1L,2018TNSCR.547....1N,
SN 2018yt,08:59:35.590,+45:36:41.90,SN II,0.052000,,,ZTF,ZTF,ZTF,ZTF,...,1,2018-04-24,18.9000,r-ZTF,2018-02-09 06:00:00.000,ZTF_Bot1,,2018TNSTR.544....1T,2018TNSCR.548....1T,
SN 2018mg,10:56:53.580,+79:16:42.50,SN Ia,0.110000,,,ZTF,ZTF,ZTF,ZTF,...,1,2018-02-07,20.1000,g-ZTF,2018-01-11 10:52:26.000,ZTF_Bot1,,2018TNSTR.172....1L,2018TNSCR.175....1F,


## Cut objects that are tagged by the lc_feature_extractor filter (keep only untagged objects)

In [11]:
%%time

# Collect the ZTF IDs whose ANTARES locus exists and does NOT carry the
# "lc_feature_extractor" tag. One network lookup is made per object.
ztfid_keep_l = []
for i, ztfid in enumerate(all_tns_result_df["Disc. Internal Name"].tolist()):
    # Progress marker every 500 objects.
    if i % 500 == 0:
        print(i)
    locus = antares_client.search.get_by_ztf_object_id(ztf_object_id=ztfid)
    # Skip objects ANTARES does not know, and objects that are already tagged.
    if locus is None or "lc_feature_extractor" in locus.tags:
        continue
    ztfid_keep_l.append(ztfid)

0
500
1000
1500
2000
2500
CPU times: user 1min 15s, sys: 5.41 s, total: 1min 20s
Wall time: 4min 36s


In [12]:
# Restrict the TNS table to the objects whose ZTF IDs were collected in ztfid_keep_l.
is_kept = all_tns_result_df["Disc. Internal Name"].isin(ztfid_keep_l)
keep_tns_result_df = all_tns_result_df.loc[is_kept]
keep_tns_result_df

Unnamed: 0_level_0,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,Associated Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SN 2020zct,06:32:50.530,-12:23:00.56,SN Iax[02cx-like],0.140000,,,REFITT,ZTF,REFITT,REFITT; ZTF,...,1,,19.8499,g-PTF,2020-09-26 11:35:27.744,REFITT_BOT,,2020TNSTR3408....1W,2020TNSCR3413....1W,
SN 2020uma,22:28:50.948,+44:38:05.12,SN Ia,0.076000,,,"ZTF, Pan-STARRS","ZTF, Pan-STARRS",ZTF,Pan-STARRS; ZTF,...,1,,19.8400,g-ZTF,2020-09-23 06:14:24.000,ZTF_Bot1,,2020TNSTR2971....1D,2020TNSCR2973....1D,
SN 2020ule,01:01:01.907,+21:11:57.54,SN Ia,0.110000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,19.9200,g-ZTF,2020-09-27 08:07:24.672,ZTF_AMPEL_COMPLETE,,2020TNSTR2972....1N,2020TNSCR3192....1D,
SN 2020ugv,01:53:30.936,+01:33:26.12,SN Ia,0.100000,SDSS J015331.03+013324.4,0.068000,"ALeRCE, ZTF",ZTF,ZTF,ZTF,...,1,,20.0451,g-ZTF,2020-09-24 07:25:17.999,ALeRCE,,2020TNSTR2941....1F,2020TNSCR3289....1D,
SN 2020uea,02:31:21.169,+43:27:53.25,SN Ia,0.019544,CGCG 539-037,0.019544,"ALeRCE, ATLAS, ZTF, Pan-STARRS","ZTF, ATLAS, Pan-STARRS",ZTF,ATLAS; Pan-STARRS; ZTF,...,1,,19.1293,r-ZTF,2020-09-24 09:37:11.004,ALeRCE,,2020TNSTR2928....1F,2020TNSCR2973....1D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SN 2018bxr,13:38:47.280,+30:57:34.50,SN Ia,0.064000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,20.0900,r-ZTF,2018-05-19 06:28:48.000,ZTF_Bot1,,2018TNSTR.705....1F,2018TNSCR1071....1F,
SN 2018bsu,16:45:32.218,+59:55:07.05,SN Ia,0.080000,,,ZTF,ZTF,ZTF,ZTF,...,1,,19.5300,r-ZTF,2018-05-14 09:20:09.000,ZTF_Bot1,,2018TNSTR.658....1F,2019TNSCR.933....1F,
SN 2018bij,10:20:21.072,+29:24:45.49,SN Ia,0.081100,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,18.6900,r-ZTF,2018-05-07 03:41:45.000,ZTF_Bot1,,2018TNSTR.610....1F,2018TNSCR1071....1F,
SN 2018bgq,15:12:44.551,+40:06:35.73,SN Ia,0.062200,,,ZTF,ZTF,ZTF,ZTF,...,1,,18.9100,r-ZTF,2018-05-04 06:37:26.000,ZTF_Bot1,,2018TNSTR.602....1F,2019TNSCR.892....1F,


In [13]:
# Write the filtered table to CSV, including the row index.
# NOTE(review): presumably a cache so the slow per-object ANTARES lookup above
# does not need to be re-run -- confirm.
keep_tns_result_df.to_csv('../tables/before_lc-feature-extractor_tns_search_keep_2127spec.csv', index=True)

In [14]:
# Reload the filtered table from the CSV written above; the first CSV column
# becomes the DataFrame index.
keep_tns_result_df = pd.read_csv('../tables/before_lc-feature-extractor_tns_search_keep_2127spec.csv', index_col=0)
keep_tns_result_df

Unnamed: 0_level_0,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,Associated Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SN 2020zct,06:32:50.530,-12:23:00.56,SN Iax[02cx-like],0.140000,,,REFITT,ZTF,REFITT,REFITT; ZTF,...,1,,19.8499,g-PTF,2020-09-26 11:35:27.744,REFITT_BOT,,2020TNSTR3408....1W,2020TNSCR3413....1W,
SN 2020uma,22:28:50.948,+44:38:05.12,SN Ia,0.076000,,,"ZTF, Pan-STARRS","ZTF, Pan-STARRS",ZTF,Pan-STARRS; ZTF,...,1,,19.8400,g-ZTF,2020-09-23 06:14:24.000,ZTF_Bot1,,2020TNSTR2971....1D,2020TNSCR2973....1D,
SN 2020ule,01:01:01.907,+21:11:57.54,SN Ia,0.110000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,19.9200,g-ZTF,2020-09-27 08:07:24.672,ZTF_AMPEL_COMPLETE,,2020TNSTR2972....1N,2020TNSCR3192....1D,
SN 2020ugv,01:53:30.936,+01:33:26.12,SN Ia,0.100000,SDSS J015331.03+013324.4,0.068000,"ALeRCE, ZTF",ZTF,ZTF,ZTF,...,1,,20.0451,g-ZTF,2020-09-24 07:25:17.999,ALeRCE,,2020TNSTR2941....1F,2020TNSCR3289....1D,
SN 2020uea,02:31:21.169,+43:27:53.25,SN Ia,0.019544,CGCG 539-037,0.019544,"ALeRCE, ATLAS, ZTF, Pan-STARRS","ZTF, ATLAS, Pan-STARRS",ZTF,ATLAS; Pan-STARRS; ZTF,...,1,,19.1293,r-ZTF,2020-09-24 09:37:11.004,ALeRCE,,2020TNSTR2928....1F,2020TNSCR2973....1D,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
SN 2018bxr,13:38:47.280,+30:57:34.50,SN Ia,0.064000,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,20.0900,r-ZTF,2018-05-19 06:28:48.000,ZTF_Bot1,,2018TNSTR.705....1F,2018TNSCR1071....1F,
SN 2018bsu,16:45:32.218,+59:55:07.05,SN Ia,0.080000,,,ZTF,ZTF,ZTF,ZTF,...,1,,19.5300,r-ZTF,2018-05-14 09:20:09.000,ZTF_Bot1,,2018TNSTR.658....1F,2019TNSCR.933....1F,
SN 2018bij,10:20:21.072,+29:24:45.49,SN Ia,0.081100,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,18.6900,r-ZTF,2018-05-07 03:41:45.000,ZTF_Bot1,,2018TNSTR.610....1F,2018TNSCR1071....1F,
SN 2018bgq,15:12:44.551,+40:06:35.73,SN Ia,0.062200,,,ZTF,ZTF,ZTF,ZTF,...,1,,18.9100,r-ZTF,2018-05-04 06:37:26.000,ZTF_Bot1,,2018TNSTR.602....1F,2019TNSCR.892....1F,


In [15]:
Counter(keep_tns_result_df["Obj. Type"])

Counter({'SN Iax[02cx-like]': 7,
         'SN Ia': 1534,
         'SN Ic': 41,
         'SN IIb': 28,
         'SN Ib': 34,
         'SN II': 250,
         'SN Ia-91T-like': 46,
         'SN Ic-BL': 20,
         'SN Ib/c': 8,
         'SN Ia-pec': 12,
         'SN IIn': 45,
         'SLSN-I': 25,
         'SN Ia-91bg-like': 8,
         'SN IIP': 24,
         'TDE': 9,
         'SN Ibn': 11,
         'SLSN-II': 15,
         'SN': 2,
         'SN Ia-CSM': 2,
         'SN Ia-Ca-rich': 1,
         'SN Icn': 1,
         'SN II-pec': 1,
         'ILRT': 1,
         'SN Ib-Ca-rich': 2})

In [16]:
np.unique(keep_tns_result_df["Obj. Type"])

array(['ILRT', 'SLSN-I', 'SLSN-II', 'SN', 'SN II', 'SN II-pec', 'SN IIP',
       'SN IIb', 'SN IIn', 'SN Ia', 'SN Ia-91T-like', 'SN Ia-91bg-like',
       'SN Ia-CSM', 'SN Ia-Ca-rich', 'SN Ia-pec', 'SN Iax[02cx-like]',
       'SN Ib', 'SN Ib-Ca-rich', 'SN Ib/c', 'SN Ibn', 'SN Ic', 'SN Ic-BL',
       'SN Icn', 'TDE'], dtype=object)

## Extract light-curve features as a function of time and save them to {objid}_timeseries.csv

In [17]:
import requests
from requests.auth import HTTPBasicAuth
import astropy.table as at
import matplotlib
from matplotlib.transforms import Bbox
from matplotlib.backends.backend_pdf import PdfPages
from astropy.io import fits
from astropy.wcs import WCS
from astropy.coordinates import SkyCoord
from astropy.coordinates import Angle
import astropy.units as u
from astropy.visualization import PercentileInterval, AsinhStretch
from astroquery.mast import Catalogs
from astroquery.sdss import SDSS
from astroquery.simbad import Simbad

import light_curve as lc
from itertools import chain
import light_curve as lc
from astropy.table import MaskedColumn

from PIL import Image
import os
import sys
import shutil
import glob
import json

import math

import warnings
warnings.filterwarnings("ignore")

import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle

import os
import sys
import annoy
from annoy import AnnoyIndex
import random



from IPython.display import display_markdown

from collections import Counter

from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import cKDTree
from sklearn.decomposition import PCA
from sklearn.decomposition import SparsePCA

from alerce.core import Alerce
alerce = Alerce()

import antares_client

df_path = "/Users/patrickaleo/Desktop/Illinois/LAISS-antares/repo/tables/before_lc-feature-extractor_tns_search/timeseries"

plt.style.use('fig_publication.mplstyle')
%config InlineBackend.figure_format = 'retina'  #for MacOS, make plots crisp


In [18]:
def replace_magn_with_flux(s):
    """Turn a magnitude-based description into its flux counterpart.

    If ``s`` mentions "magnitude", every "magnitudes" becomes "fluxes" and every
    remaining "magnitude" becomes "flux". Otherwise the text
    " for flux light curve" is appended to ``s``.
    """
    if 'magnitude' not in s:
        return f'{s} for flux light curve'
    # Plural first, so that "magnitudes" does not end up as "fluxs".
    plural_swapped = s.replace('magnitudes', 'fluxes')
    return plural_swapped.replace('magnitude', 'flux')

def create_base_features_class(
        magn_extractor,
        flux_extractor,
        bands=('R', 'g',),
    ):
    """Build the feature-name bookkeeping for a magnitude/flux extractor pair.

    Parameters
    ----------
    magn_extractor, flux_extractor
        Objects exposing a ``names`` iterable of feature names.
    bands
        Passband labels; each becomes a key of the returned ``property_names``.

    Returns
    -------
    feature_names : list of str
        ``<name>_magn`` for every magnitude feature followed by ``<name>_flux``
        for every flux feature, in extractor order.
    property_names : dict
        Maps each band to the lower-cased ``feature_<feature_name>_<band>`` list.
        The band suffix is lower-cased as well; the dict key is not.
    features_count : int
        Number of entries in ``feature_names``.
    """
    feature_names = []
    for extractor, suffix in ((magn_extractor, 'magn'), (flux_extractor, 'flux')):
        for name in extractor.names:
            feature_names.append(f'{name}_{suffix}')

    property_names = {}
    for band in bands:
        property_names[band] = [f'feature_{name}_{band}'.lower() for name in feature_names]

    return feature_names, property_names, len(feature_names)


# Feature set evaluated on the magnitude light curve; it is called as
# MAGN_EXTRACTOR(t, m, merr, fill_value=None) in extract_lc_and_host_features.
# Entry order is significant: create_base_features_class derives the output
# column names from this extractor's `.names`, and those names are paired
# positionally (via zip) with the values the extractor returns.
MAGN_EXTRACTOR = lc.Extractor(
    lc.Amplitude(),
    lc.AndersonDarlingNormal(),
    lc.BeyondNStd(1.0),
    lc.BeyondNStd(2.0),
    lc.Cusum(),
    lc.EtaE(),
    lc.InterPercentileRange(0.02),
    lc.InterPercentileRange(0.1),
    lc.InterPercentileRange(0.25),
    lc.Kurtosis(),
    lc.LinearFit(),
    lc.LinearTrend(),
    lc.MagnitudePercentageRatio(0.4, 0.05),
    lc.MagnitudePercentageRatio(0.2, 0.05),
    lc.MaximumSlope(),
    lc.Mean(),
    lc.MedianAbsoluteDeviation(),
    lc.PercentAmplitude(),
    lc.PercentDifferenceMagnitudePercentile(0.05),
    lc.PercentDifferenceMagnitudePercentile(0.1),
    lc.MedianBufferRangePercentage(0.1),
    lc.MedianBufferRangePercentage(0.2),
    lc.Periodogram(
        peaks=5,
        resolution=10.0,
        max_freq_factor=2.0,
        nyquist='average',
        fast=True,
        # NOTE(review): presumably these sub-features are computed on the
        # periodogram itself rather than on the light curve -- confirm against
        # the light_curve package documentation.
        features=(
            lc.Amplitude(),
            lc.BeyondNStd(2.0),
            lc.BeyondNStd(3.0),
            lc.StandardDeviation(),
        ),
    ),
    lc.ReducedChi2(),
    lc.Skew(),
    lc.StandardDeviation(),
    lc.StetsonK(),
    lc.WeightedMean(),
)

# Feature set evaluated on the flux light curve; it is called as
# FLUX_EXTRACTOR(t, flux, fluxerr, fill_value=None) in
# extract_lc_and_host_features, where flux is derived from the magnitudes.
# Entry order is significant: the returned values are paired positionally with
# the "<name>_flux" column names built by create_base_features_class.
FLUX_EXTRACTOR = lc.Extractor(
    lc.AndersonDarlingNormal(),
    lc.Cusum(),
    lc.EtaE(),
    lc.ExcessVariance(),
    lc.Kurtosis(),
    lc.MeanVariance(),
    lc.ReducedChi2(),
    lc.Skew(),
    lc.StetsonK(),
)

def remove_simultaneous_alerts(table):
    """Drop each row whose ``ant_mjd`` equals the ``ant_mjd`` of the next row.

    Only neighbouring rows are compared, so within a run of rows sharing one
    timestamp the last row is the one that survives. The final row is always
    kept because an infinite sentinel is appended before differencing.
    """
    gap_to_next = np.diff(table['ant_mjd'], append=np.inf)
    keep_row = gap_to_next != 0
    return table[keep_row]
    
def get_detections(photometry, band):
    """Return the cleaned detections of one passband from a locus photometry table.

    Rows are kept when ``ant_passband`` equals ``band`` and ``ant_mag`` is not
    NaN; rows flagged by the MaskedColumn mask of ``ant_mag`` are then dropped,
    and finally rows with a repeated ``ant_mjd`` are removed via
    ``remove_simultaneous_alerts``.
    """
    in_band = photometry['ant_passband'] == band
    has_mag = ~photometry['ant_mag'].isna()
    band_rows = photometry[in_band & has_mag]
    unmasked = ~MaskedColumn(band_rows['ant_mag']).mask
    return remove_simultaneous_alerts(band_rows[unmasked])

In [19]:
import astro_ghost
# from astro_ghost.PS1QueryFunctions import getAllPostageStamps
# from astro_ghost.TNSQueryFunctions import getTNSSpectra
# from astro_ghost.NEDQueryFunctions import getNEDSpectra
from astro_ghost.ghostHelperFunctions import getTransientHosts, getGHOST
from astropy.coordinates import SkyCoord
from astropy import units as u
from datetime import datetime
import tempfile

# GHOST setup. Host association is done later by passing RA/DEC to
# getTransientHosts (original note: "just DLR method, gentle starcut").
# Set the GHOST_PATH environment variable so GHOST can find ghost.csv.
os.environ['GHOST_PATH'] = './host_info'
# NOTE(review): the original comment here read "Then don't use
# getGHOST(real=True, verbose=verbose)", yet getGHOST is called on the next
# line -- confirm whether this call is still wanted once GHOST_PATH is set.
getGHOST(real=True,verbose=False)

GHOST database already exists in the install path!


In [20]:
astro_ghost.__version__

'2.0.16'

In [60]:
def extract_lc_and_host_features(ztf_id_ref, spec_class, show_lc=False, show_host=False, df_path=None):
    """Extract time-evolving light-curve and host-galaxy features for one object and save them as CSV.

    The ANTARES locus of ``ztf_id_ref`` is fetched. For every detection epoch
    (g and R epochs merged and sorted in time) the MAGN_EXTRACTOR and
    FLUX_EXTRACTOR features are recomputed per band from all detections up to
    and including that epoch. Host-galaxy features come from GHOST
    (``getTransientHosts``) and are repeated on every row, together with a
    one-hot encoding of ``spec_class``. The table is written to
    ``{df_path}/{ztf_id_ref}_timeseries.csv``.

    Relies on notebook-level names: ``antares_client``, ``MAGN_EXTRACTOR``,
    ``FLUX_EXTRACTOR``, ``create_base_features_class``, ``get_detections``,
    ``remove_simultaneous_alerts``, ``getTransientHosts`` and
    ``feature_names_hostgal``.

    Parameters
    ----------
    ztf_id_ref : str
        ZTF object ID to process.
    spec_class : str
        Spectroscopic class label; one-hot encoded against a fixed label list.
    show_lc : bool, optional
        If True, plot the g and R light curves.
    show_host : bool, optional
        If True, print a PS1 cutout URL for the matched host.
    df_path : str, optional
        Output directory. Defaults to the project's hardcoded ``timeseries``
        directory, so existing two-argument calls behave as before.

    Returns
    -------
    None
        The function reports progress with ``print`` and returns early without
        writing a file when the object cannot be fetched, when either band has
        no detections, when no epoch reaches the minimum detection count, or
        when any host feature is missing/NaN.
    """
    if df_path is None:
        df_path = "/Users/patrickaleo/Desktop/Illinois/LAISS-antares/repo/tables/before_lc-feature-extractor_tns_search/timeseries"

    # --- Fetch the locus from ANTARES --------------------------------------
    # `except Exception` (not a bare except) so that KeyboardInterrupt and
    # SystemExit still stop a long batch run. A missing locus (None) fails on
    # the attribute access below and is handled by the same branch.
    try:
        ref_info = antares_client.search.get_by_ztf_object_id(ztf_object_id=ztf_id_ref)
        df_ref = ref_info.timeseries.to_pandas()
    except Exception:
        print("antares_client can't find this object. Skip! Continue...")
        return

    has_mag = ~df_ref.ant_mag.isna()
    df_ref_g = df_ref[(df_ref.ant_passband == 'g') & has_mag]
    df_ref_r = df_ref[(df_ref.ant_passband == 'R') & has_mag]

    # Both bands must have at least one detection. (Previously this was detected
    # indirectly, through idxmin raising on an empty frame.)
    if df_ref_r.empty or df_ref_g.empty:
        print(f"No obs for {ztf_id_ref}. pass!\n")
        return

    if show_lc:
        fig, ax = plt.subplots(figsize=(7,7))
        ax.invert_yaxis()  # magnitudes: brighter is up

        ax.errorbar(x=df_ref_r.ant_mjd, y=df_ref_r.ant_mag, yerr=df_ref_r.ant_magerr, fmt='o', c='r',
                    label=f'REF: {ztf_id_ref}')
        ax.errorbar(x=df_ref_g.ant_mjd, y=df_ref_g.ant_mag, yerr=df_ref_g.ant_magerr, fmt='o', c='g')
        plt.show()

    # --- Light-curve features as a function of time ------------------------
    min_obs_count = 4  # a band contributes features only once it has this many detections

    lightcurve = ref_info.lightcurve
    _, property_names, _ = create_base_features_class(MAGN_EXTRACTOR, FLUX_EXTRACTOR)

    # Epochs at which the features are re-evaluated: every g or R detection time.
    g_obs = list(get_detections(lightcurve, 'g').ant_mjd.values)
    r_obs = list(get_detections(lightcurve, 'R').ant_mjd.values)
    mjd_l = sorted(g_obs + r_obs)

    # All detections (any band) with a usable magnitude, de-duplicated in time.
    with_mag = lightcurve[(~lightcurve['ant_mag'].isna())]
    unmasked = ~MaskedColumn(with_mag['ant_mag']).mask
    all_detections = remove_simultaneous_alerts(with_mag[unmasked])

    lc_properties_d_l = []
    for ob, mjd in enumerate(mjd_l):
        # Light curve as it would have looked at this epoch.
        detections_pb = all_detections[all_detections['ant_mjd'].values <= mjd]

        lc_properties_d = {}
        for band, names in property_names.items():
            detections = detections_pb[detections_pb['ant_passband'] == band]

            if len(detections) < min_obs_count:
                continue

            t = detections['ant_mjd'].values
            m = detections['ant_mag'].values
            merr = detections['ant_magerr'].values
            # Relative flux from magnitude; the error is half the flux difference
            # between (m - merr) and (m + merr).
            flux = np.power(10.0, -0.4 * m)
            fluxerr = 0.5 * flux * (np.power(10.0, 0.4 * merr) - np.power(10.0, -0.4 * merr))

            magn_features = MAGN_EXTRACTOR(
                t,
                m,
                merr,
                fill_value=None,
            )
            flux_features = FLUX_EXTRACTOR(
                t,
                flux,
                fluxerr,
                fill_value=None,
            )

            lc_properties_d["obs_num"] = int(ob)
            lc_properties_d["mjd_cutoff"] = mjd
            lc_properties_d["ztf_object_id"] = ztf_id_ref
            # `names` is ordered as magnitude features followed by flux features.
            for name, value in zip(names, chain(magn_features, flux_features)):
                lc_properties_d[name] = value

        # Epochs where no band reached min_obs_count produce no row.
        if lc_properties_d:
            lc_properties_d_l.append(lc_properties_d)

    lc_properties_df = pd.DataFrame(lc_properties_d_l)
    if len(lc_properties_df) == 0:
        print(f"Not enough obs for {ztf_id_ref}. pass!\n")
        return
    print(f"Extracted LC features for {ztf_id_ref}!")

    # --- Host-galaxy features from GHOST -----------------------------------
    ra, dec = np.mean(df_ref.ant_ra), np.mean(df_ref.ant_dec)
    # The same transient is passed twice.
    # NOTE(review): presumably a workaround for single-element input to GHOST -- confirm.
    snName = [ztf_id_ref, ztf_id_ref]
    snCoord = [SkyCoord(ra*u.deg, dec*u.deg, frame='icrs'), SkyCoord(ra*u.deg, dec*u.deg, frame='icrs')]
    ghost_kwargs = dict(transientName=snName, snCoord=snCoord, verbose=0,
                        starcut='gentle', ascentMatch=False, redo_search=False)
    with tempfile.TemporaryDirectory() as tmp:
        try:
            hosts = getTransientHosts(GLADE=True, savepath=tmp, **ghost_kwargs)
        except Exception:
            # Fall back to a search without GLADE; a failure here propagates to the caller.
            print(f"GHOST error for {ztf_id_ref}. Retry without GLADE. \n")
            hosts = getTransientHosts(GLADE=False, savepath=tmp, **ghost_kwargs)

    # Keep a single host row (the one labelled 0) when GHOST returns several.
    if len(hosts) > 1:
        hosts = pd.DataFrame(hosts.loc[0]).T

    hosts_df = hosts[feature_names_hostgal]
    hosts_df = hosts_df[~hosts_df.isnull().any(axis=1)]

    if len(hosts_df) < 1:
        # if any features are nan, we can't use as input
        print(f"Some features are NaN for {ztf_id_ref}. Skip!\n")
        return

    if show_host:
        print(f'http://ps1images.stsci.edu/cgi-bin/ps1cutouts?pos={hosts.raMean.values[0]}+{hosts.decMean.values[0]}&filter=color')

    # --- One-hot encoding of the spectroscopic class -----------------------
    label_arr = np.array(['ILRT', 'SLSN-I', 'SLSN-II', 'SN', 'SN II', 'SN II-pec', 'SN IIP',
       'SN IIb', 'SN IIn', 'SN Ia', 'SN Ia-91T-like', 'SN Ia-91bg-like',
       'SN Ia-CSM', 'SN Ia-Ca-rich', 'SN Ia-pec', 'SN Iax[02cx-like]',
       'SN Ib', 'SN Ib-Ca-rich', 'SN Ib/c', 'SN Ibn', 'SN Ic', 'SN Ic-BL',
       'SN Icn', 'TDE'])

    class_index = np.where(label_arr == spec_class)[0]
    if len(class_index) == 0:
        # The row is still written, with every class column left at 0.
        print(f"Warning: class {spec_class!r} of {ztf_id_ref} is not in the label list; one-hot row is all zeros.")

    onehot_array = np.zeros((1, len(label_arr)), dtype=float)
    onehot_array[0, class_index] = 1

    # --- Assemble one row per epoch and save -------------------------------
    n_rows = len(lc_properties_df)
    onehot_df = pd.DataFrame(onehot_array, columns=label_arr)
    onehot_df = pd.concat([onehot_df] * n_rows, ignore_index=True)
    hosts_df = pd.concat([hosts_df] * n_rows, ignore_index=True)

    lc_and_hosts_df = pd.concat([lc_properties_df, hosts_df, onehot_df], axis=1)
    lc_and_hosts_df = lc_and_hosts_df.set_index('ztf_object_id')
    lc_and_hosts_df.to_csv(f'{df_path}/{lc_and_hosts_df.index[0]}_timeseries.csv')

    print(f"Saved results for {ztf_id_ref}!\n")

In [75]:
%%time
# Run the feature extraction for the kept TNS objects, starting at slice
# offset 1166 of the table. Objects whose timeseries CSV already exists on
# disk are skipped, so the cell can be re-run to resume.
remaining_rows = zip(keep_tns_result_df["Disc. Internal Name"][1166:],
                     keep_tns_result_df["Obj. Type"][1166:])
for i, (ztf_obj_test, spec_cls) in enumerate(remaining_rows):

    # Progress marker every 50 objects.
    if i % 50 == 0:
        print(i)

    timeseries_csv = f"/Users/patrickaleo/Desktop/Illinois/LAISS-antares/repo/tables/before_lc-feature-extractor_tns_search/timeseries/{ztf_obj_test}_timeseries.csv"
    if not os.path.exists(timeseries_csv):
        extract_lc_and_host_features(ztf_id_ref=ztf_obj_test, spec_class=spec_cls)
        continue

    print(f'{ztf_obj_test} is already made. Continue!\n')

0
Extracted LC features for ZTF19abzsitm!
Saved results for ZTF19abzsitm!

Extracted LC features for ZTF19abzhfjj!
Found 2 hosts in GLADE! See gladeDLR.txt for details.
Found matches for 50.0% of events.
Saved results for ZTF19abzhfjj!

Extracted LC features for ZTF19abzrhgq!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Saved results for ZTF19abzrhgq!

Extracted LC features for ZTF19abzrdup!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Some features are NaN for ZTF19abzrdup. Skip!

Extracted LC features for ZTF19abzqkdz!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Saved results for ZTF19abzqkdz!

Extracted LC features for ZTF19abzlsbl!
Saved results for ZTF19abzlsbl!

Extracted LC features for ZTF19abyhxzp!
Saved results for ZTF19abyhxzp!

Extracted LC features for ZTF19abzmnuk!
Saved results for ZTF19abzmnuk!


Extracted LC features for ZTF19abrirdm!
Saved results for ZTF19abrirdm!

Extracted LC features for ZTF19abtnftd!
Saved results for ZTF19abtnftd!

Extracted LC features for ZTF19abuonnx!
Saved results for ZTF19abuonnx!

Extracted LC features for ZTF19abqyoxt!
Saved results for ZTF19abqyoxt!

Extracted LC features for ZTF19abrmlxu!
Saved results for ZTF19abrmlxu!

Extracted LC features for ZTF19abueupg!
Saved results for ZTF19abueupg!

Extracted LC features for ZTF19abukakm!
Saved results for ZTF19abukakm!

Extracted LC features for ZTF19abudlps!
Saved results for ZTF19abudlps!

Extracted LC features for ZTF19abudjie!
Some features are NaN for ZTF19abudjie. Skip!

Extracted LC features for ZTF19absfopz!
Saved results for ZTF19absfopz!

Not enough obs for ZTF19abtuigb. pass!

Extracted LC features for ZTF19abttstb!
Saved results for ZTF19abttstb!

100
Extracted LC features for ZTF19abtsnyy!
Saved results for ZTF19abtsnyy!

Extracted LC features for ZTF19abtslqm!
Saved results for ZTF19abt

Saved results for ZTF19abowcic!

Extracted LC features for ZTF19abobxik!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Saved results for ZTF19abobxik!

Extracted LC features for ZTF19abnkrcd!
Saved results for ZTF19abnkrcd!

Extracted LC features for ZTF19abnimpq!
Saved results for ZTF19abnimpq!

Extracted LC features for ZTF19abomiel!
Saved results for ZTF19abomiel!

Extracted LC features for ZTF19abmzmom!
Saved results for ZTF19abmzmom!

Extracted LC features for ZTF19abmqasg!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 0.0% of events.
Some features are NaN for ZTF19abmqasg. Skip!

Extracted LC features for ZTF19abmylxw!
Found 2 hosts in GLADE! See gladeDLR.txt for details.
Found matches for 50.0% of events.
Some features are NaN for ZTF19abmylxw. Skip!

Extracted LC features for ZTF19abmpoxa!
Saved results for ZTF19abmpoxa!

Extracted LC features for ZTF19ablpnmk!
Some features are N

Saved results for ZTF19abfvhlx!

Extracted LC features for ZTF19abfwtkw!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 0.0% of events.
Some features are NaN for ZTF19abfwtkw. Skip!

Extracted LC features for ZTF19abfsxpw!
Saved results for ZTF19abfsxpw!

Extracted LC features for ZTF19abgbdcp!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Saved results for ZTF19abgbdcp!

No obs for ZTF19abfjjlk. pass!

Extracted LC features for ZTF19abdzehg!
Saved results for ZTF19abdzehg!

Extracted LC features for ZTF19abdmfgf!
Saved results for ZTF19abdmfgf!

Extracted LC features for ZTF19abfqhis!
Saved results for ZTF19abfqhis!

Extracted LC features for ZTF19abdkcib!
Saved results for ZTF19abdkcib!

Extracted LC features for ZTF18abdcudb!
Some features are NaN for ZTF18abdcudb. Skip!

Extracted LC features for ZTF19abeloei!
Some features are NaN for ZTF19abeloei. Skip!

Extracted LC features for ZTF

Saved results for ZTF19aayvylv!

Extracted LC features for ZTF19aayrosj!
Saved results for ZTF19aayrosj!

Extracted LC features for ZTF19aaywtoc!
Saved results for ZTF19aaywtoc!

Extracted LC features for ZTF19aazudta!
Saved results for ZTF19aazudta!

Extracted LC features for ZTF19aazlsfj!
Saved results for ZTF19aazlsfj!

Extracted LC features for ZTF19aaziick!
Saved results for ZTF19aaziick!

Extracted LC features for ZTF19aaywlmi!
Saved results for ZTF19aaywlmi!

Extracted LC features for ZTF19aazfvhh!
Saved results for ZTF19aazfvhh!

Extracted LC features for ZTF19aazpvdy!
Saved results for ZTF19aazpvdy!

Extracted LC features for ZTF19aayslow!
Saved results for ZTF19aayslow!

Extracted LC features for ZTF19aaysiwt!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Saved results for ZTF19aaysiwt!

Not enough obs for ZTF19aayshvd. pass!

Extracted LC features for ZTF19aayrjnf!
Saved results for ZTF19aayrjnf!

Extracted LC feature

Some features are NaN for ZTF19aauvbqv. Skip!

Extracted LC features for ZTF19aauuzlr!
Saved results for ZTF19aauuzlr!

Extracted LC features for ZTF19aaumvig!
Some features are NaN for ZTF19aaumvig. Skip!

Extracted LC features for ZTF19aaujzqh!
Saved results for ZTF19aaujzqh!

Extracted LC features for ZTF19aaupkrl!
Saved results for ZTF19aaupkrl!

Extracted LC features for ZTF19aauplxw!
Saved results for ZTF19aauplxw!

Extracted LC features for ZTF19aauitks!
Saved results for ZTF19aauitks!

Extracted LC features for ZTF18aaqkcso!
Some features are NaN for ZTF18aaqkcso. Skip!

Extracted LC features for ZTF19aauisdr!
Saved results for ZTF19aauisdr!

Extracted LC features for ZTF19aauhwbp!
Saved results for ZTF19aauhwbp!

Extracted LC features for ZTF19aatnfvj!
Saved results for ZTF19aatnfvj!

Extracted LC features for ZTF19aaujiwc!
Saved results for ZTF19aaujiwc!

Extracted LC features for ZTF19aauishy!
Some features are NaN for ZTF19aauishy. Skip!

Extracted LC features for ZTF19aaug

Found matches for 50.0% of events.
Saved results for ZTF19aapbgty!

Extracted LC features for ZTF19aaozplq!
Some features are NaN for ZTF19aaozplq. Skip!

No obs for ZTF19aanyubf. pass!

Extracted LC features for ZTF19aaozsuh!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Some features are NaN for ZTF19aaozsuh. Skip!

Extracted LC features for ZTF19aanxrkt!
Some features are NaN for ZTF19aanxrkt. Skip!

Extracted LC features for ZTF19aanxosu!
Saved results for ZTF19aanxosu!

Extracted LC features for ZTF19aapcvxi!
Saved results for ZTF19aapcvxi!

Extracted LC features for ZTF19aapadxs!
Saved results for ZTF19aapadxs!

Extracted LC features for ZTF19aaokist!
Saved results for ZTF19aaokist!

Extracted LC features for ZTF19aaocbes!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 0.0% of events.
Some features are NaN for ZTF19aaocbes. Skip!

Extracted LC features for ZTF19aaobwpx!
Saved results

Extracted LC features for ZTF19aamqebl!
Saved results for ZTF19aamqebl!

Extracted LC features for ZTF19aampqcq!
Saved results for ZTF19aampqcq!

Extracted LC features for ZTF19aamgghn!
Saved results for ZTF19aamgghn!

No obs for ZTF18achxcsh. pass!

Extracted LC features for ZTF19aamoqxy!
Saved results for ZTF19aamoqxy!

Extracted LC features for ZTF19aakrawk!
Saved results for ZTF19aakrawk!

Extracted LC features for ZTF19aamohrk!
Saved results for ZTF19aamohrk!

Extracted LC features for ZTF18abhhxcp!
Saved results for ZTF18abhhxcp!

Extracted LC features for ZTF19aamhhey!
Saved results for ZTF19aamhhey!

Extracted LC features for ZTF19aamhhae!
Saved results for ZTF19aamhhae!

Extracted LC features for ZTF19aammlzn!
Saved results for ZTF19aammlzn!

Extracted LC features for ZTF19aalxlpp!
Saved results for ZTF19aalxlpp!

Extracted LC features for ZTF19aaluprf!
Saved results for ZTF19aaluprf!

Extracted LC features for ZTF19aaleptm!
Saved results for ZTF19aaleptm!

Extracted LC featur

Extracted LC features for ZTF19aaeicsm!
Saved results for ZTF19aaeicsm!

Extracted LC features for ZTF19aadtqcd!
Some features are NaN for ZTF19aadtqcd. Skip!

Extracted LC features for ZTF19aadypig!
Found no hosts in GLADE.
Finding northern sources with Pan-starrs...
Found matches for 50.0% of events.
Some features are NaN for ZTF19aadypig. Skip!

Extracted LC features for ZTF19aadttht!
Some features are NaN for ZTF19aadttht. Skip!

Extracted LC features for ZTF19aadolpe!
Saved results for ZTF19aadolpe!

Extracted LC features for ZTF19aadoocn!
Saved results for ZTF19aadoocn!

Not enough obs for ZTF19aadpdqm. pass!

Extracted LC features for ZTF19aadnwbv!
Some features are NaN for ZTF19aadnwbv. Skip!

Extracted LC features for ZTF19aadnxgj!
Saved results for ZTF19aadnxgj!

Not enough obs for ZTF19aaddzpk. pass!

Extracted LC features for ZTF19aacwljg!
Saved results for ZTF19aacwljg!

Not enough obs for ZTF19aacigpk. pass!

Not enough obs for ZTF19aaactno. pass!

Not enough obs for ZTF1

Extracted LC features for ZTF18abhxidv!
Saved results for ZTF18abhxidv!

Extracted LC features for ZTF18abfwuwn!
Saved results for ZTF18abfwuwn!

Extracted LC features for ZTF18abixkjh!
Saved results for ZTF18abixkjh!

Extracted LC features for ZTF18aailmnv!
Some features are NaN for ZTF18aailmnv. Skip!

Extracted LC features for ZTF18abeegsl!
Saved results for ZTF18abeegsl!

Extracted LC features for ZTF18abfgygp!
Saved results for ZTF18abfgygp!

Extracted LC features for ZTF18abdkimx!
Saved results for ZTF18abdkimx!

Extracted LC features for ZTF18aansqun!
Saved results for ZTF18aansqun!

Extracted LC features for ZTF18abddrhd!
Some features are NaN for ZTF18abddrhd. Skip!

Extracted LC features for ZTF18abesqnb!
Saved results for ZTF18abesqnb!

Extracted LC features for ZTF18abespgb!
Saved results for ZTF18abespgb!

Extracted LC features for ZTF18abetewu!
Saved results for ZTF18abetewu!

Extracted LC features for ZTF18abcyilc!
Saved results for ZTF18abcyilc!

Not enough obs for ZTF1

In [74]:
keep_tns_result_df.iloc[1166:1170]

Unnamed: 0_level_0,RA,DEC,Obj. Type,Redshift,Host Name,Host Redshift,Reporting Group/s,Discovery Data Source/s,Classifying Group/s,Associated Group/s,...,Public,End Prop. Period,Discovery Mag/Flux,Discovery Filter,Discovery Date (UT),Sender,Remarks,Discovery Bibcode,Classification Bibcodes,Ext. catalog/s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SN 2019qky,00:19:53.090,-23:23:51.68,SN Ia,0.046,2MASX J00195288-2323393,0.063454,"ALeRCE, ZTF, Pan-STARRS, ATLAS, GaiaAlerts","ZTF, Pan-STARRS, ATLAS, GaiaAlerts",ZTF,ALeRCE; ATLAS; GaiaAlerts; Pan-STARRS; ZTF,...,1,,18.759,g-ZTF,2019-09-20 09:02:40.000,ALeRCE,,2019TNSTR1865....1F,2019TNSCR1923....1F,
SN 2019qjj,00:36:01.664,-05:23:01.98,SN Ia,0.068,,,ZTF,ZTF,ZTF,ZTF,...,1,,18.75,g-ZTF,2019-09-17 09:05:17.000,ZTF_AMPEL_NEW,,2019TNSTR1872....1N,2020TNSCR.601....1D,
AT 2019qiz,04:46:37.880,-10:13:34.90,TDE,0.0151,2MASX J04463790-1013349,0.01513,"ALeRCE, ZTF, ATLAS, GaiaAlerts, Pan-STARRS","ZTF, ATLAS, GaiaAlerts, Pan-STARRS",,ALeRCE; ATLAS; GaiaAlerts; Pan-STARRS; ZTF,...,1,,17.8184,r-ZTF,2019-09-19 11:59:43.000,ALeRCE,,2019TNSTR1857....1F,2019TNSCR1921....1S,
SN 2019qij,08:21:37.457,+40:51:48.54,SN II,0.1066,,,"ZTF, ATLAS","ZTF, ATLAS",ZTF,ATLAS; ZTF,...,1,,19.4,r-ZTF,2019-09-19 11:41:13.000,ZTF_AMPEL_NEW,,2019TNSTR1863....1N,2020TNSCR1935....1D,


## Load pickle

In [None]:
# Hyperparameters of the saved random-forest run. They are defined BEFORE the
# paths because the directory and file names below are built from them; with
# the previous cell order the path f-strings raised NameError on a fresh kernel.
n_estimators=100 #3500
max_depth=35 #35
random_state=11
max_features=35 # {“sqrt”, “log2”, None}, int or float, default=”sqrt” - sqrt(120) ~ 10
#class_weight={"None": 1, "SLSN-II": 1, "SN II": 1, 'SN IIP': 1, 'SN IIb': 1, 'SN IIn': 1, 'SN Ia': 1,
#       'SN Ia-91T-like': 1, 'SN Ib': 1, 'SN Ic': 1, 'SN Ic-BL': 1, 'TDE': 1} #"balanced"
class_weight={"Normal": 1, "Other": 1} #"balanced"

# Run directory and run name shared by the figure, model and pickle paths.
rfc_base_dir = "/Users/patrickaleo/Desktop/Illinois/LAISS-antares/repo/RFC/SMOTE_train_test_70-30_min14_kneighbors8"
rfc_run_name = f"cls=binary_n_estimators={n_estimators}_max_depth={max_depth}_rs={random_state}_max_feats={max_features}_cw=balanced"

figure_path = f"{rfc_base_dir}/{rfc_run_name}/figures"
model_path = f"{rfc_base_dir}/{rfc_run_name}/model"
cm_path = f"{figure_path}/confusion_matrix/"

# Create the output directories if they are missing (no error if they exist).
for out_dir in (figure_path, model_path, cm_path):
    os.makedirs(out_dir, exist_ok=True)

# Load the trained classifier.
# NOTE: pickle.load can execute arbitrary code -- only load model files that
# were produced by this project.
with open(f'{model_path}/{rfc_run_name}.pkl', 'rb') as f:
    clf = pickle.load(f)

In [None]:
# Predict Excluded Object
predicted_probability = 100 * clf.predict_proba(x_test)

In [None]:
#clf.classes_[1:]
clf.classes_

In [None]:
# Table of class probabilities: one column per classifier class, indexed like x_test_all.
df = pd.DataFrame(predicted_probability, columns=clf.classes_).set_index(x_test_all.index)

# "RFC_best_cls" holds, per row, the class label with the highest probability.
# All classes in clf.classes_ are considered. If the model was trained with a
# leading None class that should be ignored, restrict both the labels and the
# probabilities to [1:] before taking the argmax.
best_class_idx = df.to_numpy().argmax(axis=1)
df['RFC_best_cls'] = clf.classes_[best_class_idx]
df

In [None]:
# Join the test-set table with the classifier probabilities column-wise, then
# remap the values of the "tns_cls" column through the `di` mapping.
df2 = pd.concat([x_test_all, df], axis=1).replace({"tns_cls": di})
df2

In [None]:
# Confusion matrices: compare the TNS class ("tns_cls") with the classifier's
# best class ("RFC_best_cls"), once per normalisation kind, and save each figure.
title = 'RFC'
df = df2
true_labels, predicted_labels = (np.array(df[col]) for col in ('tns_cls', 'RFC_best_cls'))

# Class labels shown on the matrix axes: the distinct true labels.
class_names = np.unique(true_labels)
nclasses = len(class_names)

KINDS = ['completeness', 'purity']
for KIND in KINDS:
    plot_conf_matrix(true_labels, predicted_labels, labels=class_names,
                     title=f'{title} ({KIND})', kind=KIND)
    out_file = f'{cm_path}/confmatrix_before_lc_feature_extractor_{title}_{KIND}.jpg'
    plt.savefig(out_file, dpi=300, bbox_inches='tight')
    plt.show()
    plt.close()