* Main: This notebook tests how different quality cuts remove non-stellar objects (galaxies and QSOs) from the `goodspec` sample.

In [1]:
import numpy as np
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import joblib

from astropy.io import fits
from functools import reduce
from astropy.table import Table

sys.path.append('../../methods_code_Nresol/')
sys.path.append('../../utils/')
import astro_cuts
import importlib
import spec_utils

In [2]:
# Read the FITS catalogue into an astropy Table. The same path is assigned to
# `fname` in a later cell; in the visible cells `dat` is only used to inspect
# the spectroscopic class column.
dat = Table.read('lsd_scripts/spectrain_2_11_r50.fits', format='fits')

In [3]:
# List the distinct values found in the SDSS DR17 specobj CLASS column.
np.unique(np.array(dat['sdss_dr17_specobj.CLASS']))

array([b'GALAXY', b'QSO', b'STAR'], dtype='|S6')

In [4]:
# Path of the FITS catalogue (the same file that was read into `dat` earlier).
fname = 'lsd_scripts/spectrain_2_11_r50.fits'
# Project helper that builds the working table from that file. Later cells
# index its derivative with .iloc, so it is presumably a pandas DataFrame --
# TODO confirm against spec_utils.
df_allspec = spec_utils.convert_to_dataframe_specmatched(fname)

  df['reduced_chisq'] = df['chisq'].to_numpy() * n_passbands / (n_passbands - 4)
  sdss_flux_sig = np.power(np.array(dat['sdss_dr14_starsweep.psfflux_ivar']), -0.5)
  df['sdss.pmag_'+b] = 22.5 - 2.5*np.clip(np.log10(np.array(dat['sdss_dr14_starsweep.psfflux'])[:, ib]), 0.0, np.inf)
  df['sdss.pmag_'+b] = 22.5 - 2.5*np.clip(np.log10(np.array(dat['sdss_dr14_starsweep.psfflux'])[:, ib]), 0.0, np.inf)


In [5]:
# Re-execute spec_utils from disk so that edits to the module are picked up
# without restarting the kernel.
importlib.reload(spec_utils)

<module 'spec_utils' from '/n/holylfs05/LABS/finkbeiner_lab/Everyone/highlat/notebooks_main1/spectra_matched/spec_utils.py'>

In [6]:
# Filter df_allspec with spec_utils.return_goodspec; the helper prints its own
# before/after summary (596315 -> 210713 objects in the recorded run).
df_goodspec = spec_utils.return_goodspec(df_allspec)

Number of objects before / after 596315 210713
c1 = 0.873, c2 = 0.995, c3 = 0.445
Fraction left = 0.353


## Section 1: 
### How many objects are not stars before cuts?

In [7]:
# Reload spec_utils again to pick up on-disk edits to the module.
importlib.reload(spec_utils)
# TODO (author's note): make a table for this instead -- presumably refers to
# the class breakdown printed by the following cell; confirm.

<module 'spec_utils' from '/n/holylfs05/LABS/finkbeiner_lab/Everyone/highlat/notebooks_main1/spectra_matched/spec_utils.py'>

In [8]:
# Print counts and fractions of stars / galaxies / QSOs in df_goodspec,
# i.e. before any of the cuts below are applied.
spec_utils.print_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'])

Number of Stars = 175597, Fraction(%) = 83.3347, Ratio(obj:star)*100 = 100
Number of Galaxies = 299, Fraction(%) = 0.1419, Ratio(obj:star)*100 = 0.1703
Number of QSOs = 34817, Fraction(%) = 16.5234, Ratio(obj:star)*100 = 19.8278


### After cuts: 15e

In [9]:
# Relative path to the notebooks_test directory; used further down as the
# prefix for the w1-w2-w3 SVM model file.
nbtest_dir = '../../notebooks_test/'

def combine_cuts(df_input, cuts_list):
    """Evaluate several ``astro_cuts`` selections and AND their masks together.

    Parameters
    ----------
    df_input : table-like
        Passed unchanged as the first positional argument to every cut
        function. Only ``len(df_input)`` is used directly here (for the
        empty-``cuts_list`` case).
    cuts_list : sequence of tuples
        Each entry is ``(name,)`` or ``(name, kwargs)``: ``name`` is the
        attribute name of a function in the ``astro_cuts`` module and
        ``kwargs`` is a dict of keyword arguments forwarded to it.

    Returns
    -------
    Boolean mask
        Element-wise logical AND of the masks returned by all cuts. When
        ``cuts_list`` is empty nothing is removed, so an all-True NumPy mask
        of length ``len(df_input)`` is returned (previously this raised
        ``TypeError`` from ``reduce`` on an empty sequence).

    Side effects
    ------------
    Prints one line per cut (index, number of rows kept, fraction kept) and a
    final line with the fraction kept by the combined selection.

    Raises
    ------
    AttributeError
        If a cut name is not an attribute of ``astro_cuts``.
    """
    # Resolve every name up front so a misspelt cut fails before any
    # (potentially expensive) cut is evaluated.
    cutfuncs = [getattr(astro_cuts, ctup[0]) for ctup in cuts_list]

    masklist = []
    for c, (cutfunc, ctup) in enumerate(zip(cutfuncs, cuts_list)):
        # A one-element tuple means "call the cut with its default arguments".
        kwargs = ctup[1] if len(ctup) > 1 else {}
        mask = cutfunc(df_input, **kwargs)
        print('Cut{} {} {}'.format(c, np.sum(mask), np.sum(mask)/len(mask)))
        masklist.append(mask)

    if masklist:
        final_cut = reduce(np.logical_and, masklist) #intersection of all cuts
    else:
        # No cuts requested: keep every row instead of letting reduce() raise.
        final_cut = np.ones(len(df_input), dtype=bool)

    print('Effective Cut = {:.3f}'.format(np.sum(final_cut)/len(final_cut)))
    return final_cut

In [10]:
# Show which saved model files are available in ./models.
os.listdir('models')

['.ipynb_checkpoints',
 'svm_sdss_quasarsep_unbalanced.joblib',
 'svm_panstarrs_quasarsep_balanced.joblib',
 'svm_sdss_quasarsep_balanced.joblib']

In [11]:
# Load the two pre-trained SVM models handed to the model-based cuts below.
# joblib.load unpickles arbitrary objects, so only load files you trust.
# NOTE(review): the variable and cut are named "wise" but the file name says
# "panstarrs" -- confirm this is the intended model.
wisemodel = joblib.load('models/svm_panstarrs_quasarsep_balanced.joblib')
smodel = joblib.load('models/svm_sdss_quasarsep_unbalanced.joblib')

# Each entry is (astro_cuts function name, keyword arguments), the format
# combine_cuts expects. This is the cut set labelled "15e" in the heading.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('e_nonneg_cut_sigfac', {'sigfac': 5.0}),
            ('bayestar_chisq_per_passband', {'maxchisq': 5.0}), 
            ('wise_svmnondetectioncombinedcut', {'model': wisemodel}), ('sdss_uvcut', {'model': smodel}),
            ('dm_sigma_cut', {'maxsig': 1.5})]

final_cut = combine_cuts(df_goodspec, cuts_list) #intersection of all cuts
# Positional boolean selection of the rows that survive every cut.
df15e = df_goodspec.iloc[final_cut, :]
# Class breakdown of the surviving rows.
spec_utils.print_obj_distribution(df15e['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 197696 0.938224029841538
Cut2 192061 0.9114814937853858
Cut3 176656 0.8383725731207852
Cut4 175386 0.8323454177008538
Cut5 200235 0.9502735948897315
Effective Cut = 0.730
Number of Stars = 153578, Fraction(%) = 99.8375, Ratio(obj:star)*100 = 100
Number of Galaxies = 126, Fraction(%) = 0.0819, Ratio(obj:star)*100 = 0.0820
Number of QSOs = 124, Fraction(%) = 0.0806, Ratio(obj:star)*100 = 0.0807


In [12]:
# Print how each class's share of the sample changed between the uncut sample
# and the 15e selection (values are percentage-point differences, judging by
# the recorded fractions).
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], df15e['sdss_dr17_specobj.CLASS'])

StarFracChange = 16.503 (%)
GalFracChange = -0.060 (%)
QSOFracChange = -16.443 (%)


In [13]:
# Bare expression: echoes the current value of nbtest_dir (sanity check only).
nbtest_dir

'../../notebooks_test/'

### Ablation Tests: which cut is doing what?

In [14]:
# Ablation step 1: apply only distmod_median_cut and dm_sigma_cut.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('dm_sigma_cut', {'maxsig': 1.5})]

final_cut = combine_cuts(df_goodspec, cuts_list)
# `dfalt` is a scratch name that every ablation cell overwrites.
dfalt = df_goodspec.iloc[final_cut, :]
# Class breakdown after these cuts, then the change relative to the uncut sample.
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Effective Cut = 0.931
Number of Stars = 164022, Fraction(%) = 83.6224, Ratio(obj:star)*100 = 100
Number of Galaxies = 282, Fraction(%) = 0.1438, Ratio(obj:star)*100 = 0.1719
Number of QSOs = 31842, Fraction(%) = 16.2338, Ratio(obj:star)*100 = 19.4132
StarFracChange = 0.288 (%)
GalFracChange = 0.002 (%)
QSOFracChange = -0.290 (%)


In [15]:
# Ablation step 2: previous step's cuts plus bayestar_chisq_per_passband.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('dm_sigma_cut', {'maxsig': 1.5}), ('bayestar_chisq_per_passband', {'maxchisq': 5.0})]

final_cut = combine_cuts(df_goodspec, cuts_list)
# Overwrites the scratch frame `dfalt` with this step's selection.
dfalt = df_goodspec.iloc[final_cut, :]
# Class breakdown after these cuts, then the change relative to the uncut sample.
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 192061 0.9114814937853858
Effective Cut = 0.846
Number of Stars = 156714, Fraction(%) = 87.8846, Ratio(obj:star)*100 = 100
Number of Galaxies = 182, Fraction(%) = 0.1021, Ratio(obj:star)*100 = 0.1161
Number of QSOs = 21422, Fraction(%) = 12.0134, Ratio(obj:star)*100 = 13.6695
StarFracChange = 4.550 (%)
GalFracChange = -0.040 (%)
QSOFracChange = -4.510 (%)


In [16]:
# Ablation step 3: previous step's cuts plus e_nonneg_cut_sigfac.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('dm_sigma_cut', {'maxsig': 1.5}), ('bayestar_chisq_per_passband', {'maxchisq': 5.0}),
            ('e_nonneg_cut_sigfac', {'sigfac': 5.0})]

final_cut = combine_cuts(df_goodspec, cuts_list)
# Overwrites the scratch frame `dfalt` with this step's selection.
dfalt = df_goodspec.iloc[final_cut, :]
# Class breakdown after these cuts, then the change relative to the uncut sample.
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 192061 0.9114814937853858
Cut3 197696 0.938224029841538
Effective Cut = 0.821
Number of Stars = 155441, Fraction(%) = 89.7984, Ratio(obj:star)*100 = 100
Number of Galaxies = 175, Fraction(%) = 0.1011, Ratio(obj:star)*100 = 0.1126
Number of QSOs = 17484, Fraction(%) = 10.1005, Ratio(obj:star)*100 = 11.2480
StarFracChange = 6.464 (%)
GalFracChange = -0.041 (%)
QSOFracChange = -6.423 (%)


In [17]:
# Ablation step 4: previous step's cuts plus wise_svmnondetectioncombinedcut,
# which uses the `wisemodel` loaded in an earlier cell.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('dm_sigma_cut', {'maxsig': 1.5}), ('bayestar_chisq_per_passband', {'maxchisq': 5.0}),
            ('e_nonneg_cut_sigfac', {'sigfac': 5.0}), ('wise_svmnondetectioncombinedcut', {'model': wisemodel})]

final_cut = combine_cuts(df_goodspec, cuts_list)
# Overwrites the scratch frame `dfalt` with this step's selection.
dfalt = df_goodspec.iloc[final_cut, :]
# Class breakdown after these cuts, then the change relative to the uncut sample.
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 192061 0.9114814937853858
Cut3 197696 0.938224029841538
Cut4 176656 0.8383725731207852
Effective Cut = 0.738
Number of Stars = 154486, Fraction(%) = 99.3058, Ratio(obj:star)*100 = 100
Number of Galaxies = 142, Fraction(%) = 0.0913, Ratio(obj:star)*100 = 0.0919
Number of QSOs = 938, Fraction(%) = 0.6030, Ratio(obj:star)*100 = 0.6072
StarFracChange = 15.971 (%)
GalFracChange = -0.051 (%)
QSOFracChange = -15.920 (%)


In [18]:
# Ablation step 5: previous step's cuts plus sdss_uvcut (uses `smodel`).
# These are the same six cuts as the 15e selection, listed in a different
# order; the recorded class counts match the 15e output.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('dm_sigma_cut', {'maxsig': 1.5}), ('bayestar_chisq_per_passband', {'maxchisq': 5.0}),
            ('e_nonneg_cut_sigfac', {'sigfac': 5.0}), ('wise_svmnondetectioncombinedcut', {'model': wisemodel}), ('sdss_uvcut', {'model': smodel})]

final_cut = combine_cuts(df_goodspec, cuts_list)
# Overwrites the scratch frame `dfalt` with this step's selection.
dfalt = df_goodspec.iloc[final_cut, :]
# Class breakdown after these cuts, then the change relative to the uncut sample.
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 192061 0.9114814937853858
Cut3 197696 0.938224029841538
Cut4 176656 0.8383725731207852
Cut5 175386 0.8323454177008538
Effective Cut = 0.730
Number of Stars = 153578, Fraction(%) = 99.8375, Ratio(obj:star)*100 = 100
Number of Galaxies = 126, Fraction(%) = 0.0819, Ratio(obj:star)*100 = 0.0820
Number of QSOs = 124, Fraction(%) = 0.0806, Ratio(obj:star)*100 = 0.0807
StarFracChange = 16.503 (%)
GalFracChange = -0.060 (%)
QSOFracChange = -16.443 (%)


### Alternative cuts

In [19]:
# Reload the two SVM models used so far and additionally load the w1-w2-w3
# SVM from the notebooks_test tree. joblib.load unpickles arbitrary objects,
# so only load files you trust.
wisemodel = joblib.load('models/svm_panstarrs_quasarsep_balanced.joblib')
smodel = joblib.load('models/svm_sdss_quasarsep_unbalanced.joblib')
w123model = joblib.load(nbtest_dir+'intermed/2_4/svm_w1-w2-w3_2-4.joblib')
# Alternative selection: the six 15e cuts plus w1_w2_w3cut.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('e_nonneg_cut_sigfac', {'sigfac': 5.0}),
            ('bayestar_chisq_per_passband', {'maxchisq': 5.0}), 
            ('wise_svmnondetectioncombinedcut', {'model': wisemodel}), ('sdss_uvcut', {'model': smodel}),
            ('dm_sigma_cut', {'maxsig': 1.5}), ('w1_w2_w3cut', {'model': w123model})]

final_cut = combine_cuts(df_goodspec, cuts_list) #intersection of all cuts
# NOTE(review): this rebinds `df15e`, replacing the six-cut 15e selection made
# earlier with the seven-cut variant; later uses of `df15e` see this one.
df15e = df_goodspec.iloc[final_cut, :]
spec_utils.print_obj_distribution(df15e['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 197696 0.938224029841538
Cut2 192061 0.9114814937853858
Cut3 176656 0.8383725731207852
Cut4 175386 0.8323454177008538
Cut5 200235 0.9502735948897315
Cut6 178034 0.8449122740409941
Effective Cut = 0.729
Number of Stars = 153424, Fraction(%) = 99.8380, Ratio(obj:star)*100 = 100
Number of Galaxies = 125, Fraction(%) = 0.0813, Ratio(obj:star)*100 = 0.0815
Number of QSOs = 124, Fraction(%) = 0.0807, Ratio(obj:star)*100 = 0.0808


In [20]:
# Change in class fractions relative to the uncut sample. `df15e` is whatever
# the most recently executed cell bound to that name; in notebook order that
# is the variant that includes w1_w2_w3cut.
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], df15e['sdss_dr17_specobj.CLASS'])

StarFracChange = 16.503 (%)
GalFracChange = -0.061 (%)
QSOFracChange = -16.443 (%)


In [21]:
# Reload the SVM models (joblib.load unpickles; only load trusted files).
wisemodel = joblib.load('models/svm_panstarrs_quasarsep_balanced.joblib')
smodel = joblib.load('models/svm_sdss_quasarsep_unbalanced.joblib')
# NOTE(review): w123model is loaded here but not referenced by cuts_list below.
w123model = joblib.load(nbtest_dir+'intermed/2_4/svm_w1-w2-w3_2-4.joblib')
# Alternative selection: the six 15e cuts plus e_sigma_cut with maxsig=0.4.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('e_nonneg_cut_sigfac', {'sigfac': 5.0}),
            ('bayestar_chisq_per_passband', {'maxchisq': 5.0}), 
            ('wise_svmnondetectioncombinedcut', {'model': wisemodel}), ('sdss_uvcut', {'model': smodel}),
            ('dm_sigma_cut', {'maxsig': 1.5}), ('e_sigma_cut', {'maxsig': 0.4})]

final_cut = combine_cuts(df_goodspec, cuts_list) #intersection of all cuts
# Stored in the scratch frame `dfalt` (overwritten by later cells).
dfalt = df_goodspec.iloc[final_cut, :]
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 197696 0.938224029841538
Cut2 192061 0.9114814937853858
Cut3 176656 0.8383725731207852
Cut4 175386 0.8323454177008538
Cut5 200235 0.9502735948897315
Cut6 204439 0.9702249030672051
Effective Cut = 0.722
Number of Stars = 151872, Fraction(%) = 99.8468, Ratio(obj:star)*100 = 100
Number of Galaxies = 118, Fraction(%) = 0.0776, Ratio(obj:star)*100 = 0.0777
Number of QSOs = 115, Fraction(%) = 0.0756, Ratio(obj:star)*100 = 0.0757


In [22]:
# Change in class fractions between the uncut sample and the current `dfalt`.
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

StarFracChange = 16.512 (%)
GalFracChange = -0.064 (%)
QSOFracChange = -16.448 (%)


In [26]:
# Re-execute astro_cuts from disk. combine_cuts looks each cut up on the
# astro_cuts module at call time, so calls made after this reload use the
# updated functions.
# NOTE(review): this cell's recorded execution count (26) is higher than that
# of the cells that follow it (24, 25), so the stored outputs were not produced
# in top-to-bottom order -- re-run with Restart & Run All to confirm them.
importlib.reload(astro_cuts)

<module 'astro_cuts' from '/n/holylfs05/LABS/finkbeiner_lab/Everyone/highlat/notebooks_main1/spectra_matched/../../methods_code_Nresol/astro_cuts.py'>

In [24]:
# Some contiguous patches have all mags at 22.5 -- TODO: impose that criterion
# in detected_all.
# Reload the SVM models (joblib.load unpickles; only load trusted files).
wisemodel = joblib.load('models/svm_panstarrs_quasarsep_balanced.joblib')
smodel = joblib.load('models/svm_sdss_quasarsep_unbalanced.joblib')
# NOTE(review): w123model is loaded here but not referenced by cuts_list below.
w123model = joblib.load(nbtest_dir+'intermed/2_4/svm_w1-w2-w3_2-4.joblib')
# Same cut set as the e_sigma_cut variant, except that sdss_uvcut is replaced
# by sdss_uvcut_strict.
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('e_nonneg_cut_sigfac', {'sigfac': 5.0}),
            ('bayestar_chisq_per_passband', {'maxchisq': 5.0}), 
            ('wise_svmnondetectioncombinedcut', {'model': wisemodel}), ('sdss_uvcut_strict', {'model': smodel}),
            ('dm_sigma_cut', {'maxsig': 1.5}), ('e_sigma_cut', {'maxsig': 0.4})]

final_cut = combine_cuts(df_goodspec, cuts_list) #intersection of all cuts
# Stored in the scratch frame `dfalt` (overwritten by later cells).
dfalt = df_goodspec.iloc[final_cut, :]
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 197696 0.938224029841538
Cut2 192061 0.9114814937853858
Cut3 176656 0.8383725731207852
Cut4 174196 0.8266979256144614
Cut5 200235 0.9502735948897315
Cut6 204439 0.9702249030672051
Effective Cut = 0.718
Number of Stars = 151237, Fraction(%) = 99.9135, Ratio(obj:star)*100 = 100
Number of Galaxies = 77, Fraction(%) = 0.0509, Ratio(obj:star)*100 = 0.0509
Number of QSOs = 54, Fraction(%) = 0.0357, Ratio(obj:star)*100 = 0.0357


In [25]:
# Change in class fractions between the uncut sample and the current `dfalt`.
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

StarFracChange = 16.579 (%)
GalFracChange = -0.091 (%)
QSOFracChange = -16.488 (%)


In [30]:
# 15i: with the 22.5 check in det_all.
# NOTE(review): the cut list is identical to the earlier sdss_uvcut +
# e_sigma_cut variant, and the recorded output below matches that cell's
# output exactly, so the added check changed nothing for this sample (or was
# not in effect when this ran) -- confirm.
# Reload the SVM models (joblib.load unpickles; only load trusted files).
wisemodel = joblib.load('models/svm_panstarrs_quasarsep_balanced.joblib')
smodel = joblib.load('models/svm_sdss_quasarsep_unbalanced.joblib')
cuts_list = [('distmod_median_cut', {'mindm': 8}), ('e_nonneg_cut_sigfac', {'sigfac': 5.0}),
            ('bayestar_chisq_per_passband', {'maxchisq': 5.0}), 
            ('wise_svmnondetectioncombinedcut', {'model': wisemodel}), ('sdss_uvcut', {'model': smodel}),
            ('dm_sigma_cut', {'maxsig': 1.5}), ('e_sigma_cut', {'maxsig': 0.4})]

final_cut = combine_cuts(df_goodspec, cuts_list) #intersection of all cuts
# Stored in the scratch frame `dfalt`.
dfalt = df_goodspec.iloc[final_cut, :]
spec_utils.print_obj_distribution(dfalt['sdss_dr17_specobj.CLASS'])

Cut0 206622 0.9805849662811502
Cut1 197696 0.938224029841538
Cut2 192061 0.9114814937853858
Cut3 176656 0.8383725731207852
Cut4 175386 0.8323454177008538
Cut5 200235 0.9502735948897315
Cut6 204439 0.9702249030672051
Effective Cut = 0.722
Number of Stars = 151872, Fraction(%) = 99.8468, Ratio(obj:star)*100 = 100
Number of Galaxies = 118, Fraction(%) = 0.0776, Ratio(obj:star)*100 = 0.0777
Number of QSOs = 115, Fraction(%) = 0.0756, Ratio(obj:star)*100 = 0.0757


In [31]:
# Change in class fractions between the uncut sample and the 15i selection
# currently held in `dfalt`.
spec_utils.print_delta_obj_distribution(df_goodspec['sdss_dr17_specobj.CLASS'], dfalt['sdss_dr17_specobj.CLASS'])

StarFracChange = 16.512 (%)
GalFracChange = -0.064 (%)
QSOFracChange = -16.448 (%)
