* Main: This notebook tests how the different cuts remove objects from the goodspec sample.
* Updated to use the 16a set of cuts.

In [1]:
import numpy as np
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import joblib

from astropy.io import fits
from functools import reduce
from astropy.table import Table

sys.path.append('../../methods_code_Nresol/')
sys.path.append('../../utils/')
import astro_cuts
import importlib
import spec_utils

In [2]:
# Load the FITS catalogue as an astropy Table; the next cell uses it to
# inspect the distinct values of the spectroscopic CLASS column.
dat = Table.read('lsd_scripts/spectrain_2_11_r50.fits', format='fits')

In [3]:
# Distinct spectroscopic class labels present in the catalogue.
spec_class_labels = np.array(dat['sdss_dr17_specobj.CLASS'])
np.unique(spec_class_labels)

array([b'GALAXY', b'QSO', b'STAR'], dtype='|S6')

In [4]:
# Same FITS file that was read into `dat`; here it is handed to the
# project helper that builds the table used by the rest of the notebook
# (presumably a pandas DataFrame, since it is later indexed with .iloc).
fname = 'lsd_scripts/spectrain_2_11_r50.fits'
df_allspec = spec_utils.convert_to_dataframe_specmatched(fname)

  df['reduced_chisq'] = df['chisq'].to_numpy() * n_passbands / (n_passbands - 4)
  sdss_flux_sig = np.power(np.array(dat['sdss_dr14_starsweep.psfflux_ivar']), -0.5)
  df['sdss.pmag_'+b] = 22.5 - 2.5*np.clip(np.log10(np.array(dat['sdss_dr14_starsweep.psfflux'])[:, ib]), 0.0, np.inf)
  df['sdss.pmag_'+b] = 22.5 - 2.5*np.clip(np.log10(np.array(dat['sdss_dr14_starsweep.psfflux'])[:, ib]), 0.0, np.inf)


In [5]:
# Re-import spec_utils so that edits made to the module on disk take effect
# in this kernel. Objects built before the reload (e.g. df_allspec) are not
# recomputed by this call.
importlib.reload(spec_utils)

<module 'spec_utils' from '/n/holylfs05/LABS/finkbeiner_lab/Everyone/highlat/notebooks_main1/spectra_matched/spec_utils.py'>

In [6]:
# Restrict the full matched table to the "good spectra" subset. The helper
# prints the before/after object counts and the surviving fraction.
df_goodspec = spec_utils.return_goodspec(df_allspec)

Number of objects before / after 596315 210713
c1 = 0.873, c2 = 0.995, c3 = 0.445
Fraction left = 0.353


## Section 1: 
### How many objects are not stars before cuts?

In [7]:
# Pick up on-disk edits to spec_utils before it is used again.
importlib.reload(spec_utils)
# TODO: make a table for this instead

<module 'spec_utils' from '/n/holylfs05/LABS/finkbeiner_lab/Everyone/highlat/notebooks_main1/spectra_matched/spec_utils.py'>

In [8]:
# Class breakdown of the good-spec sample before any cut is applied.
goodspec_classes = df_goodspec['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(goodspec_classes)

Number of Stars = 175597, Fraction(%) = 83.3347, Ratio(obj:star)*100 = 100
Number of Galaxies = 299, Fraction(%) = 0.1419, Ratio(obj:star)*100 = 0.1703
Number of QSOs = 34817, Fraction(%) = 16.5234, Ratio(obj:star)*100 = 19.8278


### After cuts: 16a

In [9]:
# Relative path to the notebooks_test directory.
# NOTE(review): apart from being echoed in a later cell, this is not used by
# any cell visible here -- confirm it is still needed.
nbtest_dir = '../../notebooks_test/'

def combine_cuts(df_input, cuts_list):
    """Apply several cuts from ``astro_cuts`` and return their intersection.

    Parameters
    ----------
    df_input : table-like
        Passed unchanged as the first argument to every cut function.
    cuts_list : sequence
        One entry per cut. An entry is ``(name,)``, ``(name, kwargs)`` or a
        bare ``name`` string, where ``name`` is the attribute of
        ``astro_cuts`` to call and ``kwargs`` is a dict of keyword arguments
        forwarded to it.

    Returns
    -------
    Boolean mask that is True where a row passes every cut. With an empty
    ``cuts_list`` nothing is removed and an all-True NumPy array of length
    ``len(df_input)`` is returned.

    Side effects
    ------------
    Prints the number and fraction of rows kept by each individual cut,
    followed by the fraction kept by the combination.
    """
    # A bare string is shorthand for a one-element tuple. Without this,
    # ('name') -- which is just a str -- would be indexed character-wise.
    entries = [(ctup,) if isinstance(ctup, str) else ctup for ctup in cuts_list]
    # Resolve every cut function up front so a misspelt name fails before
    # any (possibly expensive) cut has been evaluated.
    cutfuncs = [getattr(astro_cuts, entry[0]) for entry in entries]
    masklist = []
    for c, (cutfunc, entry) in enumerate(zip(cutfuncs, entries)):
        kwargs = entry[1] if len(entry) > 1 else {}
        mask = cutfunc(df_input, **kwargs)
        masklist.append(mask)
        print('Cut{} {} {}'.format(c, np.sum(mask), np.sum(mask)/len(mask)))
    if masklist:
        final_cut = reduce(np.logical_and, masklist) #intersection of all cuts
    else:
        # reduce() raises TypeError on an empty sequence; with no cuts
        # requested every row is kept.
        final_cut = np.ones(len(df_input), dtype=bool)
    print('Effective Cut = {:.3f}'.format(np.sum(final_cut)/len(final_cut)))
    return final_cut

In [10]:
# List the files available in the local models/ directory.
os.listdir('models')

['.ipynb_checkpoints',
 'svm_sdss_quasarsep_unbalanced.joblib',
 'svm_panstarrs_quasarsep_balanced.joblib',
 'svm_sdss_quasarsep_balanced.joblib']

In [11]:
# Classifiers handed to the two model-based cuts below.
# joblib.load unpickles the file, so only point it at trusted model files.
# NOTE(review): the WISE cut is given the 'panstarrs' model file -- confirm
# this pairing is intended.
wisemodel = joblib.load('models/svm_panstarrs_quasarsep_balanced.joblib')
smodel = joblib.load('models/svm_sdss_quasarsep_unbalanced.joblib')

# Full cut set: one entry per cut, as (astro_cuts function name, optional kwargs).
cuts_list = [
    ('distmod_median_cut', {'mindm': 8}),
    ('dm_sigma_cut', {'maxsig': 1.5}),
    ('wise_svmnondetectioncombinedcut', {'model': wisemodel}),
    ('parallax_nan_edr3',),
    ('sdss_uvcut', {'model': smodel}),
    ('bayestar_chisq', {'maxchisq': 3.0}),
]

# Mask of rows that pass every cut, then the class mix of the survivors.
final_cut = combine_cuts(df_goodspec, cuts_list)
df15k = df_goodspec.iloc[final_cut]
classes_after_cuts = df15k['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(classes_after_cuts)

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 176656 0.8383725731207852
Cut3 210106 0.997119304456773
Cut4 175386 0.8323454177008538
Cut5 156011 0.740395704109381
Effective Cut = 0.667
Number of Stars = 140519, Fraction(%) = 99.9232, Ratio(obj:star)*100 = 100
Number of Galaxies = 55, Fraction(%) = 0.0391, Ratio(obj:star)*100 = 0.0391
Number of QSOs = 53, Fraction(%) = 0.0377, Ratio(obj:star)*100 = 0.0377


In [12]:
# Change in class fractions between the uncut good-spec sample and the
# sample that survives the full cut set.
classes_before = df_goodspec['sdss_dr17_specobj.CLASS']
classes_after = df15k['sdss_dr17_specobj.CLASS']
spec_utils.print_delta_obj_distribution(classes_before, classes_after)

StarFracChange = 16.589 (%)
GalFracChange = -0.103 (%)
QSOFracChange = -16.486 (%)
Stars thrown out = -35078


In [13]:
# Echo the test-notebook directory path defined earlier.
nbtest_dir

'../../notebooks_test/'

### Ablation Tests: which cut is doing what?

In [14]:
# Ablation: apply only distmod_median_cut and dm_sigma_cut.
cuts_list = [
    ('distmod_median_cut', {'mindm': 8}),
    ('dm_sigma_cut', {'maxsig': 1.5}),
]

final_cut = combine_cuts(df_goodspec, cuts_list)
dfalt = df_goodspec.iloc[final_cut]

# Class mix after these cuts, then the change relative to the uncut sample.
classes_before = df_goodspec['sdss_dr17_specobj.CLASS']
classes_after = dfalt['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(classes_after)
spec_utils.print_delta_obj_distribution(classes_before, classes_after)

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Effective Cut = 0.931
Number of Stars = 164022, Fraction(%) = 83.6224, Ratio(obj:star)*100 = 100
Number of Galaxies = 282, Fraction(%) = 0.1438, Ratio(obj:star)*100 = 0.1719
Number of QSOs = 31842, Fraction(%) = 16.2338, Ratio(obj:star)*100 = 19.4132
StarFracChange = 0.288 (%)
GalFracChange = 0.002 (%)
QSOFracChange = -0.290 (%)
Stars thrown out = -11575


In [15]:
# Ablation: the two previous cuts plus wise_svmnondetectioncombinedcut.
cuts_list = [
    ('distmod_median_cut', {'mindm': 8}),
    ('dm_sigma_cut', {'maxsig': 1.5}),
    ('wise_svmnondetectioncombinedcut', {'model': wisemodel}),
]

final_cut = combine_cuts(df_goodspec, cuts_list)
dfalt = df_goodspec.iloc[final_cut]

# Class mix after these cuts, then the change relative to the uncut sample.
classes_before = df_goodspec['sdss_dr17_specobj.CLASS']
classes_after = dfalt['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(classes_after)
spec_utils.print_delta_obj_distribution(classes_before, classes_after)

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 176656 0.8383725731207852
Effective Cut = 0.783
Number of Stars = 162841, Fraction(%) = 98.6527, Ratio(obj:star)*100 = 100
Number of Galaxies = 208, Fraction(%) = 0.1260, Ratio(obj:star)*100 = 0.1277
Number of QSOs = 2016, Fraction(%) = 1.2213, Ratio(obj:star)*100 = 1.2380
StarFracChange = 15.318 (%)
GalFracChange = -0.016 (%)
QSOFracChange = -15.302 (%)
Stars thrown out = -12756


In [16]:
# Ablation: the three previous cuts plus parallax_nan_edr3.
cuts_list = [
    ('distmod_median_cut', {'mindm': 8}),
    ('dm_sigma_cut', {'maxsig': 1.5}),
    ('wise_svmnondetectioncombinedcut', {'model': wisemodel}),
    ('parallax_nan_edr3',),
]

final_cut = combine_cuts(df_goodspec, cuts_list)
dfalt = df_goodspec.iloc[final_cut]

# Class mix after these cuts, then the change relative to the uncut sample.
classes_before = df_goodspec['sdss_dr17_specobj.CLASS']
classes_after = dfalt['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(classes_after)
spec_utils.print_delta_obj_distribution(classes_before, classes_after)

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 176656 0.8383725731207852
Cut3 210106 0.997119304456773
Effective Cut = 0.781
Number of Stars = 162500, Fraction(%) = 98.6822, Ratio(obj:star)*100 = 100
Number of Galaxies = 154, Fraction(%) = 0.0935, Ratio(obj:star)*100 = 0.0948
Number of QSOs = 2016, Fraction(%) = 1.2243, Ratio(obj:star)*100 = 1.2406
StarFracChange = 15.348 (%)
GalFracChange = -0.048 (%)
QSOFracChange = -15.299 (%)
Stars thrown out = -13097


In [17]:
# Ablation: the four previous cuts plus sdss_uvcut.
cuts_list = [
    ('distmod_median_cut', {'mindm': 8}),
    ('dm_sigma_cut', {'maxsig': 1.5}),
    ('wise_svmnondetectioncombinedcut', {'model': wisemodel}),
    ('parallax_nan_edr3',),
    ('sdss_uvcut', {'model': smodel}),
]

final_cut = combine_cuts(df_goodspec, cuts_list)
dfalt = df_goodspec.iloc[final_cut]

# Class mix after these cuts, then the change relative to the uncut sample.
classes_before = df_goodspec['sdss_dr17_specobj.CLASS']
classes_after = dfalt['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(classes_after)
spec_utils.print_delta_obj_distribution(classes_before, classes_after)

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 176656 0.8383725731207852
Cut3 210106 0.997119304456773
Cut4 175386 0.8323454177008538
Effective Cut = 0.765
Number of Stars = 160589, Fraction(%) = 99.6661, Ratio(obj:star)*100 = 100
Number of Galaxies = 119, Fraction(%) = 0.0739, Ratio(obj:star)*100 = 0.0741
Number of QSOs = 419, Fraction(%) = 0.2600, Ratio(obj:star)*100 = 0.2609
StarFracChange = 16.331 (%)
GalFracChange = -0.068 (%)
QSOFracChange = -16.263 (%)
Stars thrown out = -15008


In [18]:
# Ablation, final step: all six cuts, ending with bayestar_chisq.
cuts_list = [
    ('distmod_median_cut', {'mindm': 8}),
    ('dm_sigma_cut', {'maxsig': 1.5}),
    ('wise_svmnondetectioncombinedcut', {'model': wisemodel}),
    ('parallax_nan_edr3',),
    ('sdss_uvcut', {'model': smodel}),
    ('bayestar_chisq', {'maxchisq': 3.0}),
]

final_cut = combine_cuts(df_goodspec, cuts_list)
dfalt = df_goodspec.iloc[final_cut]

# Class mix after these cuts, then the change relative to the uncut sample.
classes_before = df_goodspec['sdss_dr17_specobj.CLASS']
classes_after = dfalt['sdss_dr17_specobj.CLASS']
spec_utils.print_obj_distribution(classes_after)
spec_utils.print_delta_obj_distribution(classes_before, classes_after)

Cut0 206622 0.9805849662811502
Cut1 200235 0.9502735948897315
Cut2 176656 0.8383725731207852
Cut3 210106 0.997119304456773
Cut4 175386 0.8323454177008538
Cut5 156011 0.740395704109381
Effective Cut = 0.667
Number of Stars = 140519, Fraction(%) = 99.9232, Ratio(obj:star)*100 = 100
Number of Galaxies = 55, Fraction(%) = 0.0391, Ratio(obj:star)*100 = 0.0391
Number of QSOs = 53, Fraction(%) = 0.0377, Ratio(obj:star)*100 = 0.0377
StarFracChange = 16.589 (%)
GalFracChange = -0.103 (%)
QSOFracChange = -16.486 (%)
Stars thrown out = -35078
