# ASTR 499 - Notebook 1

In [1]:
%matplotlib inline
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import pandas as pd
from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units as u
import tarfile
import sqlite3
import lsst.daf.persistence as dafPersist

## Listing Quasars

##### HiTS DR1 Full Dataset

In [2]:
# Location of the HiTS DR1 random-forest classification catalogue.
hitsDataDir='/epyc/users/mrawls/premap2019/hits-dr1/classification'
hitsFilename='HiTS_RF_predictions.fits'
hitsFilepath=os.path.join(hitsDataDir,hitsFilename)
# Open the FITS file inside a context manager so the file handle is released
# as soon as the table (HDU 1) has been converted to a pandas dataframe.
with fits.open(hitsFilepath) as hitsTable:
    hitsDf=pd.DataFrame(hitsTable[1].data)
hitsDf.head() # shows us what the dataframe looks like

Unnamed: 0,ID,internalID,raMedian,decMedian,Variable_prob,Periodic_prob,DSCT_Prob,EB_Prob,ROTVAR_Prob,RRLYR_Prob,CV_Prob,QSO_Prob,SNe_Prob,Predicted_class
0,HiTS090434-041626,Blind15A_02_S1_1856_0087,136.145756,-4.273908,0.0,,,,,,,,,NV
1,HiTS090434-042338,Blind15A_02_N1_1253_0089,136.145561,-4.394096,0.0,,,,,,,,,NV
2,HiTS090434-042506,Blind15A_02_N1_1587_0054,136.142838,-4.418445,0.0,,,,,,,,,NV
3,HiTS090434-042629,Blind15A_02_N1_1903_0088,136.145272,-4.441517,0.0,,,,,,,,,NV
4,HiTS090435-000829,Blind15A_04_S1_0030_0051,136.14609,-0.141399,0.033194,,,,,,,,,NV


##### HiTS DR1 Subset

In [3]:
# Location of the HiTS DR1 variables subset catalogue.
hitsDataDir_sub='/epyc/users/mrawls/premap2019/hits-dr1'
hitsFilename_sub='HiTS_DR1_variables_DM-dataset-subset.fits'
hitsFilepath_sub=os.path.join(hitsDataDir_sub,hitsFilename_sub)
# Open the FITS file inside a context manager so the file handle is released
# as soon as the table (HDU 1) has been converted to a pandas dataframe.
with fits.open(hitsFilepath_sub) as hitsTable_sub:
    hitsDf_sub=pd.DataFrame(hitsTable_sub[1].data)
hitsDf_sub.head() # shows us what the dataframe looks like

Unnamed: 0,ID,internalID,raMedian,decMedian,Variable_prob,Periodic_prob,DSCT_Prob,EB_Prob,ROTVAR_Prob,RRLYR_Prob,...,Rcs,Skew,SlottedA_length,SmallKurtosis,Std,StetsonK,StetsonK_AC,g-i,g-r,r-i
0,HiTS095611+020452,Blind15A_26_N1_1715_0146,149.048038,2.081381,0.523601,0.202651,,,,,...,0.246009,-0.584446,0.134705,2.100608,0.032812,0.773151,0.738233,2.859381,1.439736,1.419645
1,HiTS095645+021135,Blind15A_26_N1_0181_2093,149.190225,2.193068,0.564932,0.635018,0.4,0.1,0.375,0.125,...,0.119267,-0.133774,0.067353,-0.444997,0.123052,0.841553,0.815632,0.99604,0.769289,0.226751
2,HiTS095652+023537,Blind15A_26_S14_1446_0379,149.220445,2.593841,0.962477,0.635836,0.025,0.125,0.75,0.1,...,0.227995,-0.317121,0.26941,-0.008631,0.067782,0.849505,0.846981,1.911452,1.484048,0.427403
3,HiTS095702+023628,Blind15A_26_S14_1253_0945,149.26178,2.607883,0.53239,0.933333,0.55,0.225,0.05,0.175,...,0.179956,-0.288333,0.067353,-1.067079,0.18179,0.853063,0.754798,,,
4,HiTS095726+015556,Blind15A_26_N8_1496_2317,149.361376,1.932343,0.97228,0.889344,0.05,0.175,0.625,0.15,...,0.269737,-0.035972,0.740878,-1.731678,0.089139,0.957841,0.948092,1.185192,0.894064,0.291128


##### Number of QSOs Above Various Probability Thresholds in the HiTS DR1 Full Dataset

In [4]:
# Full dataset: rows with QSO_Prob above 0.50 (rows lacking an ID are not counted).
hitsDf.query('QSO_Prob > .50')['ID'].notna().sum()

1834

In [5]:
# Full dataset: rows with QSO_Prob above 0.70 (rows lacking an ID are not counted).
hitsDf.query('QSO_Prob > .70')['ID'].notna().sum()

1186

In [6]:
# Full dataset: rows with QSO_Prob above 0.80 (rows lacking an ID are not counted).
hitsDf.query('QSO_Prob > .80')['ID'].notna().sum()

630

In [7]:
# Full dataset: rows with QSO_Prob above 0.90 (rows lacking an ID are not counted).
hitsDf.query('QSO_Prob > .90')['ID'].notna().sum()

160

In [8]:
# Full dataset: rows with QSO_Prob above 0.95 (rows lacking an ID are not counted).
hitsDf.query('QSO_Prob > .95')['ID'].notna().sum()

57

In [9]:
# Full dataset: rows with QSO_Prob above 0.99 (rows lacking an ID are not counted).
hitsDf.query('QSO_Prob > .99')['ID'].notna().sum()

14

##### Number of QSOs Above Various Probability Thresholds in the HiTS DR1 Subset

In [10]:
# Subset: rows with QSO_Prob above 0.50 (rows lacking an ID are not counted).
hitsDf_sub.query('QSO_Prob > .50')['ID'].notna().sum()

86

In [12]:
# Subset: rows with QSO_Prob above 0.70 (rows lacking an ID are not counted).
hitsDf_sub.query('QSO_Prob > .70')['ID'].notna().sum()

46

In [13]:
# Subset: rows with QSO_Prob above 0.80 (rows lacking an ID are not counted).
hitsDf_sub.query('QSO_Prob > .80')['ID'].notna().sum()

20

In [15]:
# Subset: rows with QSO_Prob above 0.90 (rows lacking an ID are not counted).
hitsDf_sub.query('QSO_Prob > .90')['ID'].notna().sum()

5

In [16]:
# Subset: rows with QSO_Prob above 0.95 (rows lacking an ID are not counted).
hitsDf_sub.query('QSO_Prob > .95')['ID'].notna().sum()

3

In [17]:
# Subset: rows with QSO_Prob above 0.99 (rows lacking an ID are not counted).
hitsDf_sub.query('QSO_Prob > .99')['ID'].notna().sum()

0

### Making New QSO Data Frames

##### HiTS DR1 Full Dataset

In [20]:
# List the column names of the full HiTS DR1 dataframe before choosing which to keep.
hitsDf.columns

Index(['ID', 'internalID', 'raMedian', 'decMedian', 'Variable_prob',
       'Periodic_prob', 'DSCT_Prob', 'EB_Prob', 'ROTVAR_Prob', 'RRLYR_Prob',
       'CV_Prob', 'QSO_Prob', 'SNe_Prob', 'Predicted_class'],
      dtype='object')

In [75]:
# Keep only the full-dataset rows whose QSO probability exceeds 0.80.
hitsDfQSO_filter = hitsDf.loc[hitsDf['QSO_Prob'].gt(0.80)]

In [76]:
# Remove the other probability columns and the predicted label, leaving the
# identifiers, coordinates and QSO_Prob.
hitsDfQSO = hitsDfQSO_filter.drop(
    columns=[
        'Variable_prob',
        'Periodic_prob',
        'DSCT_Prob',
        'EB_Prob',
        'ROTVAR_Prob',
        'RRLYR_Prob',
        'CV_Prob',
        'SNe_Prob',
        'Predicted_class',
    ]
)

In [77]:
# Preview the first rows of the reduced QSO dataframe built from the full dataset.
hitsDfQSO.head()

Unnamed: 0,ID,internalID,raMedian,decMedian,QSO_Prob
2668,HiTS090524-041110,Blind15A_02_S1_0648_2891,136.350909,-4.186367,0.975
4891,HiTS090543-041323,Blind15A_02_S1_1148_3999,136.431651,-4.223211,0.825
7190,HiTS090602-040245,Blind15A_02_S8_0977_2949,136.510771,-4.045886,0.95
7548,HiTS090604-044418,Blind15A_02_N14_1454_3046,136.516803,-4.738595,0.9
8304,HiTS090610-000337,Blind15A_04_S8_1177_3373,136.543299,-0.060552,0.85


##### HiTS DR1 Subset

In [79]:
# List the column names of the HiTS DR1 subset dataframe before choosing which to keep.
hitsDf_sub.columns

Index(['ID', 'internalID', 'raMedian', 'decMedian', 'Variable_prob',
       'Periodic_prob', 'DSCT_Prob', 'EB_Prob', 'ROTVAR_Prob', 'RRLYR_Prob',
       'CV_Prob', 'QSO_Prob', 'SNe_Prob', 'Predicted_class', 'ID_feat',
       'raMedian_feat', 'decMedian_feat', 'Amplitude', 'AndersonDarling',
       'Autocor_length', 'Beyond1Std', 'CAR_mean', 'CAR_sigma', 'CAR_tau',
       'Con', 'Eta_e', 'FluxPercentileRatioMid20', 'FluxPercentileRatioMid35',
       'FluxPercentileRatioMid50', 'FluxPercentileRatioMid65',
       'FluxPercentileRatioMid80', 'Freq1_harmonics_amplitude_0',
       'Freq1_harmonics_amplitude_1', 'Freq1_harmonics_amplitude_2',
       'Freq1_harmonics_amplitude_3', 'Freq1_harmonics_rel_phase_0',
       'Freq1_harmonics_rel_phase_1', 'Freq1_harmonics_rel_phase_2',
       'Freq1_harmonics_rel_phase_3', 'Freq2_harmonics_amplitude_0',
       'Freq2_harmonics_amplitude_1', 'Freq2_harmonics_amplitude_2',
       'Freq2_harmonics_amplitude_3', 'Freq2_harmonics_rel_phase_0',
       'Fre

In [83]:
# Keep only the subset rows whose QSO probability exceeds 0.80.
hitsDfQSO_sub_filter = hitsDf_sub.loc[hitsDf_sub['QSO_Prob'].gt(0.80)]

In [None]:
# TODO: the columns to drop from the subset have not been decided yet; with an
# empty list this just produces an unchanged copy of the filtered subset.
hitsDfQSO_sub = hitsDfQSO_sub_filter.drop(columns=[])

##### I'm going to stop here for now until we can discuss which columns we should include. The full dataset also seems to have very few columns, so I'd like to talk about getting more information into the new dataframe so that we can plot light curves.