In [41]:
# means that my matplotlib graphs will be included in the notebook, next to the code
%matplotlib inline

import os

import random
import numpy as np
import tables as tb
import pandas as pd
import matplotlib.pyplot as plt

from astropy.table import Table, Column, join
from astropy.coordinates import SkyCoord
from astropy.io import fits
import astropy.units as u

from hetdex_api.config import HDRconfig
from hetdex_api.detections import Detections
from hetdex_api.elixer_widget_cls import ElixerWidget

In [11]:
# The cell below was copied from the HETDEX "Detections Database and API" notebook; I am not sure why it is needed here.
# https://github.com/HETDEX/hetdex_api/blob/master/notebooks/api-notebooks/03-Detections_Database_and_API.ipynb

In [12]:
%%javascript
// Replace OutputArea's _should_scroll with a function that always returns false.
// Presumably this stops the notebook from collapsing long cell outputs into a
// scroll box — NOTE(review): confirm the front end in use still calls this hook.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}


<IPython.core.display.Javascript object>

In [13]:
# Open the HDR3 source catalog memory-mapped, then turn the table held in
# entry 1 of the opened file into a pandas DataFrame with the same column names.
HDR_source_cat = fits.open(
    '/home/jovyan/Hobby-Eberly-Telesco/hdr3/catalogs/source_catalog_3.0.1.fits',
    memmap=True,
)
HDR3_data = HDR_source_cat[1].data
hdr3_column_names = HDR3_data.columns.names
HDR3_DF = pd.DataFrame(HDR3_data, columns=hdr3_column_names)

In [14]:
# The full catalog is huge, so we only carry forward the columns that are
# actually useful for this analysis.
useful_hdr3_cols = [
    'source_id', 'detectid', 'selected_det',
    'ra_mean', 'dec_mean', 'fwhm', 'shotid', 'field',
    'ra', 'dec',
    'wave', 'wave_err',
    'flux', 'flux_err',
    'sn', 'sn_err',
    'chi2', 'chi2_err',
    'linewidth', 'linewidth_err',
    'plya_classification', 'z_hetdex', 'z_hetdex_conf', 'combined_plae',
]

# Project the original frame onto those columns. Note that DataFrame.size is
# rows x columns (total element count), not the number of rows.
reduced_hdr3_df = HDR3_DF[useful_hdr3_cols]
print(reduced_hdr3_df.size)

38267280


In [15]:
# Keep only rows whose shotid is at least 20180000000. The shotids displayed in
# this notebook start with a YYYYMMDD-style date, so this presumably discards
# every shot taken before 2018 (that data isn't good / not useful to us) — confirm.
is_usable_shot = reduced_hdr3_df['shotid'].to_numpy() >= 20180000000
removed_bad_shots_df = reduced_hdr3_df[is_usable_shot]
print(removed_bad_shots_df.size)

37544112


Is this enough to make a noise sample? Do I need to do anything else with the data? Seems too easy but I don't know.

We want positions with no source, but this catalog contains only HETDEX detections.

    Want to make sure:
        1. No HETDEX detection.
        
        2. No imaging counterpart. Do some cross-matching; when it gives us 0 matches, THEN we extract. We want to extract in basically empty space. Start with 100.

### Question: Do I need to keep this signal to noise filter?

In [29]:
# Keep only detections with sn above 6.5. This gives the high-confidence
# detections, which is something we would want to do anyway.
# Open question: what is the sn threshold that Valentina's code has trouble with?
high_sn_mask = removed_bad_shots_df['sn'] > 6.5
signal_to_noise_interval = removed_bad_shots_df.loc[high_sn_mask]
print(signal_to_noise_interval.size)

9101040


### Question about 'field' column.

Do I need to specify a HETDEX field? I ask because some rows have the field value `egs`. I am not sure whether that is within the scope of my project.

In [30]:
# Display the S/N-filtered detections as the cell's output.
signal_to_noise_interval

Unnamed: 0,source_id,detectid,selected_det,ra_mean,dec_mean,fwhm,shotid,field,ra,dec,wave,wave_err,flux,flux_err,sn,sn_err,chi2,chi2_err,linewidth,linewidth_err,plya_classification,z_hetdex,z_hetdex_conf,combined_plae
25155,3010000025254,3000462158,False,7.868332,0.030342,1.309933,20180104007,dex-fall,7.868559,0.029655,5235.350098,0.42,17.236435,2.059032,6.780000,0.44,1.04,0.22,2.77,0.38,0.001,0.403866,0.900000,0.010113
25156,3010000025378,3000462159,True,7.871345,0.028024,1.309933,20180104007,dex-fall,7.871345,0.028024,4633.870117,0.44,19.362127,2.402835,7.310000,0.48,1.00,0.22,2.84,0.41,0.001,0.243058,0.232867,0.244185
25157,3010000025254,3000462160,False,7.868332,0.030342,1.309933,20180104007,dex-fall,7.867933,0.031079,5229.189941,0.47,17.185760,2.144239,6.810000,0.51,0.99,0.22,2.97,0.42,0.001,0.403866,0.900000,0.010610
25162,3010000025373,3000462181,True,7.876181,0.047542,1.309933,20180104007,dex-fall,7.876181,0.047542,5136.430176,0.37,28.896681,2.668531,9.290000,0.51,1.02,0.22,3.29,0.34,0.001,0.377182,0.800000,0.002744
25173,3010000025362,3000462281,True,7.946422,0.046738,1.309933,20180104007,dex-fall,7.946422,0.046738,5049.540039,0.16,48.436264,2.287239,17.230000,0.52,0.98,0.22,2.76,0.15,0.001,0.354563,0.232867,0.001408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1365978,3010001385675,3013842807,True,215.014282,52.901100,1.683606,20220404017,egs,215.014282,52.901100,5120.149902,0.19,63.854778,3.043135,20.700001,0.43,1.03,0.22,3.40,0.19,0.001,0.373095,0.800000,0.001000
1365999,3010001385656,3013842935,True,214.925995,52.822315,1.683606,20220404017,egs,214.925995,52.822315,5103.379883,0.46,16.612011,2.008942,7.270000,0.53,1.07,0.22,3.05,0.41,0.001,0.369006,0.980000,0.030379
1366007,3010001385648,3013842979,True,214.790756,52.778305,1.683606,20220404017,egs,214.790756,52.778309,4830.020020,0.35,31.364128,2.692156,11.080000,0.45,1.00,0.23,3.40,0.37,0.001,0.295676,0.980000,0.037425
1366016,3010001385639,3013843028,True,214.908493,52.784447,1.683606,20220404017,egs,214.908493,52.784447,4474.029785,0.17,36.628250,1.740303,12.300000,0.64,1.34,0.22,3.13,0.16,0.001,0.200016,0.800000,0.009683


In [314]:
# List the column labels carried by the S/N-filtered frame.
signal_to_noise_interval.columns

Index(['source_id', 'detectid', 'selected_det', 'ra_mean', 'dec_mean', 'fwhm',
       'shotid', 'field', 'ra', 'dec', 'wave', 'wave_err', 'flux', 'flux_err',
       'sn', 'sn_err', 'chi2', 'chi2_err', 'linewidth', 'linewidth_err',
       'plya_classification', 'z_hetdex', 'z_hetdex_conf', 'combined_plae'],
      dtype='object')

Start with detection. One approach was fits file with coordinates. 

Or 

Use this but expand upon it. Find RA and DEC of each shot. And randomly extract.

    Delta RA and delta DEC. Double-check whether there is a source there.
    
For noise sample, no need to run through valentina's code. Only focus on High-z after filtering through Valentina's code.

In [32]:
# Peek at the first few ra_mean values of the filtered frame.
signal_to_noise_interval['ra_mean'].head()

25155    7.868332
25156    7.871345
25157    7.868332
25162    7.876181
25173    7.946422
Name: ra_mean, dtype: float32

### Question about checking source
Is it just checking if that combination of random RA and DEC are in the catalog?

In [114]:
# Goal: draw a random RA and a random DEC, then check whether there is a source
# at that position by comparing against ra_mean and dec_mean in the catalog.
# The draws are limited to the range spanned by the filtered detections.
ra_means = signal_to_noise_interval['ra_mean']
dec_means = signal_to_noise_interval['dec_mean']

ra_lower_random_bound, ra_upper_random_bound = ra_means.min(), ra_means.max()
dec_lower_random_bound, dec_upper_random_bound = dec_means.min(), dec_means.max()

### Would I check whether both the RA and DEC are in the catalog, i.e. the pair (Random_RA, Random_DEC), or check just one of them (Random_RA or Random_DEC)?

As in, do I make a random number for RA and a random number for DEC and then check whether the combination (tuple) of those two random numbers is in the catalog? That seems very unlikely to match, especially with decimal truncation.

In [116]:
# Largest dec_mean in the filtered frame (the same expression that sets dec_upper_random_bound).
signal_to_noise_interval['dec_mean'].max()

67.86091613769531

In [311]:
# Draw random right ascensions inside the catalog's RA range until one lands
# next to a catalogued ra_mean; the matching rows end up in truth_test.

# A random float essentially never equals a stored float32 exactly, so compare
# within a tolerance instead of using ==.
# Assumes ra_mean is in degrees, making this 1 arcsecond — TODO confirm.
RA_MATCH_TOLERANCE = 1.0 / 3600.0
# Upper limit on the number of draws so the cell can never loop forever.
MAX_RANDOM_DRAWS = 10_000

catalog_ra = signal_to_noise_interval['ra_mean'].to_numpy()
# Start from an empty frame (same columns) so the cell runs on a fresh kernel
# instead of relying on a truth_test left over from an earlier execution.
truth_test = signal_to_noise_interval.iloc[0:0]
for _ in range(MAX_RANDOM_DRAWS):
    # Bounds are the ra_* names computed from the filtered catalog.
    random_ra = random.uniform(ra_lower_random_bound, ra_upper_random_bound)
    is_near_random_ra = np.abs(catalog_ra - random_ra) <= RA_MATCH_TOLERANCE
    truth_test = signal_to_noise_interval[is_near_random_ra]
    if truth_test.size > 0:
        break
truth_test

Unnamed: 0,source_id,detectid,selected_det,ra_mean,dec_mean,fwhm,shotid,field,ra,dec,wave,wave_err,flux,flux_err,sn,sn_err,chi2,chi2_err,linewidth,linewidth_err,plya_classification,z_hetdex,z_hetdex_conf,combined_plae
1271455,3010001286702,3012646454,False,273.418579,66.694229,2.843162,20210508009,nep,273.418518,66.694168,4238.259766,1.42,97.927902,18.171225,11.39,0.77,1.53,0.23,9.74,1.64,0.272226,1.218806,0.9,0.001


In [313]:
truth_test

Unnamed: 0,source_id,detectid,selected_det,ra_mean,dec_mean,fwhm,shotid,field,ra,dec,wave,wave_err,flux,flux_err,sn,sn_err,chi2,chi2_err,linewidth,linewidth_err,plya_classification,z_hetdex,z_hetdex_conf,combined_plae
1271455,3010001286702,3012646454,False,273.418579,66.694229,2.843162,20210508009,nep,273.418518,66.694168,4238.259766,1.42,97.927902,18.171225,11.39,0.77,1.53,0.23,9.74,1.64,0.272226,1.218806,0.9,0.001
