In [24]:
# means that my matplotlib graphs will be included in the notebook, next to the code
%matplotlib inline

import os

import math
import astropy
import random
import numpy as np
import tables as tb
import pandas as pd
import matplotlib.pyplot as plt

from astropy.table import Table, Column, join
from astropy.coordinates import SkyCoord
from astropy.io import fits
import astropy.units as u

from hetdex_tools.get_spec import get_spectra
from hetdex_api.config import HDRconfig
from hetdex_api.detections import Detections
from hetdex_api.elixer_widget_cls import ElixerWidget

In [2]:
# The JavaScript cell below was copied from the HETDEX "Detections Database and API" notebook;
# I'm not sure why it is needed here.
# https://github.com/HETDEX/hetdex_api/blob/master/notebooks/api-notebooks/03-Detections_Database_and_API.ipynb

In [3]:
%%javascript
// Replace the output area's `_should_scroll` check with one that always returns false.
// NOTE(review): presumably this stops long cell outputs from being collapsed into a
// scrollable box in the classic Notebook front end — confirm it is still needed here.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

### Opens the catalogs and turns them into dataframes

I like to open both catalogs separately since they are both big (HDR3 especially!)

In [4]:
# Open the H20 NEP catalog and convert the table in extension 1 into a pandas DataFrame.
# The `with` block closes the FITS file once the DataFrame has been built; previously the
# file stayed open for the lifetime of the kernel.
# NOTE(review): with memmap=True astropy keeps the memory map alive while H20_NEP_data is
# still referenced, so the array remains readable after the file is closed.
with fits.open('H20_NEP_VIRUS_OVERLAP_CAT_10_2021.fits', memmap = True) as H20_NEP_catalog:
    H20_NEP_data = H20_NEP_catalog[1].data
    # One DataFrame column per FITS table column, using the FITS column names.
    H20_NEP_DF = pd.DataFrame(H20_NEP_data, columns=H20_NEP_data.columns.names)

In [5]:
# Open the HDR3 catalog and convert the table in extension 1 into a pandas DataFrame.
# TODO: double check that "detections catalog" is the right name for this file
# (the file itself is called source_catalog_3.0.1.fits).
HDR3_SOURCE_CATALOG_PATH = '/home/jovyan/Hobby-Eberly-Telesco/hdr3/catalogs/source_catalog_3.0.1.fits'

# The `with` block closes the FITS file once the DataFrame has been built; previously the
# file stayed open for the lifetime of the kernel.
# NOTE(review): with memmap=True astropy keeps the memory map alive while HDR3_data is
# still referenced, so the array remains readable after the file is closed.
with fits.open(HDR3_SOURCE_CATALOG_PATH, memmap = True) as HDR_source_cat:
    HDR3_data = HDR_source_cat[1].data
    # One DataFrame column per FITS table column, using the FITS column names.
    HDR3_DF = pd.DataFrame(HDR3_data, columns=HDR3_data.columns.names)

In [6]:
# The full HDR3 table is huge, so we only carry forward the columns we actually
# want to look at. The order below is the column order of the reduced DataFrame.
useful_hdr3_cols = [
    'source_id', 'detectid', 'selected_det',
    'ra_mean', 'dec_mean', 'fwhm', 'shotid', 'field',
    'ra', 'dec',
    'wave', 'wave_err',
    'flux', 'flux_err',
    'sn', 'sn_err',
    'chi2', 'chi2_err',
    'linewidth', 'linewidth_err',
    'plya_classification', 'z_hetdex', 'z_hetdex_conf', 'combined_plae',
]

# For H20 NEP we only need the position (RA, DEC) plus the flag that is used
# later to keep sources whose modeling is valid.
useful_h20nep_cols = [
    'RA_MODELING',
    'DEC_MODELING',
    'VALID_SOURCE_MODELING',
]

# Cut both original DataFrames down to the useful columns.
reduced_hdr3_df = HDR3_DF[useful_hdr3_cols]
reduced_h20nep_df = H20_NEP_DF[useful_h20nep_cols]

### Cleaning up the data

In [7]:
# Drop the early HDR3 shots because that data isn't good (not useful to us).
# The cut keeps every row whose shotid is at least 20180000000, so everything with a
# smaller shotid is removed.
# NOTE(review): shotid looks like a date followed by an observation number
# (e.g. 20190307026), which would make this "2018 onwards" — confirm.
# H20 NEP does not need an equivalent cut.
shot_is_recent_enough = reduced_hdr3_df['shotid'].values >= 20180000000
removed_bad_shots_hdr3_df = reduced_hdr3_df[shot_is_recent_enough]

### Info for VALID_SOURCE_MODELING column

For the H20 data we won't use any filters aside from the VALID_SOURCE_MODELING column, which just tells us that the model was able to converge and get fluxes from the source.

No need! For H20 we won't use any filters aside from the VALID_SOURCE_MODELING column, which just tells us that the model was able to converge and get fluxes from the source.

No: a False value in VALID_SOURCE_MODELING means that the model used to measure the fluxes failed somehow, so we cannot use that galaxy reliably.

We want the True ones, since for those we know the model was able to find a galaxy and we can use it for our imaging counterpart identification (i.e., we use these galaxies to check whether there is a galaxy at our new extraction coordinate).

### Filtering data. For HDR3 we keep detections with a signal-to-noise ratio greater than 6.5, and for H20 NEP we keep sources whose VALID_SOURCE_MODELING is True

In [8]:
# HDR3: keep only the high-confidence detections (signal-to-noise above 6.5).
# We want a high-confidence Lya sample; once we are confident in the S/N cut plus
# one more filter, that is what we will call high-confidence Lya.
# TODO: find out which S/N threshold Valentina's code has trouble with.
# Once we have both the noise sample and the high-confidence sample we can start
# expanding on Valentina's code and do our own stuff.
is_high_sn = removed_bad_shots_hdr3_df['sn'] > 6.5
signal_to_noise_interval = removed_bad_shots_hdr3_df[is_high_sn]

# No need to restrict to a field for now, but once trained we want to run this
# for the NEP field!

# H20 NEP: keep only the rows whose VALID_SOURCE_MODELING flag is True.
has_valid_modeling = reduced_h20nep_df['VALID_SOURCE_MODELING'] == True
valid_source_check = reduced_h20nep_df[has_valid_modeling]

### Picking a random source (from HDR3 only for now)  and applying an offset to that source

In [9]:
# Draw a single detection (n = 1) at random from the high-S/N HDR3 sample.
# A fixed random_state makes the draw reproducible: re-running the notebook picks the
# same source every time, so everything downstream refers to the same position.
# Change RANDOM_SEED (or pass random_state=None) to draw a different source.
RANDOM_SEED = 42
random_source = signal_to_noise_interval.sample(n = 1, random_state = RANDOM_SEED)

In [10]:
# Display the randomly drawn detection so we can see which source was picked.
random_source

Unnamed: 0,source_id,detectid,selected_det,ra_mean,dec_mean,fwhm,shotid,field,ra,dec,...,sn,sn_err,chi2,chi2_err,linewidth,linewidth_err,plya_classification,z_hetdex,z_hetdex_conf,combined_plae
258490,3010000264888,3002741531,False,168.646988,51.249844,1.470759,20190307026,dex-spring,168.646713,51.249657,...,9.43,0.78,2.3,0.22,9.13,2.44,0.25,0.108464,0.9,0.001


In [11]:
# Offset used to step away from the chosen source, towards a spot where we hope
# there is no source. 20 arcseconds is an experimental value for now.
# It is converted to degrees straight away because the catalog ra/dec values it
# gets added to are in degrees.
offset = (20 * u.arcsec).to(u.deg)

In [12]:
# Shift the random source's position by the offset; afterwards we check whether
# there is a source at the shifted position.
# Units: the catalog ra/dec are plain numbers in degrees, while `offset` is an astropy
# Quantity. Converting it explicitly to a bare number of degrees keeps the sum correct
# whatever unit `offset` is expressed in, instead of relying on the unit being dropped
# implicitly when a Quantity is added to a pandas Series.
offset_deg = offset.to_value(u.deg)
delta_ra = random_source['ra'] + offset_deg
delta_dec = random_source['dec'] + offset_deg
# NOTE(review): adding the same number of degrees to RA and Dec is a coordinate shift;
# on the sky the RA part shrinks by cos(dec), so the true angular distance from the
# source is not simply `offset` — confirm this is acceptable for the noise sample.

## Should I make this check an or?
Because this is saying, if the ra AND the dec are not in the catalog, then there is no source.

But if only one isn't there then it's possible the source isn't in the catalog either right?

Here I'm checking whether the source is in the HDR3 detections catalog. Ask about the catalog name, because I think I'm getting mixed up.

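When comparing the H20 NEP values to the offset coordinates, I'm running into a precision mismatch: the HDR3 coordinates have fewer decimal places than the H20 NEP ones. (TODO: finish this note.)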

### Checking if the offset ra and dec are in either of the catalogs

In [13]:
# Check whether any catalogued source sits at the offset position.
#
# Comparing coordinates for equality (or with a near-exact np.isclose tolerance) only
# finds a source whose catalogued RA/Dec are numerically identical to the offset
# position, so a real neighbour a fraction of an arcsecond away is missed and the check
# practically always reports "no source". Instead we measure the angular separation
# between the offset position and every catalog entry, and treat anything closer than
# MATCH_RADIUS as a source at that position.
#
# Each truth_check DataFrame holds the catalog rows within MATCH_RADIUS of the offset
# position. If it has no rows, that catalog has no source there.
MATCH_RADIUS = 3 * u.arcsec


def _sources_within_radius(catalog_df, ra_col, dec_col, target_coord, radius):
    """Return the rows of ``catalog_df`` that lie within ``radius`` of ``target_coord``.

    Parameters
    ----------
    catalog_df : pandas.DataFrame
        Catalog to search.
    ra_col, dec_col : str
        Names of the columns holding right ascension and declination.
        Assumed to be in degrees in the ICRS frame — TODO confirm for H20 NEP.
    target_coord : astropy.coordinates.SkyCoord
        The single position to test.
    radius : astropy.units.Quantity
        Maximum angular separation for a row to count as a match.

    Returns
    -------
    pandas.DataFrame
        The matching rows (empty if there are none).
    """
    if len(catalog_df) == 0:
        return catalog_df
    # Cast to native float64 so both catalogs are compared at the same precision.
    catalog_coords = SkyCoord(ra=np.asarray(catalog_df[ra_col], dtype=float) * u.deg,
                              dec=np.asarray(catalog_df[dec_col], dtype=float) * u.deg,
                              frame='icrs')
    is_nearby = np.asarray(target_coord.separation(catalog_coords) < radius)
    return catalog_df[is_nearby]


offset_position = SkyCoord(ra=float(delta_ra.values[0]) * u.deg,
                           dec=float(delta_dec.values[0]) * u.deg,
                           frame='icrs')

# HDR3: only the high-S/N detections are searched, as before.
truth_check_hdr3 = _sources_within_radius(signal_to_noise_interval, 'ra', 'dec',
                                          offset_position, MATCH_RADIUS)

# H20 NEP: only sources with valid modeling are searched, as before.
truth_check_h20 = _sources_within_radius(valid_source_check, 'RA_MODELING', 'DEC_MODELING',
                                         offset_position, MATCH_RADIUS)

In [14]:
# Number of HDR3 rows matched at the offset position; 0 means no HDR3 source there.
# len() counts rows, whereas .size counts rows x columns.
len(truth_check_hdr3)

0

In [15]:
# Number of H20 NEP rows matched at the offset position; 0 means no H20 NEP source there.
# len() counts rows, whereas .size counts rows x columns.
len(truth_check_h20)

0

### Now we extract!

Got a bit confused here with the Slack messages. Also, I wanted to make sure my previous code was right before continuing.

"You should have two RA and DEC skycoords for every coordinate in both catalogs and compare the new coordinate to these two"


## Ask about ICRS frame out of curiosity.
Read documentation but still confused!

In [21]:
# Build the sky position of the offset point: the shifted RA/Dec of the single
# random source, given in degrees in the ICRS frame.
offset_ra_deg = delta_ra.values[0]
offset_dec_deg = delta_dec.values[0]
sky_coords = SkyCoord(ra=offset_ra_deg, dec=offset_dec_deg, unit=u.deg, frame='icrs')

In [27]:
# Display the SkyCoord to confirm the offset position before passing it to get_spectra.
sky_coords

<SkyCoord (ICRS): (ra, dec) in deg
    (168.65226746, 51.25521088)>

In [28]:
# Run the HETDEX extraction at the offset position and keep the returned table in a
# variable so it can be inspected again later; the last line displays it.
offset_spectra = get_spectra(sky_coords)
offset_spectra

[INFO - 2022-10-14 02:44:21,748] Finding shots of interest
[INFO - 2022-10-14 02:44:28,262] Number of shots of interest: 4
[INFO - 2022-10-14 02:44:28,264] Extracting 4 sources
[INFO - 2022-10-14 02:44:28,817] Working on shot: 20180115011
[INFO - 2022-10-14 02:44:28,818] Working on shot: 20180212009
[INFO - 2022-10-14 02:44:28,818] Working on shot: 20190307026
[INFO - 2022-10-14 02:44:28,818] Working on shot: 20180208011
[INFO - 2022-10-14 02:44:32,102] Extraction of sources completed in 0.06 minutes.
[INFO - 2022-10-14 02:44:32,183] Retrieved 0 spectra.


ID,shotid,wavelength,spec,spec_err,apcor,flag,gal_flag,amp_flag,meteor_flag
Unnamed: 0_level_1,Unnamed: 1_level_1,Angstrom,1e-17 erg / (Angstrom cm2 s),1e-17 erg / (Angstrom cm2 s),Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
float64,float64,float64,float64,float64,float64,int64,int64,int64,int64


## NOTES

We want no source. This catalog is the one with the HETDEX detections.

    Want to make sure:
        1. No HETDEX detection
        
        2. No imaging counterpart. Do some cross-matching. Gives us a 0 and THEN we extract. Want to extract in basically empty space. Start with 100. 
        
            Coordinates still. Trying to see if no match with the .fits file.

Start with detection. One approach was fits file with coordinates. 

Or 

Use this but expand upon it. Find RA and DEC of each shot. And randomly extract.

    delta ra and delta dec. Double check if is there a source there. 
    
For the noise sample, no need to run it through Valentina's code. Only focus on high-z after filtering through Valentina's code.

Once we have noise sample.

Run it through Valentina's code. Hopefully it detects them all as high-z, because they are neither low-z nor stars.

Two skycoords. Check coordinates to see if HETDEX detection is there. Compare minimum separation. If the difference is smaller than 3 arcseconds. Then there is a source there, so do not extract there.