In [1]:
# Using ML to classify LAEs in the NEP Field
import tables as tb
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

from astropy import constants as const
from astropy.table import Table, column, join
from astropy.io import fits
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.visualization import ZScaleInterval

from regions import CircleSkyRegion, CirclePixelRegion

from hetdex_api.survey import Survey, FiberIndex
from hetdex_api.config import HDRconfig
from hetdex_api.detections import Detections
from hetdex_api.elixer_widget_cls import ElixerWidget

from hetdex_tools.get_spec import get_spectra

import pandas as pd
import seaborn as sns

In [2]:
# Open the 'hdr2.1' detections catalog through the HETDEX API.
# NOTE(review): loadtable=False presumably avoids reading the full detections
# table into memory up front -- confirm against the hetdex_api Detections docs.
det_object = Detections('hdr2.1', loadtable = False)

In [3]:
# Restrict the loaded catalog to detections inside the NEP field
# (selection recipe from Oscar).
#
# The footprint is approximated by a circle on the sky:
#   centre : R.A. = 18h 00m 00s, Dec. = +66d 33m 38.552s
#   radius : 3.5 degrees
# Every detection whose (RA, Dec) falls inside that circle is kept.

# Field centre, written as sexagesimal strings
ra = '18h00m00s'
dec = '+66d33m38.552s'
center_sky_coords = SkyCoord(ra=ra, dec=dec, frame='icrs')

# Cone search around the centre; the result is used to index the detections object
maskregion = det_object.query_by_coords(center_sky_coords, u.Quantity(3.5, u.deg))

# Sources within the NEP footprint
detects_in_NEP = det_object[maskregion]

In [17]:
# Fetch the spectrum of the first NEP detection to inspect what
# get_spectrum() returns for a single detectid.
spec_test = detects_in_NEP.get_spectrum(detects_in_NEP.detectid[0])

In [20]:
# Display the returned table: columns are wave1d, spec1d and spec1d_err
spec_test

wave1d,spec1d,spec1d_err
Angstrom,1e-17 erg / (Angstrom cm2 s),1e-17 erg / (Angstrom cm2 s)
float32,float32,float32
3470.0,1.3739702,3.301174
3472.0,1.3739702,3.301174
3474.0,1.3739702,3.301174
3476.0,1.3739702,3.301174
3478.0,1.3739702,3.301174
3480.0,1.3739702,3.301174
3482.0,1.3739702,3.301174
3484.0,1.3739702,3.301174
3486.0,1.3739702,3.301174
...,...,...


In [6]:
# Collect the 1-D spectrum and its error spectrum for every NEP detection.
#
# get_spectrum() is called once per detection, so this cell
# TAKES A LONG TIME TO RUN!!!
# Both 'spec1d' and 'spec1d_err' are read from the same returned table so that
# each detection only has to be fetched once. Any later cell that rebuilds
# specErr_ls with its own get_spectrum() loop repeats this work and can be skipped.
num_detects = np.size(detects_in_NEP.detectid)    # number of detections in NEP (69799 when this was run)

# Kept for backward compatibility: other cells size their lists from these.
rows, cols = (num_detects, 1)

# One numpy array per detection; plain arrays are used because the astropy
# column objects were awkward to work with. The lists are later fed into an
# astropy Table as array-valued columns.
spec_ls = []
specErr_ls = []
for det_id in detects_in_NEP.detectid:
    spec_table = detects_in_NEP.get_spectrum(det_id)
    spec_ls.append(np.array(spec_table['spec1d']))
    specErr_ls.append(np.array(spec_table['spec1d_err']))

In [7]:
# Error-spectrum counterpart of the spec1d cell: the procedure is the same,
# but the 'spec1d_err' column is kept instead of 'spec1d'.
# It lives in its own cell because each pass over the detections is slow.
# `rows` is the number of NEP detections, set in the spec1d cell.
specErr_ls = [
    np.array(detects_in_NEP.get_spectrum(detects_in_NEP.detectid[idx])['spec1d_err'])
    for idx in range(rows)
]

Goal: build a table with one row per detection, where the first column is the detectid, followed by columns for spec1d (array), spec1d error (array), and line information such as EW, line flux, magnitude, RA, and Dec.

In [8]:
# Assemble the per-detection catalogue: list the output columns in the order
# they should appear, then hand the resulting mapping to astropy's Table.
dic_to_table = dict([
    ('detects', detects_in_NEP.detectid),
    ('wavelength', detects_in_NEP.wave),
    ('ra', detects_in_NEP.ra),
    ('dec', detects_in_NEP.dec),
    ('spec1d', spec_ls),            # one flux array per detection
    ('spec1d_err', specErr_ls),     # matching error array per detection
])

detect_info = Table(dic_to_table)

In [21]:
# Show the assembled table; spec1d and spec1d_err are array-valued columns
detect_info

detects,wavelength,ra,dec,spec1d,spec1d_err
int64,float32,float32,float32,float32[1036],float32[1036]
2100395300,4795.6,274.08817,66.04029,1.3739702 .. 1.9345994,3.301174 .. 1.8484716
2100395301,3684.0,274.10938,66.03482,0.25948396 .. 0.104944706,4.9196553 .. 1.5617634
2100395303,4407.54,274.08798,66.04312,-0.18306294 .. 0.120995596,3.343359 .. 1.752679
2100395308,3779.01,274.16028,66.039375,3.5947866 .. 29.83107,5.0278735 .. 3.9272287
2100395309,3779.08,274.176,66.03694,7.810404 .. 8.485255,4.6782527 .. 2.2743156
2100395310,3777.96,274.16217,66.03878,5.4102297 .. 16.955706,5.465975 .. 3.143807
2100395312,3540.37,274.16037,66.039116,7.232119 .. 36.326458,5.369271 .. 4.3121314
2100395314,3779.73,274.17523,66.03811,5.690473 .. 39.514835,4.060331 .. 5.03607
2100395323,3783.45,274.17462,66.03752,6.0763054 .. 14.341705,4.181327 .. 2.8485444
2100395329,3911.59,274.1744,66.03752,3.6374705 .. 9.342827,3.9804099 .. 2.4529374


Got: the spectra, wavelength, RA and DEC
Need?: Line information such as EW, line flux and Magnitude of the source
    - Not needed for first part. 

Dividing spec1d by the spec1d error should give a signal-to-noise spectrum. This should suppress the noisy parts and make a real detection stand out more clearly. Do a wavelength cut: 3500 - 5500 Angstroms. Cut the first 100 Angstroms.

Want like a probability for Phase 1. 

Next steps (PHASE 1):
1. Read on machine learning algorithms
    1a. Learn how to implement
2. Figure out what data exactly is needed
3. Begin training

Phase 2 is use EW, line flux, Magnitude. Will use output from Phase 1.

In [10]:
# Once you have selected sources within the NEP footprint we can then go ahead and find some
# spectra from these sources - Oscar
# Take the Spectra node from the root of the detections object's `hdfile`.
# NOTE(review): presumably a PyTables table holding the per-detection spectra -- confirm.
spectra = detects_in_NEP.hdfile.root.Spectra

center_sky.separations(skycoords entire)
    return indices
    return distance
    return 3d

separate by dist
mask

main goal of algorithm
Want it to distinguish LAEs from O2 ([O II]) emitters. Wouldn't impose cuts unless training.

cut = filter
cut = signal to noise could do plya

might need cuts for training, to get confident lya and o2 samples

increase confidence by visually inspecting

could visually inspect to increase confidence.

plotting histograms to look for outliers

hetdex isn't perfect and it catches emission lines that aren't real. visual inspections helps

no need for dataframes if i found another way

save as csv with astropy table

csv into get_spec()

    has nice documentation
        
get_spectrum good for ids ****USE*** returns all fiber spec with corresponding weights.

    try to see if I can get LAE samples, O2 samples, and ambiguous samples. Classify some of them. Signal to noise (ask Oscar)
    
    
detection object filter by fields. turn to astropy table and then filter. 

Want psf weighted.

Reionization was the last major change in the universe. HETDEX gives us a way to study that period from a lower redshift.