# Supervised Learning and Emission Line Nebulae
This notebook explores the use of an ANN to do rapid BPT classifications.

The basic outline is as follows:
   1.  Obtain emission line ratios from 3Mdb with associated emission type 
   (i.e. Planetary Nebula, Supernova Remnant, etc.)
   2. Apply ANN to line ratios to find categorizing
   3. Verify results using emission type information
   

In [1]:
#%matplotlib widget
import pylab as pl
import numpy as np
import random
from astropy.io import fits
import datetime
from tqdm import tqdm_notebook as tqdm
import pymysql
import pandas as pd
import warnings
import scipy.special as ss
warnings.filterwarnings("ignore")


In [2]:
"""
Calculate the Flux of an emission line for SITELLE given the amplitude, broadening,
resolution, and wavenumber
"""
import numpy as np
import scipy.special as ss
def ampToFlux(ampl, broad, res, wvn):
    """
    ampl - amplitude
    broad - broadening
    res - spectral resolution
    wvn - wavenumber of emission line (cm^-1)
    """
    flux = np.sqrt(2*np.pi)*ampl*broad/ss.erf((2*wvn*broad)/(1.20671*res))
    return flux 


## Step 1: Download Emission Line information from 3Mdb

In [3]:
# First we load in the parameters needed to login to the sql database
MdB_HOST='3mdb.astro.unam.mx'
MdB_USER='OVN_user'
MdB_PASSWD='oiii5007'
MdB_PORT='3306'
MdB_DBs='3MdBs'
MdB_DBp='3MdB'
MdB_DB_17='3MdB_17'

# Now we connect to the database
co = pymysql.connect(host=MdB_HOST, db=MdB_DB_17, user=MdB_USER, passwd=MdB_PASSWD)
# Now we get the lines we want
HII_ampls = pd.read_sql("select H__1_656281A as h1, O__3_500684A as O3, N__2_658345A as n2, N__2_654805A as n1, \
                  S__2_673082A  as s2, S__2_671644A as s1, H__1_486133A as hb, O__1_630030A as O1,   \
                  O__2_372603A as O2,\
                  com1 as U, com2 as gf, com4 as ab \
                  from tab_17 \
                  where ref = 'BOND_2'"
                    , con=co)   


filter1 = HII_ampls['U'] == 'lU_mean = -2.5'
filter2 = HII_ampls['U'] == 'lU_mean = -3.0'
filter3 = HII_ampls['U'] == 'lU_mean = -3.5'
filter4 = HII_ampls['gf'] == 'fr = 3.0'
ampls_filter = HII_ampls.where(filter1 | filter2 | filter3 & filter4).dropna()
HII_ampls = ampls_filter.reset_index(drop=True)
# We want values between -3.0 and -5.4
# This is a terrible way to do this .. but ok
fil1 = HII_ampls['ab'] == 'ab_O = -3.0' 
fil2 = HII_ampls['ab'] == 'ab_O = -3.1999999999999993' 
fil3 = HII_ampls['ab'] == 'ab_O = -3.4000000000000004' 
fil4 = HII_ampls['ab'] == 'ab_O = -3.5999999999999996' 
fil5 = HII_ampls['ab'] == 'ab_O = -3.8000000000000007' 
fil6 = HII_ampls['ab'] == 'ab_O = -4.0'
fil7 = HII_ampls['ab'] == 'ab_O = -4.2' 
fil8 = HII_ampls['ab'] == 'ab_O = -4.4' 
fil9 = HII_ampls['ab'] == 'ab_O = -4.6000000000000005' 
fil10 = HII_ampls['ab'] == 'ab_O = -4.800000000000001' 
fil11 = HII_ampls['ab'] == 'ab_O = -5.0' 
fil12 = HII_ampls['ab'] == 'ab_O = -5.2' 
fil13 = HII_ampls['ab'] == 'ab_O = -5.4' 
ampls_filter2 = HII_ampls.where(fil1 | fil2 | fil3 | fil4 | fil5 | fil6 | fil7 | fil8 | fil9 | fil10 | fil11 | fil12 | fil13 ).dropna()
HII_ampls = ampls_filter2.reset_index(drop=True)



Pne_ampls = pd.read_sql("select H__1_656281A as h1, O__3_500684A as O3, N__2_658345A as n2, N__2_654805A as n1, \
                  S__2_673082A  as s2, S__2_671644A as s1, H__1_486133A as hb, O__1_630030A as O1,   \
                  O__2_372603A as O2 \
                  from tab_17 \
                  where ref = 'PNe_2021' AND com6=1"
                    , con=co)   

print("We have %i HII regions"%len(HII_ampls))
print("We have %i PNe regions"%len(Pne_ampls))


We have 35100 HII regions
We have 44245 PNe regions


In [4]:
HII_ampls

Unnamed: 0,h1,O3,n2,n1,s2,s1,hb,O1,O2,U,gf,ab
0,2.423468e+34,1.487054e+34,1.824563e+34,6.189549e+33,1.582750e+33,2.099474e+33,8.685931e+33,5.411352e+32,8.931309e+33,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
1,2.268429e+33,3.877971e+33,2.038031e+32,6.913796e+31,2.023898e+31,2.639084e+31,8.267306e+32,1.237395e+29,1.502413e+32,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
2,4.501052e+33,6.798347e+33,7.477893e+32,2.536758e+32,6.411098e+31,8.357841e+31,1.632939e+33,7.223600e+29,5.028867e+32,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
3,6.184303e+35,5.099142e+35,4.938637e+34,1.675352e+34,6.186150e+34,8.111407e+34,2.197038e+35,1.431199e+34,3.562803e+35,lU_mean = -2.5,fr = 0.03,ab_O = -3.1999999999999993
4,6.505400e+33,8.893684e+33,1.508617e+33,5.117729e+32,1.177849e+32,1.535362e+32,2.356498e+33,1.909016e+30,9.603714e+32,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
...,...,...,...,...,...,...,...,...,...,...,...,...
35095,1.230992e+37,7.470579e+35,1.772154e+37,6.011751e+36,9.898216e+35,1.261538e+36,4.260228e+36,3.851149e+34,9.151701e+35,lU_mean = -3.5,fr = 3.0,ab_O = -3.0
35096,1.158163e+37,1.231852e+35,1.393492e+37,4.727191e+36,1.342362e+36,1.716309e+36,4.059115e+36,4.733964e+34,1.613486e+36,lU_mean = -3.5,fr = 3.0,ab_O = -3.0
35097,1.356379e+37,7.748276e+34,1.434222e+37,4.865368e+36,9.427064e+35,1.200496e+36,4.693777e+36,2.766443e+34,5.544589e+35,lU_mean = -3.5,fr = 3.0,ab_O = -3.0
35098,1.385198e+37,7.798933e+35,2.229981e+37,7.564848e+36,1.297645e+36,1.660016e+36,4.797853e+36,7.497637e+34,1.226558e+36,lU_mean = -3.5,fr = 3.0,ab_O = -3.0


In [5]:
HII_ampls

Unnamed: 0,h1,O3,n2,n1,s2,s1,hb,O1,O2,U,gf,ab
0,2.423468e+34,1.487054e+34,1.824563e+34,6.189549e+33,1.582750e+33,2.099474e+33,8.685931e+33,5.411352e+32,8.931309e+33,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
1,2.268429e+33,3.877971e+33,2.038031e+32,6.913796e+31,2.023898e+31,2.639084e+31,8.267306e+32,1.237395e+29,1.502413e+32,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
2,4.501052e+33,6.798347e+33,7.477893e+32,2.536758e+32,6.411098e+31,8.357841e+31,1.632939e+33,7.223600e+29,5.028867e+32,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
3,6.184303e+35,5.099142e+35,4.938637e+34,1.675352e+34,6.186150e+34,8.111407e+34,2.197038e+35,1.431199e+34,3.562803e+35,lU_mean = -2.5,fr = 0.03,ab_O = -3.1999999999999993
4,6.505400e+33,8.893684e+33,1.508617e+33,5.117729e+32,1.177849e+32,1.535362e+32,2.356498e+33,1.909016e+30,9.603714e+32,lU_mean = -3.0,fr = 0.03,ab_O = -4.0
...,...,...,...,...,...,...,...,...,...,...,...,...
35095,1.230992e+37,7.470579e+35,1.772154e+37,6.011751e+36,9.898216e+35,1.261538e+36,4.260228e+36,3.851149e+34,9.151701e+35,lU_mean = -3.5,fr = 3.0,ab_O = -3.0
35096,1.158163e+37,1.231852e+35,1.393492e+37,4.727191e+36,1.342362e+36,1.716309e+36,4.059115e+36,4.733964e+34,1.613486e+36,lU_mean = -3.5,fr = 3.0,ab_O = -3.0
35097,1.356379e+37,7.748276e+34,1.434222e+37,4.865368e+36,9.427064e+35,1.200496e+36,4.693777e+36,2.766443e+34,5.544589e+35,lU_mean = -3.5,fr = 3.0,ab_O = -3.0
35098,1.385198e+37,7.798933e+35,2.229981e+37,7.564848e+36,1.297645e+36,1.660016e+36,4.797853e+36,7.497637e+34,1.226558e+36,lU_mean = -3.5,fr = 3.0,ab_O = -3.0


In [6]:
co = pymysql.connect(host=MdB_HOST, db=MdB_DBp, user=MdB_USER, passwd=MdB_PASSWD)
DIG_ampls = pd.read_sql("select H__1__6563A as h1, O__3__5007A as O3, N__2__6584A as n2, N__2__6548A as n1, \
                  S_II__6731A  as s2, S_II__6716A as s1, H__1__4861A as hb,  O_II__3726A as O2 \
                  from tab \
                  where ref = 'DIG_HR'"
                    , con=co)

print("We have %i DIG regions"%len(DIG_ampls))

We have 41327 DIG regions


In [7]:
# Now we connect to the shock database
co = pymysql.connect(host=MdB_HOST, db=MdB_DBs, user=MdB_USER, passwd=MdB_PASSWD)
SNR_ampls = pd.read_sql("""SELECT shock_params.shck_vel AS shck_vel, 
                         shock_params.mag_fld AS mag_fld,
                         emis_VI.NII_6548 AS n1,
                         emis_VI.NII_6583 AS n2,
                         emis_VI.HI_6563 AS h1,
                         emis_VI.OIII_5007 AS O3,
                         emis_VI.HI_4861 AS hb,
                         emis_VI.SII_6731 AS s2,
                         emis_VI.SII_6716 AS s1,
                         emis_VI.OII_3726 AS O2
                         FROM shock_params 
                         INNER JOIN emis_VI ON emis_VI.ModelID=shock_params.ModelID
                         INNER JOIN abundances ON abundances.AbundID=shock_params.AbundID
                         WHERE emis_VI.model_type='shock' 
                         AND abundances.name='Allen2008_Solar'
                         ORDER BY shck_vel, mag_fld;""", con=co)
print("We have %i SNR regions"%len(SNR_ampls))

We have 58903 SNR regions


## Step 2: Create Synthetic Spectra
We will be using the following lines for categorization:

Halpha,
Hbeta,
[OIII]5007,
[OI]63000,
[NII]6548,
[NII]6583,
[SII]6716,
[SII]6731

In [8]:
# Set observation parameters
resolution = 5000
vel_num = 2000  # Number of Velocity Values Sampled
broad_num = 1000  # Number of Broadening Values Sampled
# Sample velocity
#vel_ = np.random.uniform(-200,200,vel_num)
# Sample broadening
broad_ = np.random.uniform(10,200,broad_num)
# Same resolution
res_3 = np.random.uniform(resolution-200, resolution, 200)
res_2 = np.random.uniform(1600,1800,100)

In [11]:
## We now can model the lines. For the moment, we will assume all lines have the same velocity and broadening
# Do this for randomized combinations of vel_ and broad_
for ct_type, ampls_filter in enumerate([HII_ampls, Pne_ampls, SNR_ampls]):#, DIG_ampls]):  # Create both HII and PNe
    ct = 0  # Number of spectra
    if ct_type == 0:
        spec_type = 'HII'
        ampls = HII_ampls
    elif ct_type == 1:
        spec_type = 'PNe'
        ampls = Pne_ampls
    elif ct_type == 2:
        spec_type = 'SNR'
        ampls = SNR_ampls
    #elif ct_type == 3:
    ##    spec_type = 'DIG'
     #   ampls = DIG_ampls
    # Now create spectra
    with open('Ratios_'+spec_type+'_ratios.txt', 'w') as f:
        f2 = open('Ratios_'+spec_type+'_fluxes.txt', 'w')
        f2.write("Ct OIII SII NII OII Ha Hb\n")
        ct = 0
        for spec_ct in range(len(ampls)):
            spectrum = None  # Intialize
            pick_new = True
            # Randomly select velocity and broadening parameter and theta
            #velocity = random.choice(vel_)
            broadening = random.choice(broad_)
            resolution = random.choice(res_3)
            resolution2 = random.choice(res_2)
            sim_vals = ampls.iloc[spec_ct]
            pass_ct = 10  # Number of lines for which the criteria is meet
            #for line in [sim_vals['O2'],sim_vals['O3'],sim_vals['n2'],sim_vals['s2'],sim_vals['hb']]:
            #    if line/sim_vals['h1'] > 0.01:  # If greater than x-%
            #        pass_ct += 1  # Another line has passed
            #    else:
            #        break  # No need to check any others
            if pass_ct > 4: # If all lines and ratios pass
                ha = sim_vals['h1']#+np.random.normal(0,5)*(0.01*sim_vals['h1'])
                n1 = sim_vals['n1']#+np.random.normal(0,5)*(0.01*sim_vals['n1'])
                n2 = sim_vals['n2']#+np.random.normal(0,5)*(0.01*sim_vals['n2'])
                s1 = sim_vals['s1']#+np.random.normal(0,5)*(0.01*sim_vals['s1'])
                s2 = sim_vals['s2']#+np.random.normal(0,5)*(0.01*sim_vals['s2'])
                O3 = sim_vals['O3']#+np.random.normal(0,5)*(0.01*sim_vals['O3'])
                O2 = sim_vals['O2']#+np.random.normal(0,5)*(0.01*sim_vals['O2'])
                hb = sim_vals['hb']#+np.random.normal(0,5)*(0.01*sim_vals['hb'])
                f.write('%i %.4E %.4E %.4E %.4E %.4E %.4E %.4E %.4E\n'%(spec_ct, O3/hb, (s1+s2)/ha, n2/ha, O2/O3, O2/hb, O2/ha, ha/hb, ha))
                f2.write('%i %.4E %.4E %.4E %.4E %.4E %.4E\n'%(spec_ct, O3, (s1+s2), n2, O2, ha, hb))
                ct += 1
        f2.close()
        print("We have %i spectra for %s regions"%(ct, spec_type))



We have 35100 spectra for HII regions
We have 44245 spectra for PNe regions
We have 58903 spectra for SNR regions
