In [1]:
#!/usr/bin/env python
# %matplotlib notebook 
%matplotlib inline 

# Essential Scientific Libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None) # Beware!

# Plotting Libraries
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = True
plt.style.use('dark_background')

# Basic Libraries
import sys
import os
import natsort
import glob

# Jupyter options
from IPython.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

from prettytable import PrettyTable

import pathlib

# The BADASS checkout root is taken to be the parent of the current working
# directory (i.e. the notebook is expected to be run from a sub-folder of it).
BADASS_DIR = pathlib.Path.cwd().resolve().parent

# Make the package root and its helper folders importable.  The insertion
# positions are deliberate: after this loop sys.path starts with
#   [BADASS_DIR, BADASS_DIR/badass_tools, BADASS_DIR/badass_utils, ...]
for position, search_dir in (
    (0, BADASS_DIR),
    (1, BADASS_DIR.joinpath('badass_utils')),  # utility functions
    (1, BADASS_DIR.joinpath('badass_tools')),  # tool functions
):
    sys.path.insert(position, str(search_dir))

# Project-local modules, resolved through the search path configured above.
import badass_check_input
import badass_test_suite

############################################################################

# Plot
# fig = plt.figure(figsize=(6,6))
# ax1 = fig.add_subplot(1,1,1)
# fontsize=24

# ax1.set_xlabel(r"",fontsize=fontsize)
# ax1.set_ylabel(r"",fontsize=fontsize)
# ax1.tick_params(axis='both', labelsize=fontsize-4)
# ax1.legend(loc="best",fontsize=fontsize-4)
# plt.tight_layout()

############################################################################

In [135]:
# Building blocks for the line configurations under test.  The "_2"/"_3"
# suffixes denote the second and third component of the same line.
narrow_1 = ["NA_H_BETA", "NA_OIII_4960", "NA_OIII_5007"]
narrow_2 = [name + "_2" for name in narrow_1]
narrow_3 = [name + "_3" for name in narrow_1]
broad_1 = ["BR_H_BETA"]
broad_2 = ["BR_H_BETA_2"]

# Ordered from the simplest to the most complex model; each test compares
# one entry against the next.
# Combinations currently left out: Type 2 double/triple component (narrow
# lines only), Type 1 double component with two broad components, and
# Type 1 triple component with three broad components.
configs = [
    list(narrow_1),                                      # Type 2 case, single component
    narrow_1 + broad_1,                                  # Type 1 case, single component
    narrow_1 + narrow_2 + broad_1,                       # Type 1 case, double component
    narrow_1 + narrow_2 + narrow_3 + broad_1,            # Type 1 case, triple component
    narrow_1 + narrow_2 + narrow_3 + broad_1 + broad_2,  # Type 1 case, triple component, two broad
]

test_options = {
    "test_mode": "config",
    # The lines to test.  (An optional "ranges" entry, e.g.
    # [(4900,5050),(4700,4940),(5100,5200)], is not used here; the range over
    # which a test is performed must include the tested line.)
    "lines": configs,

    # Fitting metrics to use when determining the best model, with one
    # threshold per metric, in the same order.
    # Alternative set: metrics ["BADASS", "ANOVA", "CHI2_RATIO", "AON"] with
    # thresholds [0.95, 0.95, 0.10, 3.0].
    "metrics": ["BADASS", "CHI2_RATIO", "AON"],
    "thresholds": [0.95, 0.10, 100.0],  # other tried values: [1,1,3], [0.0,0.0,0.0]

    "conv_mode": "any",     # "any" single threshold satisfies the solution, or "all" must satisfy thresholds
    "auto_stop": False,     # automatically stop testing once threshold is reached; False tests all no matter what
    "full_verbose": True,   # prints out all test fitting to screen
    "plot_tests": True,     # plot the fit of each model comparison
    "force_best": True,     # forces the more-complex model to have a fit better than the previous
    "continue_fit": False,  # continue the fit with the best chosen model
}

In [136]:
import pickle


def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by a trusted run.
fit_res_dict = _read_pickle("fit_res_dict.pickle")      # per-configuration fit results
test_results = _read_pickle("test_results.pickle")      # per-test metric columns
orig_line_list = _read_pickle("orig_line_list.pickle")  # line name -> line parameters

# List which configurations have stored fit results.
for config_name in fit_res_dict:
    print(config_name)

CONFIG_1
CONFIG_2
CONFIG_3
CONFIG_4
CONFIG_5


In [137]:
# Columns of test_results that are rounded to 3 decimals for display; the
# "TEST" column is shown as stored.
rounded_columns = ["CONFIG_A", "CONFIG_B", "ANOVA", "BADASS", "CHI2_RATIO",
                   "F_RATIO", "SSR_RATIO", "TARGET_RMSE"]

ptbl = PrettyTable()
ptbl.field_names = ["TEST"] + rounded_columns
for row_idx in range(len(test_results["TEST"])):
    rounded_values = np.round([test_results[col][row_idx] for col in rounded_columns], 3)
    ptbl.add_row([test_results["TEST"][row_idx]] + list(rounded_values))

print("\n Test Results:")
print(ptbl)

# Names of every line in the original line list.  To also dump each line's
# parameters, loop over orig_line_list[name] and print the key/value pairs.
for name in orig_line_list:
    print(name)


 Test Results:
+------+----------+----------+-------+--------+------------+---------+-----------+-------------+
| TEST | CONFIG_A | CONFIG_B | ANOVA | BADASS | CHI2_RATIO | F_RATIO | SSR_RATIO | TARGET_RMSE |
+------+----------+----------+-------+--------+------------+---------+-----------+-------------+
|  1   |   1.0    |   2.0    |  1.0  |  1.0   |   0.828    |  2.185  |   4.773   |    0.068    |
|  2   |   2.0    |   3.0    |  1.0  |  1.0   |   0.247    |  1.315  |   1.728   |    0.051    |
|  3   |   3.0    |   4.0    |  1.0  | 0.558  |   0.031    |  1.058  |    1.12   |    0.049    |
|  4   |   4.0    |   5.0    |  1.0  | 0.727  |   0.062    |   1.03  |   1.062   |    0.047    |
+------+----------+----------+-------+--------+------------+---------+-----------+-------------+
NA_H_BETA
NA_H_BETA_2
NA_H_BETA_3
NA_H_BETA_4
NA_H_BETA_5
NA_OIII_4960
NA_OIII_4960_2
NA_OIII_4960_3
NA_OIII_4960_4
NA_OIII_4960_5
NA_OIII_5007
NA_OIII_5007_2
NA_OIII_5007_3
NA_OIII_5007_4
NA_OIII_5007_5
BR_H

In [138]:
verbose = True

# ---------------------------------------------------------------------------
# Build the final line list from the stored test results:
#   1. carry over every line that is unrelated to the tested lines,
#   2. walk the tests in order and pick the final configuration,
#   3. prune line families that fail the amplitude-over-noise (AON) threshold.
# Results: new_line_list (dict), rmse_thresholds (list), config_final (1-based).
# ---------------------------------------------------------------------------

# How the per-metric flags returned by check_test_stats are combined.
reducers = {"any": np.any, "all": np.all}
conv_mode = test_options["conv_mode"]
if conv_mode not in reducers:
    # Previously an unknown mode fell through silently and ended in a NameError.
    raise ValueError('test_options["conv_mode"] must be "any" or "all"; got %r' % (conv_mode,))

n_tests = len(test_results["TEST"])
if n_tests == 0:
    # Without any test there is nothing to select a configuration from.
    raise ValueError("test_results contains no tests; cannot select a final configuration.")

new_line_list = {}
rmse_thresholds = []

# 1. Keep lines that are neither tested themselves nor children of a tested line.
all_tested_lines = {line for group in test_options["lines"] for line in group}
for line in orig_line_list:
    line_pars = orig_line_list[line]
    is_tested = line in all_tested_lines
    has_tested_parent = ("parent" in line_pars) and (line_pars["parent"] in all_tested_lines)
    if not (is_tested or has_tested_parent):
        new_line_list[line] = line_pars

# 2. Walk the tests in order.  Test i compares configuration i+1 with
#    configuration i+2 (1-based).  The first test whose checked metrics satisfy
#    conv_mode selects configuration i+1; if no test does, the last test
#    selects configuration i+2, i.e. the maximum number of components.
config_final = None
for i in range(n_tests):
    # AON is not a model-comparison metric; it is applied in step 3.
    current_metrics = {}
    target_metrics = {}
    for m, metric in enumerate(test_options["metrics"]):
        if metric != "AON":
            current_metrics[metric] = test_results[metric][i]
            target_metrics[metric] = test_options["thresholds"][m]

    # NOTE(review): presumably one boolean per metric - confirm against
    # badass_test_suite.check_test_stats.
    checked_metrics = badass_test_suite.check_test_stats(target_metrics, current_metrics)
    print(checked_metrics)

    if reducers[conv_mode](checked_metrics):
        config_final = i + 1
    elif i == n_tests - 1:
        config_final = i + 2
    else:
        continue

    for line in test_options["lines"][config_final - 1]:
        new_line_list[line] = orig_line_list[line]
    rmse_thresholds.append(test_results["TARGET_RMSE"][i])
    break

print(config_final)

# 3. Check the SNR (AON) level and prune any line family that does not
#    satisfy the requirement.
remove_aon = []
if "AON" in test_options["metrics"]:
    aon_thresh = test_options["thresholds"][test_options["metrics"].index("AON")]
    mccomps = fit_res_dict["CONFIG_%d" % config_final]["mccomps"]
    # The noise level does not depend on the line, so compute it once.
    avg_noise = np.nanmean(mccomps["NOISE"][0])

    for line in test_options["lines"][config_final - 1]:
        # Only first components (ncomp == 1) start a family; their children are
        # the lines whose "parent" entry names them.
        if not (("ncomp" in new_line_list[line]) and (new_line_list[line]["ncomp"] == 1)):
            continue
        line_family = [line] + [
            child for child in new_line_list
            if ("parent" in new_line_list[child]) and (new_line_list[child]["parent"] == line)
        ]
        print(line_family)

        # AON of the family = peak of the summed model components / mean noise.
        comb_line = np.zeros(len(mccomps["DATA"][0]))
        for member in line_family:
            comb_line += mccomps[member][0]
        aon = np.nanmax(comb_line) / avg_noise
        print(aon)

        if aon < aon_thresh:
            if verbose:
                print("\n %s line(s) does not meet amplitude-over-noise (AON) threshold.  Removing from lines list." % (line_family,))
            remove_aon.extend(line_family)

for line in remove_aon:
    new_line_list.pop(line, None)

# Report the final line list with all of its parameters.
print("\n")
for line in new_line_list:
    print(line)
    for hpar in new_line_list[line]:
        print("\t", hpar, ":", new_line_list[line][hpar])

print(rmse_thresholds)

[False, False]
[False, False]
[True, True]
3
['NA_H_BETA', 'NA_H_BETA_2']
19.497331250232257

 ['NA_H_BETA', 'NA_H_BETA_2'] line(s) does not meet amplitude-over-noise (AON) threshold.  Removing from lines list.
['NA_OIII_4960', 'NA_OIII_4960_2']
68.2783948210862

 ['NA_OIII_4960', 'NA_OIII_4960_2'] line(s) does not meet amplitude-over-noise (AON) threshold.  Removing from lines list.
['NA_OIII_5007', 'NA_OIII_5007_2']
203.66865811777433
['BR_H_BETA']
24.988164839151874

 ['BR_H_BETA'] line(s) does not meet amplitude-over-noise (AON) threshold.  Removing from lines list.


NA_UNK_1
	 center : 5200
	 line_type : na
	 line_profile : gaussian
	 amp : free
	 disp : free
	 voff : free
	 ncomp : 1
	 center_pix : 726.478971008575
	 disp_res_ang : 0.7773961211552205
	 disp_res_kms : 44.81874884630564
NA_OIII_5007
	 center : 5008.24
	 amp : free
	 disp : free
	 voff : free
	 line_type : na
	 label : [O III]
	 ncomp : 1
	 line_profile : gaussian
	 center_pix : 563.2959830508472
	 disp_res_ang : 0