In [1]:
#!/usr/bin/env python
# %matplotlib notebook
%matplotlib inline
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = True
plt.style.use('dark_background')
# plt.rcParams["font.family"] = "Times New Roman"
# import seaborn as sns
# sns.set_style("whitegrid")

import pickle
import pathlib
import os
import sys
from prettytable import PrettyTable

from IPython.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

# Make the BADASS package importable from this notebook.
# BADASS_DIR is the parent of the current working directory, so this only
# resolves correctly when the kernel is started from a sub-directory of the
# BADASS checkout (presumably the notebook's own folder -- confirm).
BADASS_DIR = pathlib.Path(os.getcwd()).resolve().parent
# Inserted at index 1 (not 0), i.e. right after the first sys.path entry.
sys.path.insert(1,str(BADASS_DIR))
# Project modules found through the path entry added above.
import badass as badass
import badass_check_input
import badass_ncomp_options
import badass_test_suite
# from astropy.table import Table
# tbdata = Table.read('datafile', format='fits')
# df = tbdata.to_pandas()
# Convert object/strings to utf-8
# df['class'] = df['class'].str.decode('utf-8') 
# or 
# for i,c in enumerate(df.columns):
    # if df.dtypes[i]=="object":
        # df[c] = df[c].str.decode('utf-8')
        
# Plot
# fig = plt.figure(figsize=(5,5))
# ax1 = fig.add_subplot(1,1,1)
# fontsize=16

# ax1.set_xlabel(r"",fontsize=fontsize)
# ax1.set_ylabel(r"",fontsize=fontsize)
# ax1.set_title(r"",fontsize=fontsize)
# ax1.tick_params(axis='both', labelsize=fontsize)
# ax1.legend(loc='best',fontsize=fontsize)
# plt.tight_layout()

In [80]:
# Gates the diagnostic print-outs in the result-selection cell further down.
verbose = True

# Options describing the line test.
# The result-selection cell in this notebook reads "lines", "metrics",
# "thresholds" and "conv_mode"; the remaining keys are not referenced in this
# section (presumably consumed by BADASS itself -- confirm).
test_options = {
"test_mode":"line",
# Each inner list is one group of lines that is tested (and kept or dropped) together.
"lines": [["NA_OIII_5007","NA_OIII_4960","NA_H_BETA"],["BR_H_BETA"]], # The lines to test
# "ranges":[(4900,5050),(4700,4940),(5100,5200)], # The range over which the test is performed must include the tested line

# Orig
# "thresholds" is positional: thresholds[k] is the threshold for metrics[k].
"metrics": ["BADASS", "ANOVA", "CHI2_RATIO","AON"],# Fitting metrics to use when determining the best model
"thresholds": [0.95, 0.95, 0.10, 3.0],
    
# "metrics": ["CHI2_RATIO","AON"],# Fitting metrics to use when determining the best model
# "thresholds": [0.0, 3.0],    

"conv_mode": "any", # "any" single threshold satisfies the solution, or "all" must satisfy thresholds
"auto_stop":False, # automatically stop testing once threshold is reached; False test all no matter what
"full_verbose":True, # prints out all test fitting to screen
"plot_tests":True, # plot the fit of each model comparison
"force_best":True, # this forces the more-complex model to have a fit better than the previous.
"continue_fit":True, # continue the fit with the best chosen model
}

# Line list: line name -> dict of hyper-parameters.
# "ncomp" is the component index of the entry and "parent" names the ncomp=1
# line it belongs to; the result-selection cell uses both to decide which
# components to keep.
# String values other than "free" reference parameters of other lines
# (e.g. "NA_OIII_5007_DISP"); presumably BADASS evaluates them as tie
# constraints -- confirm against the BADASS documentation.
orig_line_list = {
    
    # Free Na. H-beta
    # "NA_H_BETA"      :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"na","label":r"H$\beta$","ncomp":1,},
    # "NA_H_BETA_2"    :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":2,"parent":"NA_H_BETA"},
    # "NA_H_BETA_3"    :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":3,"parent":"NA_H_BETA"}, 
    # "NA_H_BETA_4"    :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":4,"parent":"NA_H_BETA"}, 
    # "NA_H_BETA_5"    :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":5,"parent":"NA_H_BETA"}, 

    # Narrow H-beta: dispersion of every component (and amp/voff of components 2-5) is expressed through the matching [O III] 5007 component.
    "NA_H_BETA"      :{"center":4862.691,"amp":"free","disp":"NA_OIII_5007_DISP","voff":"free","line_type":"na","label":r"H$\beta$","ncomp":1,},
    "NA_H_BETA_2"    :{"center":4862.691,"amp":"NA_H_BETA_AMP*(NA_OIII_5007_2_AMP/NA_OIII_5007_AMP)","disp":"NA_OIII_5007_2_DISP","voff":"NA_OIII_5007_2_VOFF","line_type":"na","ncomp":2,"parent":"NA_H_BETA"},
    "NA_H_BETA_3"    :{"center":4862.691,"amp":"NA_H_BETA_AMP*(NA_OIII_5007_3_AMP/NA_OIII_5007_AMP)","disp":"NA_OIII_5007_3_DISP","voff":"NA_OIII_5007_3_VOFF","line_type":"na","ncomp":3,"parent":"NA_H_BETA"}, 
    "NA_H_BETA_4"    :{"center":4862.691,"amp":"NA_H_BETA_AMP*(NA_OIII_5007_4_AMP/NA_OIII_5007_AMP)","disp":"NA_OIII_5007_4_DISP","voff":"NA_OIII_5007_4_VOFF","line_type":"na","ncomp":4,"parent":"NA_H_BETA"}, 
    "NA_H_BETA_5"    :{"center":4862.691,"amp":"NA_H_BETA_AMP*(NA_OIII_5007_5_AMP/NA_OIII_5007_AMP)","disp":"NA_OIII_5007_5_DISP","voff":"NA_OIII_5007_5_VOFF","line_type":"na","ncomp":5,"parent":"NA_H_BETA"}, 


    # [O III] 4960: amplitude is the matching 5007 component's amplitude divided by 2.98; disp/voff reference that component too.
    "NA_OIII_4960"   :{"center":4960.295,"amp":"(NA_OIII_5007_AMP/2.98)","disp":"NA_OIII_5007_DISP","voff":"NA_OIII_5007_VOFF","line_type":"na","label":r"[O III]","ncomp":1,},
    "NA_OIII_4960_2" :{"center":4960.295,"amp":"(NA_OIII_5007_2_AMP/2.98)","disp":"NA_OIII_5007_2_DISP","voff":"NA_OIII_5007_2_VOFF","line_type":"na","ncomp":2,"parent":"NA_OIII_4960"},
    "NA_OIII_4960_3" :{"center":4960.295,"amp":"(NA_OIII_5007_3_AMP/2.98)","disp":"NA_OIII_5007_3_DISP","voff":"NA_OIII_5007_3_VOFF","line_type":"na","ncomp":3,"parent":"NA_OIII_4960"},
    "NA_OIII_4960_4" :{"center":4960.295,"amp":"(NA_OIII_5007_4_AMP/2.98)","disp":"NA_OIII_5007_4_DISP","voff":"NA_OIII_5007_4_VOFF","line_type":"na","ncomp":4,"parent":"NA_OIII_4960"},
    "NA_OIII_4960_5" :{"center":4960.295,"amp":"(NA_OIII_5007_5_AMP/2.98)","disp":"NA_OIII_5007_5_DISP","voff":"NA_OIII_5007_5_VOFF","line_type":"na","ncomp":5,"parent":"NA_OIII_4960"},

    # [O III] 5007: all components fully free; the other narrow lines above reference these parameters.
    "NA_OIII_5007"   :{"center":5008.240,"amp":"free","disp":"free","voff":"free","line_type":"na","label":r"[O III]","ncomp":1,},
    "NA_OIII_5007_2" :{"center":5008.240,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":2,"parent":"NA_OIII_5007"},
    "NA_OIII_5007_3" :{"center":5008.240,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":3,"parent":"NA_OIII_5007"},
    "NA_OIII_5007_4" :{"center":5008.240,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":4,"parent":"NA_OIII_5007"},
    "NA_OIII_5007_5" :{"center":5008.240,"amp":"free","disp":"free","voff":"free","line_type":"na","ncomp":5,"parent":"NA_OIII_5007"},
    
    # Broad H-beta: up to three fully free components.
    "BR_H_BETA"      :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"br","ncomp":1,},
    "BR_H_BETA_2"    :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"br","ncomp":2,"parent":"BR_H_BETA"},
    "BR_H_BETA_3"    :{"center":4862.691,"amp":"free","disp":"free","voff":"free","line_type":"br","ncomp":3,"parent":"BR_H_BETA"},

    # Not part of any tested group and has no "parent", so the selection cell copies it to the new line list unchanged.
    # Note that it defines no "amp"/"disp"/"voff" entries.
    "NA_UNK_1"       :{"center":5200,"line_type":"na","ncomp":1},

}
# Alias, not a copy: line_list and orig_line_list refer to the same dict object.
line_list = orig_line_list

In [81]:
# with open('fit_res_dict.pickle', 'rb') as handle:
#     fit_res_dict = pickle.load(handle)

# Load the column-oriented test results (dict of equal-length lists, one entry
# per model comparison) written by an earlier run.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files produced by a trusted run.
with open('test_results.pickle', 'rb') as handle:
    test_results = pickle.load(handle)

# Hand-entered target RMSE for each result row, in the same order as test_results["TEST"].
test_results["TARGET_RMSE"] = [0.0680, 0.050,0.0470,0.0460,0.0450,0.068,0.068,0.065]

print(len(test_results["TEST"]))
print(len(test_results["TARGET_RMSE"]))

# The selection cell indexes every column with the same row index, so a
# mismatch here would surface later as an unrelated IndexError (or silently
# pair rows with the wrong RMSE).  Fail right away instead of relying on the
# two printed numbers being compared by eye.
if len(test_results["TARGET_RMSE"]) != len(test_results["TEST"]):
    raise ValueError(
        "TARGET_RMSE has %d entries but test_results contains %d tests"
        % (len(test_results["TARGET_RMSE"]), len(test_results["TEST"]))
    )

8
8


In [82]:
# Testing should've concluded at this stage; so now we need to check the results and determine the best line list.
#
# Reads  : test_options, test_results, line_list, orig_line_list (earlier cells).
# Writes : new_line_list   -- selected line name -> hyper-parameter dict
#          rmse_thresholds -- one target RMSE per tested line group that has results
new_line_list = {}
rmse_thresholds = []

# "any": one satisfied metric is enough; "all": every metric must be satisfied.
# Any other value previously selected nothing and only failed much later when
# taking the minimum of an empty rmse_thresholds list, so reject it up front.
if test_options["conv_mode"] not in ("any", "all"):
    raise ValueError("test_options['conv_mode'] must be 'any' or 'all', got %r" % (test_options["conv_mode"],))
metrics_satisfied = np.any if test_options["conv_mode"]=="any" else np.all

# Get lines that are not being tested and are not associated and add them to the new line list.
all_tested_lines = np.unique([line for group in test_options["lines"] for line in group])
for line in line_list:
    is_tested = line in all_tested_lines
    is_child_of_tested = ("parent" in line_list[line]) and (line_list[line]["parent"] in all_tested_lines)
    if not (is_tested or is_child_of_tested):
        new_line_list[line] = line_list[line]

# Now we check the test_results, one tested line group at a time.
for test in test_options["lines"]:
    # Rows of test_results that belong to this group, kept in stored order.
    res = {key: [] for key in test_results}
    for i,t in enumerate(test_results["TEST"]):
        if t==test:
            for key in test_results:
                res[key].append(test_results[key][i])
    print(test)
    print("\t",res["TARGET_RMSE"])

    n_rows = len(res["TEST"])
    for i in range(n_rows):
        # AON is handled separately after this stage, so it is excluded here.
        current_metrics = {}
        target_metrics = {}
        for m,metric in enumerate(test_options["metrics"]):
            if metric not in ["AON"]:
                current_metrics[metric] = res[metric][i]
                target_metrics[metric] = test_options["thresholds"][m]

        checked_metrics = badass_test_suite.check_test_stats(target_metrics,current_metrics)
        converged = bool(metrics_satisfied(checked_metrics))
        is_last_row = (i==n_rows-1)

        if converged and (i==0):
            # Thresholds met on the very first comparison: none of the tested
            # lines are added and the RMSE threshold is left unbounded.
            rmse_thresholds.append(np.inf)
            break
        if not (converged or is_last_row):
            continue

        # Either the thresholds were met at row i>0, or we reached the end
        # without convergence and fall back to the max number of components.
        max_ncomp = res["NCOMP_B"][i]
        for line in line_list:
            if line in test:
                keep = True
            else:
                hpars = line_list[line]
                is_child = ("parent" in hpars) and (hpars["parent"] in test)
                # Converged: keep components strictly below NCOMP_B of this row.
                # Not converged (last row): keep components up to and including it.
                keep = is_child and ((hpars["ncomp"]<max_ncomp) if converged else (hpars["ncomp"]<=max_ncomp))
            if keep:
                new_line_list[line] = orig_line_list[line]
        # Record the threshold once per tested group (it used to be appended
        # once per selected line, duplicating the same value many times).
        rmse_thresholds.append(res["TARGET_RMSE"][i-1] if converged else res["TARGET_RMSE"][i])
        break

# Now check AON if it is a test statistic.
# Every tested line group whose first result row has an AON below the AON
# threshold is removed from new_line_list, together with its child components.
remove_aon = []
if "AON" in test_options["metrics"]:
    aon_thresh = test_options["thresholds"][test_options["metrics"].index("AON")]
    for test in test_options["lines"]:
        # Get the NCOMP_0 vs. NCOMP_1 AON (the first stored row for this group)
        aon_rows = [test_results["AON"][i] for i,t in enumerate(test_results["TEST"]) if t==test]
        if len(aon_rows)==0:
            # No results were recorded for this group; nothing to judge.
            continue
        aon = aon_rows[0]
        if aon>=aon_thresh:
            # This group passes; move on to the next one.  (A `break` here
            # would skip the AON check for every remaining group.)
            continue
        if verbose:
            print("\n %s line(s) does not meet amplitude-over-noise (AON) threshold.  Removing from line list." % (test,))
        # Collect first and delete afterwards: the dict must not change size
        # while it is being iterated.
        for line in new_line_list:
            if (line in test) or (("parent" in new_line_list[line]) and (new_line_list[line]["parent"] in test)):
                remove_aon.append(line)
for line in remove_aon:
    new_line_list.pop(line,None)
#
# Diagnostic dump of the selected line list: first the names only, then every
# line together with its hyper-parameters.
if verbose:
    print("\n")
    print("New Line List:")
    for line in new_line_list:
        print(line)
    print("\n")

    for line, hpars in new_line_list.items():
        print(line)
        for hpar, value in hpars.items():
            print("\t",hpar,":",value)


# Print a table with the results and write it to the log
# The first column is the tested line group; all remaining columns are numeric
# and rounded to 4 decimals.
table_columns = ["TEST","NCOMP_A","NCOMP_B","ANOVA","BADASS","CHI2_RATIO","F_RATIO","SSR_RATIO","AON","TARGET_RMSE"]
ptbl = PrettyTable()
ptbl.field_names = table_columns
for i in range(len(test_results["TEST"])):
    numeric_cells = np.round([test_results[column][i] for column in table_columns[1:]],4)
    ptbl.add_row([test_results["TEST"][i]]+list(numeric_cells))
if verbose:
    print("\n Test Results:")
    print(ptbl)

# Collected RMSE thresholds and the tightest (smallest) one.
print(rmse_thresholds)
print(np.min(rmse_thresholds))

['NA_OIII_5007', 'NA_OIII_4960', 'NA_H_BETA']
	 [0.068, 0.05, 0.047, 0.046, 0.045]
['BR_H_BETA']
	 [0.068, 0.068, 0.065]


New Line List:
NA_UNK_1
NA_H_BETA
NA_H_BETA_2
NA_OIII_4960
NA_OIII_4960_2
NA_OIII_5007
NA_OIII_5007_2
BR_H_BETA


NA_UNK_1
	 center : 5200
	 line_type : na
	 ncomp : 1
NA_H_BETA
	 center : 4862.691
	 amp : free
	 disp : NA_OIII_5007_DISP
	 voff : free
	 line_type : na
	 label : H$\beta$
	 ncomp : 1
NA_H_BETA_2
	 center : 4862.691
	 amp : NA_H_BETA_AMP*(NA_OIII_5007_2_AMP/NA_OIII_5007_AMP)
	 disp : NA_OIII_5007_2_DISP
	 voff : NA_OIII_5007_2_VOFF
	 line_type : na
	 ncomp : 2
	 parent : NA_H_BETA
NA_OIII_4960
	 center : 4960.295
	 amp : (NA_OIII_5007_AMP/2.98)
	 disp : NA_OIII_5007_DISP
	 voff : NA_OIII_5007_VOFF
	 line_type : na
	 label : [O III]
	 ncomp : 1
NA_OIII_4960_2
	 center : 4960.295
	 amp : (NA_OIII_5007_2_AMP/2.98)
	 disp : NA_OIII_5007_2_DISP
	 voff : NA_OIII_5007_2_VOFF
	 line_type : na
	 ncomp : 2
	 parent : NA_OIII_4960
NA_OIII_5007
	 center : 5008.24