In [None]:
%matplotlib inline  
from models.binary_model.binary_model import BinaryModel
from models.ind_model.ind_model import OvAModel
from models.multi_model.multi_model import MultiModel
# Feature columns handed to the model: the magnitude columns plus redshift.
mags = [
    "g_mag", "r_mag", "i_mag", "z_mag", "y_mag",
    "W1_mag", "W2_mag",
    "J_mag", "K_mag", "H_mag",
    "redshift",
]

# Configure the multi-class model for this run.
# The `lsst_test=True` argument is currently disabled.
model = MultiModel(
    cols=mags,
    folds=10,
    transform_features=True,
    balanced_purity=True,
)

model.run_model()

In [None]:
import numpy as np
import pandas as pd  # pd.concat is used below; without this import a fresh kernel raises NameError

# Gather the per-fold test results stored on the model into two aligned
# collections: the feature rows and, for the same rows in the same order,
# the per-class probabilities with the true label in the last column.
X_test_list = []  # test-set feature frames, one per entry of model.datas
y_test_label_list = []  # matching probability/label arrays

for X_test_cur, test_probs in model.datas:
    X_test_list.append(X_test_cur)
    y_test_label_list.append(test_probs)

# Both are concatenated in the same order, so row k of y_test_label belongs
# to the k-th row (by position) of X_test. Note that pd.concat keeps each
# frame's original index labels, so labels need not equal positions.
X_test = pd.concat(X_test_list)
y_test_label = np.concatenate(y_test_label_list)

In [None]:
# Sanity check: the stacked test features, the stacked probability/label
# rows and the model's full feature set must all have the same row count.
n_feature_rows = X_test.shape[0]
n_label_rows = y_test_label.shape[0]
if not (n_feature_rows == n_label_rows == model.X.shape[0]):
    raise ValueError("DIMENSIONS INCORRECT.")
print("Dimensions correct.")

In [None]:
# Split the rows whose true label is a GRB into true positives (highest
# probability is the "GRB" class) and false negatives (highest probability is
# any other class), so their feature distributions can be plotted separately.
FN_GRBs_indices = []
TP_GRBs_indices = []
label_index = len(model.class_labels)  # true label follows the per-class probability columns

# y_test_label is a plain NumPy array lined up with X_test by row *position*,
# whereas the collected indices are used later with X_test.loc, i.e. by index
# *label*. X_test comes from pd.concat, which keeps the original labels, so
# label and position must not be mixed: read the array by position and store
# the label. (Assumes X_test's index labels are unique, as .loc lookups need.)
for position, index_label in enumerate(X_test.index):
    row = y_test_label[position]
    if "GRB" in row[label_index]:
        # Everything except the last column is a class probability.
        max_class_index = np.argmax(row[: len(row) - 1])
        max_class_name = model.class_labels[max_class_index]

        if max_class_name == "GRB":
            TP_GRBs_indices.append(index_label)
        else:
            FN_GRBs_indices.append(index_label)


In [None]:
from thex_data.data_consts import * 
from scipy.stats import norm
from utilities import utilities as util
import matplotlib.pyplot as plt

def cust_relabel(labels, class_labels):
    """
    Rename list of labels to be class names.

    Each raw label is replaced by the first entry of ``class_labels`` that
    occurs in it (``class_name in row_label``), checked in the order given.

    :param labels: iterable of raw row labels, one per row
    :param class_labels: candidate class names, in priority order
    :return: list with exactly one entry per input label; the matched class
        name, or None when no class name occurs in the label. Keeping a
        placeholder (instead of dropping the row) keeps the result aligned
        with the rows it is later assigned to.
    """
    return [
        next((class_name for class_name in class_labels if class_name in row_label), None)
        for row_label in labels
    ]


def plot_dist(ax, data, min_value, max_value, color, class_name):
    """
    Plots the normal distribution fitted to the given data.

    A normal distribution is fitted to ``data`` with ``scipy.stats.norm.fit``
    and its PDF is drawn on ``ax`` at 100 evenly spaced points between
    ``min_value`` and ``max_value``. The fitted mean and standard deviation
    are printed.

    :param ax: axes-like object providing ``plot``
    :param data: 1-D sample of feature values
    :param min_value: left end of the x range to draw
    :param max_value: right end of the x range to draw
    :param color: line color passed to ``ax.plot``
    :param class_name: legend label, also used in the printed summary
    """
    # An empty sample has no mean/std to fit; drawing it would only add a
    # NaN curve and a legend entry with no line, so skip it.
    if np.size(data) == 0:
        print(class_name + " has no samples; skipping distribution plot.")
        return

    mean, std = norm.fit(data)
    x = np.linspace(min_value, max_value, 100)
    y = norm.pdf(x, mean, std)
    print(class_name + " mean: " + str(mean) + " stdev: " + str(std))
    ax.plot(x, y, color=color, label=class_name)

def plot_classes_feature_dist(ax,  df, feature, min_value, max_value,  class_labels):
    """
    Plots the normal distribution of each transient type in df over 'feature'
    :param ax: axes to draw every class curve on
    :param df: DataFrame with both feature column and TARGET_LABEL column
    :param feature: Name of feature to plot distribution over
    :param min_value: left end of the x range passed on to plot_dist
    :param max_value: right end of the x range passed on to plot_dist
    :param class_labels: class labels; one curve is drawn per entry
    """
    colors = plt.get_cmap('tab20').colors
    for index, class_name in enumerate(class_labels):
        # Rows whose label equals this class, restricted to the feature column.
        class_values = df.loc[df[TARGET_LABEL] == class_name, feature].values
        # Wrap around the palette so more classes than colors cannot raise IndexError.
        class_color = colors[index % len(colors)]
        plot_dist(ax, class_values, min_value, max_value, class_color, class_name)

In [None]:
import matplotlib.pyplot as plt

# The true label sits in the column right after the per-class probabilities;
# map each raw label to its class name and attach it to a copy of the features.
raw_labels = y_test_label[:, len(model.class_labels)]
y_r = cust_relabel(raw_labels, model.class_labels)

full_df = X_test.copy()
full_df[TARGET_LABEL] = y_r

# GRB subsets drawn on top of the per-class curves: (row labels, color, legend name)
grb_overlays = (
    (TP_GRBs_indices, "black", "TP GRB"),
    (FN_GRBs_indices, "red", "FN GRB"),
)

# Plot feature dists
for feature in ['W1_mag_minus_W2_mag']:
    min_value = X_test[feature].min()
    max_value = X_test[feature].max()
    f, ax = plt.subplots(figsize=(FIG_WIDTH, FIG_HEIGHT), dpi=DPI)

    # All model classes; a subset such as ["Unspecified Ia", "Unspecified II"]
    # can be substituted here.
    use_classes = model.class_labels
    plot_classes_feature_dist(ax, full_df, feature, min_value, max_value, use_classes)

    # True-positive GRBs first, then false-negative GRBs.
    for overlay_indices, overlay_color, overlay_name in grb_overlays:
        overlay_values = X_test.loc[overlay_indices, feature].values
        plot_dist(ax, overlay_values, min_value, max_value, overlay_color, overlay_name)

    plt.xlabel(feature, fontsize=LAB_S)
    plt.ylabel("Normalized density", fontsize=LAB_S)
    plt.legend(loc='best')
    util.display_and_save_plot(model.dir, "feature_" + str(feature))