## Functionality to combine visualizations

### Combine curves/rates

In [None]:
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# NOTE(review): filters added inside catch_warnings() are presumably discarded
# when the block exits, so this looks like a no-op on top of the global
# 'ignore' filter above — confirm and consider removing.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
%matplotlib inline  
from models.binary_model.binary_model import BinaryModel
from models.ind_model.ind_model import OvAModel
from models.multi_model.multi_model import MultiModel

from mainmodel.helper_plotting import *

# Magnitude columns handed to the model constructors in this notebook as `cols`.
mags = [
    "g_mag", "r_mag", "i_mag", "z_mag", "y_mag",
    "W1_mag", "W2_mag",
    "J_mag", "K_mag", "H_mag",
]

# Root directory of the saved experiments reloaded by the cells below.
exp_dir = "/Users/marina/Documents/PhD/research/astro_research/experiments/paper_set/"

In [None]:

  

# Multi
# Build the multiclass model, then pass it through load_prev_exp together
# with the path of the stored experiment.
multi_model = MultiModel(
    folds=10,
    min_class_size=40,
    transform_features=True,
    cols=mags,
)
multi_model = load_prev_exp(exp_dir, "multi/Multiclass_Classifier1/", multi_model)




In [None]:

# Binary
# Same settings as the multiclass model; load_prev_exp receives the path of
# the stored binary-classifier experiment.
model2 = BinaryModel(
    folds=10,
    min_class_size=40,
    transform_features=True,
    cols=mags,
)
binary_model = load_prev_exp(exp_dir, "binary/Binary_Classifiers2/", model2)


In [None]:
# OvA
import time

# The ensemble is initialised from the binary model loaded above.
ensemble_model = OvAModel(
    folds=10,
    init_from_binary=binary_model,
    min_class_size=40,
    transform_features=True,
    cols=mags,
)
# run_cfv receives the current timestamp; its return value becomes the results.
ensemble_model.results = ensemble_model.run_cfv(time.time())
# ensemble_model.range_metrics = ensemble_model.compute_probability_range_metrics(
#         ensemble_model.results, bin_size=0.2)
# ensemble_model.range_metrics_10 = ensemble_model.compute_probability_range_metrics(
#         ensemble_model.results, bin_size=0.1)

In [None]:
# Invoke the OvA ensemble's own performance visualization.
ensemble_model.visualize_performance()

## Plot prob metrics

In [None]:
# import matplotlib
# matplotlib.font_manager.fontManager.ttflist

In [None]:

m = ensemble_model

# Replot metrics
# Average over the number of runs when it is set, otherwise over the folds.
N = m.num_folds if m.num_runs is None else m.num_runs
pc_per_trial = m.get_pc_per_trial(m.results)
ps, cs = m.get_avg_pc(pc_per_trial, N)

m.plot_all_metrics(ps, cs, pc_per_trial, m.y)

## Plot prob rates

In [None]:
# binary_model.class_prob_rates["Unspecified II"]
model = multi_model
# Halve the resolution of each class's rates by averaging neighbouring
# entries: (0,1), (2,3), (4,5), (6,7), (8,9) -> 5 values per class.
# NOTE: class_prob_rates is overwritten in place, so running this cell a
# second time would index past the end of the already-shortened rates.
for class_name, rates in list(model.class_prob_rates.items()):
    model.class_prob_rates[class_name] = [
        (rates[start] + rates[start + 1]) / 2 for start in range(0, 10, 2)
    ]

In [None]:
# Plot the rates of all three models, seven classes per call (0-6, then 7-13).
for class_group in (list(range(0, 7)), list(range(7, 14))):
    plot_rates_together(binary_model, ensemble_model, multi_model, indices=class_group)

In [None]:
## GOOD PERFORMING
# Ia, Ia-91bg, II, II P, TDE, GRB

wanted_classes = ["Unspecified Ia", "Ia-91bg", "Unspecified II", "II P", "TDE", "GRB"]
# Positions of the wanted classes within the ensemble model's label order.
indices = [
    position
    for position, label in enumerate(ensemble_model.class_labels)
    if label in wanted_classes
]
indices

plot_rates_together(binary_model, ensemble_model, multi_model, indices=indices)

In [None]:
# Same three models, restricted to the class positions held in `indices`.
plot_pc_curves_together(binary_model, ensemble_model, multi_model, indices=indices)

In [None]:
# Curves for all three models, seven classes per call (0-6, then 7-13).
for class_group in (list(range(0, 7)), list(range(7, 14))):
    plot_pc_curves_together(binary_model, ensemble_model, multi_model, indices=class_group)


## Combine purity/completeness plots
Compares the model run with priors against the model run without priors.

In [None]:

from mainmodel.helper_compute import *
from thex_data.data_consts import * 

def get_mets_for_model(model, met_type):
    """
    Collect everything needed to plot one metric for a model.
    :param model: model exposing results, num_folds, class_counts, class_labels,
        y and class_priors, plus get_pc_per_trial / get_avg_pc
    :param met_type: "Purity" selects the purity values; any other value
        selects the completeness values
    :return: tuple of (class labels, metric values, baselines, confidence
        intervals); the last three are lists built from dict values
    """
    pc_per_trial = model.get_pc_per_trial(model.results)
    ps, cs = model.get_avg_pc(pc_per_trial, model.num_folds)
    labels = model.class_labels
    c_baselines, p_baselines = compute_baselines(
        model.class_counts, labels, model.y, len(labels), model.class_priors)
    p_intvls, c_intvls = compute_confintvls(pc_per_trial, labels)

    # Pick the purity or completeness triple, then flatten each dict the same way
    if met_type == "Purity":
        metrics, baselines, intervals = ps, p_baselines, p_intvls
    else:
        metrics, baselines, intervals = cs, c_baselines, c_intvls
    return (model.class_labels,
            list(metrics.values()),
            list(baselines.values()),
            list(intervals.values()))
         

def plot_together(m1, m2, m1_name, m2_name, plot_type="Purity"):
    """
    Draw one horizontal bar chart comparing a single metric of 2 models,
    class by class, and save it as a PDF.
    :param m1: first model; drawn in green at the higher y position of each pair
    :param m2: second model; drawn in blue directly below m1's bars
    :param m1_name: legend label for m1
    :param m2_name: legend label for m2
    :param plot_type: metric passed to get_mets_for_model; also used as the
        x-axis label and as part of the output file name
    """
    m1_class_names, m1_metrics, m1_b, m1_intvls = get_mets_for_model(
        model=m1, met_type=plot_type)
    _, m2_metrics, m2_b, m2_intvls = get_mets_for_model(
        model=m2, met_type=plot_type)
    fig, ax = plt.subplots(figsize=(FIG_WIDTH, FIG_HEIGHT), dpi=200,
                           tight_layout=True, sharex=True, sharey=True)

    green, blue = "#00ffbf", "#1f77b4"
    bar_height = 0.1
    # Four hard-coded y positions per model
    m1_indices = [0.2, 0.42, 0.64, 0.86]
    m2_indices = [0.1, 0.32, 0.54, 0.76]

    def draw_model(slots, metrics, intervals, baselines, label, color):
        # Bars with error bars, then a dashed red baseline marker across each bar
        ax.barh(y=slots,
                width=metrics,
                height=bar_height,
                xerr=prep_err_bars(intervals, metrics),
                capsize=2,
                edgecolor='black', ecolor='coral',
                color=color,
                label=label)
        half = bar_height / 2
        for position, baseline in enumerate(baselines):
            centre = slots[position]
            plt.vlines(x=baseline,
                       ymin=centre - half,
                       ymax=centre + half,
                       linestyles='--', colors='red')

    # with priors
    draw_model(m1_indices, m1_metrics, m1_intvls, m1_b, m1_name, green)
    draw_model(m2_indices, m2_metrics, m2_intvls, m2_b, m2_name, blue)

    # Figure formatting
    ax.set_xlim(0, 1)
    plt.legend(fontsize=LAB_S)
    percent_labels = [str(tick) + "%" for tick in range(0, 110, 10)]
    plt.xticks(list(np.linspace(0, 1, 11)), percent_labels, fontsize=TICK_S)
    # One class label per bar pair, placed midway between the two bars
    plt.yticks(np.array(m1_indices) - 0.05, clean_class_names(m1_class_names),
               fontsize=TICK_S + 3, horizontalalignment='right')
    plt.xlabel(plot_type, fontsize=LAB_S + 1)
    plt.savefig("../output/custom_figures/prior_comp_combined_" + plot_type + ".pdf")




In [None]:
def load_prev_exp(expnum, model, exp_dir=None):
    """
    Attach the pickled outputs of a previous experiment to a model.
    :param expnum: experiment sub-directory, appended to exp_dir
    :param model: object that receives the loaded data as .results and .y
    :param exp_dir: root directory of the experiments, ending in a path
        separator; defaults to the paper_set directory used by this notebook
    :return: the same model object, with .results and .y set
    :raises OSError: if results.pickle or y.pickle cannot be opened
    """
    # Imported locally so the function does not depend on another cell
    # having imported pickle first.
    import pickle

    if exp_dir is None:
        exp_dir = "/Users/marina/Documents/PhD/research/astro_research/experiments/paper_set/"
    pickle_dir = exp_dir + expnum + "/"

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(pickle_dir + 'results.pickle', 'rb') as handle:
        model.results = pickle.load(handle)

    with open(pickle_dir + 'y.pickle', 'rb') as handle:
        model.y = pickle.load(handle)
    return model

# Four-class model configured with class priors, restored from its saved run.
model = MultiModel(
    folds=10,
    min_class_size=40,
    class_labels=['Unspecified Ia', 'Unspecified II', 'Ia-91bg', 'TDE'],
    priors=[0.73, 0.24, 0.01, 0.007],
    transform_features=True,
    cols=mags,
)
multi_w = load_prev_exp(expnum="multi_priors/Multiclass_Classifier4/", model=model)


# Same configuration without priors, restored from its own saved run.
modelwithout = MultiModel(
    folds=10,
    min_class_size=40,
    class_labels=['Unspecified Ia', 'Unspecified II', 'Ia-91bg', 'TDE'],
    transform_features=True,
    cols=mags,
)
multi_wo = load_prev_exp(expnum="multi_no_priors/Multiclass_Classifier3/", model=modelwithout)


In [None]:
# Draw the with/without-priors comparison once per metric.
for metric_name in ("Purity", "Completeness"):
    plot_together(m1=multi_w, m2=multi_wo,
                  m1_name="With priors", m2_name="Without priors",
                  plot_type=metric_name)


In [None]:
# Compute the probability-range metrics of both models with bin_size=0.2.
for prior_model in (multi_w, multi_wo):
    prior_model.range_metrics = prior_model.compute_probability_range_metrics(
        prior_model.results, bin_size=0.2)

In [None]:
# Display the number of classes.
# NOTE(review): in file order `num_classes` is only assigned in a later cell,
# so this relies on that cell having been executed first.
num_classes

In [None]:
# Display the class labels of the with-priors model.
multi_w.class_labels

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from thex_data.data_consts import ROOT_DIR
from mainmodel.helper_plotting import *

# One row per class, two columns: with priors (left) and without priors (right).
class_labels = multi_w.class_labels
num_classes = len(class_labels)
f, ax = plt.subplots(nrows=num_classes,
                     ncols=2,
                     sharex=True, sharey=True,
                     figsize=(FIG_WIDTH, 8),
                     dpi=DPI)
for plot_index, class_name in enumerate(class_labels):
    left_ax, right_ax = ax[plot_index][0], ax[plot_index][1]
    if plot_index == 0:
        # Column titles go on the top row only
        left_ax.set_title("With priors", fontsize=11)
        right_ax.set_title("Without priors", fontsize=11)

    print(class_name)
    plot_model_rates(class_name, multi_w, left_ax)
    plot_model_rates(class_name, multi_wo, right_ax)

    # The class name labels its row, on the left column only
    left_ax.set_ylabel(clean_class_name(class_name), fontsize=9)

y_indices = [0.1, 0.3, 0.5, 0.7, 0.9]
y_ticks = ["10%", "30%", "50%", "70%", "90%"]
# x and y indices/ticks are the same
plt.xticks(np.arange(5), y_ticks)
plt.yticks(y_indices, y_ticks)
plt.rc('xtick', labelsize=7)
plt.rc('ytick', labelsize=7)

# Shared axis captions for the whole grid
f.text(0.5, 0.08, 'Assigned Probability' + r' $\pm$10%', fontsize=10, ha='center')
f.text(0.0, .5, 'Empirical Probability',
       fontsize=10, va='center', rotation='vertical')

plt.subplots_adjust(wspace=0, hspace=0.1)

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = ['Times New Roman'] + plt.rcParams['font.serif']

f.savefig(ROOT_DIR + "/output/custom_figures/merged_metrics_priors_comp.pdf", bbox_inches='tight')
plt.show()

## Example output plots

In [1]:
import random
import numpy as np
import os
import math
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

from mainmodel.helper_compute import *
from thex_data.data_consts import *
import utilities.utilities as thex_utils


def plot_example_output(model, row, i=None, priors=None):
    """
    Plots example output for a set of probabilities for a particular host-galaxy
    :param model: model whose class_labels give the bar order and tick names
    :param row: Numpy array of probabilities in order of model.class_labels,
        followed by the label entry as its last element
    :param i: Index of sample; shown as the plot title when given
    :param priors: Truthy to use the green color scheme instead of the blue one
    """
    last = len(row) - 1
    labels = row[last]
    # Parse the label entry once rather than once per class
    true_labels = thex_utils.convert_str_to_list(labels)
    true_class_index = None
    for class_index, class_name in enumerate(model.class_labels):
        if class_name in true_labels:
            true_class_index = class_index

    f, ax = plt.subplots(figsize=(5, 3), dpi=220)

    ACC = "#b3e0ff"  # actual class color, light blue
    DCC = "#005c99"  # default class color, dark blue
    if priors:
        ACC = "#00ffbf"  # actual class color, light green
        DCC = "#00664d"  # default class color, dark green

    colors = [DCC] * len(model.class_labels)
    # Highlight the true class only when one of the model's classes matched;
    # indexing the list with None would raise a TypeError.
    if true_class_index is not None:
        colors[true_class_index] = ACC
    probabilities = row[0:last]
    x_indices = np.linspace(0,
                            len(model.class_labels) * 0.4,
                            len(model.class_labels))
    ax.bar(x=x_indices, height=probabilities, width=0.4, color=colors, bottom=-0.01)
    print("\n Probs for this sample")
    print(probabilities)

    plt.xlabel('Class', fontsize=LAB_S)
    pretty_class_names = clean_class_names(model.class_labels)
    plt.xticks(x_indices, pretty_class_names, fontsize=TICK_S)

    ax.set_ylim([0, 1])
    yticks = np.arange(0, 1.2, .2)
    plt.yticks(yticks, [str(int(tick * 100)) + "%" for tick in yticks], fontsize=TICK_S)
    plt.ylabel('Probability Assigned', fontsize=LAB_S)

    # The title belongs on the Axes: Figure objects have no set_title method.
    if i is not None:
        ax.set_title(str(i))
    plt.show()

def get_sample_name(model, sample):
    """
    Look up the 'name' of a sample in the module-level full_data_set by
    matching it on each feature column of model.X in turn.
    :param model: model whose X columns are the features to match on
    :param sample: row of feature values, indexable by feature name
    :return: the name shared by the first match of every feature, or None
        when model.X has no columns
    :raises ValueError: if two features lead to different names
    """
    match_str = None
    for feature in list(model.X):
        # Rows of the full data set holding exactly this feature value
        candidates = full_data_set[full_data_set[feature] == sample[feature]]
        name = candidates['name'].values[0]
        if match_str is not None and match_str != name:
            raise ValueError("More than 1")
        match_str = name
    return match_str

def plot_cor_examples(model_with, model_wo, sample_index=None):
    """
    Plot corresponding example outputs for one sample, without priors (left
    panel) and with priors (right panel), and save the figure as a PDF named
    after the sample index.
    :param model_with: model using priors; also supplies X, y and the class
        labels used to locate the true class
    :param model_wo: model not using priors; supplies the x tick names
    :param sample_index: positional (iloc) index of the sample in
        model_with.X / model_with.y; must be provided despite the None default
    """
    labels = model_with.y.iloc[sample_index]['transient_type']
    # Parse the label entry once rather than once per class
    true_labels = thex_utils.convert_str_to_list(labels)
    true_class_index = None
    for class_index, class_name in enumerate(model_with.class_labels):
        if class_name in true_labels:
            true_class_index = class_index

    f, ax = plt.subplots(nrows=1,
                         ncols=2,
                         sharex=True, sharey=True,
                         figsize=(6, 2),
                         dpi=200)

    num_classes = len(model_with.class_labels)
    x_indices = np.linspace(0, num_classes * 0.4, num_classes)
    X_example = model_with.X.iloc[sample_index]
    model_with_ps = list(model_with.get_class_probabilities(X_example).values())
    model_without_ps = list(model_wo.get_class_probabilities(X_example).values())

    plt.rcParams['xtick.labelsize'] = 8.5
    pretty_class_names = clean_class_names(model_wo.class_labels)

    def bar_colors(actual_color, default_color):
        # Default color everywhere; the true class is highlighted only when one
        # matched, since indexing the list with None would raise a TypeError.
        colors = [default_color] * num_classes
        if true_class_index is not None:
            colors[true_class_index] = actual_color
        return colors

    # Left panel: without priors, light/dark blue
    ax[0].bar(x=x_indices, height=model_without_ps, width=0.4,
              color=bar_colors("#b3e0ff", "#005c99"), bottom=0)
    ax[0].set_title("Without Priors", fontsize=10)

    # Right panel: with priors, light/dark green
    ax[1].bar(x=x_indices, height=model_with_ps, width=0.4,
              color=bar_colors("#00ffbf", "#00664d"), bottom=0)
    ax[1].set_title("With Priors", fontsize=10)
    ax[0].set_ylim([0, 1])
    ax[1].set_ylim([0, 1])
    yticks = np.arange(0, 1.2, .2)
    plt.yticks(yticks, [str(int(tick * 100)) + "%" for tick in yticks])
    ax[0].set_ylabel('Probability Assigned')

    # The sample's name is written above both panels
    n = get_sample_name(model_with, X_example)
    plt.figtext(0.5, 1, n, ha='center', va='center')
    plt.tight_layout()
    plt.xticks(x_indices, pretty_class_names)
    plt.savefig("../output/custom_figures/" + str(sample_index) + ".pdf", bbox_inches='tight')
    plt.show()

def plot_sample(model, priors, sample_index):
    """
    Plot the model's class probabilities for a single sample.
    :param model: model providing X, y and get_class_probabilities
    :param priors: forwarded to plot_example_output
    :param sample_index: positional (iloc) index of the sample in model.X / model.y
    """
    features = model.X.iloc[sample_index]
    target = model.y.iloc[sample_index]
    # Probabilities come back as a mapping; keep only the values, in order
    class_probs = list(model.get_class_probabilities(features).values())
    # Append the label values so the row ends with the target entry
    combined_row = np.hstack((class_probs, target.values))
    plot_example_output(model, combined_row, sample_index, priors)

def plot_new_samples(model, num_samples, indices, priors):
    """
    Draw a random subset of the given indices, without replacement, and plot
    each chosen sample.
    :param model: model forwarded to plot_sample
    :param num_samples: how many indices to draw
    :param indices: candidate indices to draw from
    :param priors: forwarded to plot_sample
    :return: the indices that were drawn
    """
    chosen = np.random.choice(indices, size=num_samples, replace=False)
    for chosen_index in chosen:
        plot_sample(model, priors, chosen_index)
    return chosen


In [None]:
# Save one horizontal probability bar chart per sample for rows 800-879 of
# model.results[0] (presumably the first fold/run — confirm).
for do_index in range(800, 880):

    row = model.results[0][do_index]
    last = len(row) - 1
    # The final entry is the label entry; everything before it is per-class probability
    labels = row[last]
    true_labels = thex_utils.convert_str_to_list(labels)
    true_class_index = None
    for class_index, class_name in enumerate(model.class_labels):
        if class_name in true_labels:
            true_class_index = class_index

    f, ax = plt.subplots(figsize=(2, 4), dpi=400)

    # The shades are swapped relative to the other example plots in this
    # notebook: here the true class is dark and the rest are light.
    ACC = "#005c99"  # actual class color, dark blue
    DCC = "#b3e0ff"  # default class color, light blue

    colors = [DCC] * len(model.class_labels)
    # Highlight only when a class matched; indexing with None would raise.
    if true_class_index is not None:
        colors[true_class_index] = ACC
    probabilities = row[0:last]
    bar_size = 0.1
    x_indices = np.linspace(0,
                            len(model.class_labels) * bar_size,
                            len(model.class_labels))

    ax.barh(y=x_indices, width=probabilities, height=bar_size, color=colors)

    pretty_class_names = clean_class_names(model.class_labels)
    plt.yticks(x_indices, pretty_class_names, fontsize=8)

    ax.set_xlim([0, 1])
    yticks = np.arange(0, 1.2, .2)
    plt.xticks(yticks, [str(int(tick * 100)) + "%" for tick in yticks], fontsize=9, rotation=-90)
    plt.xlabel('Probability', fontsize=10)

    # Leave room for the class names on the left and the rotated ticks below
    plt.gcf().subplots_adjust(left=0.42)
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.savefig("../output/custom_figures/" + str(do_index) + ".pdf")
    plt.show()
    # Release the figure so 80 high-dpi figures do not pile up in memory
    plt.close(f)


### Same example plots, comparing with vs. without priors

In [None]:
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# NOTE(review): filters added inside catch_warnings() are presumably discarded
# when the block exits, so this looks like a no-op on top of the global
# 'ignore' filter above — confirm and consider removing.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)
%matplotlib inline  
from models.binary_model.binary_model import BinaryModel
from models.ind_model.ind_model import IndModel
from models.multi_model.multi_model import MultiModel
 
import pickle



# Changes from other approach - we do not scale data ; otherwise testing new points is tricky.

# Model with priors, trained here instead of being reloaded from disk.
# NOTE(review): these priors add up to 1.025 — confirm that is intended.
multi_w = MultiModel(
    folds=2,
    min_class_size=40,
    class_labels=['Unspecified Ia', 'Unspecified II', 'Ia-91bg', 'TDE'],
    priors=[0.65, 0.36, 0.01, 0.005],
    transform_features=False,
    cols=mags,
)
# multi_w = load_prev_exp(expnum="106/Multiclass_Classifier13",model=model)
multi_w.run_model()


# Same configuration without priors.
multi_wo = MultiModel(
    folds=2,
    min_class_size=40,
    class_labels=['Unspecified Ia', 'Unspecified II', 'Ia-91bg', 'TDE'],
    transform_features=False,
    cols=mags,
)
# multi_wo = load_prev_exp(expnum="107/Multiclass_Classifier15",model=model)
multi_wo.run_model()


# Full data set, read by get_sample_name to look up sample names.
from thex_data.data_init import collect_data
full_data_set = collect_data()




In [None]:
# Plot paired examples (without and with priors) for a few random samples of
# each class.

# model = multi_wo
NUM_EXAMPLES = 3
class_indices_map = {}
for cur_class in multi_wo.class_labels:
    print("\nSampling class: " + str(cur_class))
    # Collect positions rather than index labels, because plot_cor_examples
    # looks samples up with .iloc. Membership is tested on the parsed label
    # list, as in plot_cor_examples, so one class name contained inside
    # another does not produce a false match.
    indices = [
        position
        for position, (_, row) in enumerate(multi_wo.y.iterrows())
        if cur_class in thex_utils.convert_str_to_list(row['transient_type'])
    ]
    class_indices_map[cur_class] = indices
    # Never request more samples than the class has
    rand_indices = np.random.choice(indices, min(NUM_EXAMPLES, len(indices)), replace=False)
    for index in rand_indices:
        plot_cor_examples(multi_w, multi_wo, sample_index=index)