In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib import rc
from datetime import datetime

from thex_data.data_consts import *
from models.multi_model.multi_model import MultiModel

from evaluation.plotting import *
from estimate.get_data import *



In [2]:
# Feature columns handed to the classifier, in the order they are listed here.
# Z_FEAT comes from the star-import of thex_data.data_consts.
cols = [
    "g_mag",
    "r_mag",
    "i_mag",
    "z_mag",
    "y_mag",
    "W1_mag",
    "W2_mag",
    "H_mag",
    "K_mag",
    "J_mag",
    Z_FEAT,
]

# Case codes forwarded to MultiModel as `case_code`.
# NOTE(review): the meaning of each code is defined in the project, not here.
codes = ["A1", "F1", "B1", "G1"]

# Two-class model (Unspecified Ia vs. II) built on the current data file.
# NOTE(review): min_class_size=40 is presumably a per-class sample threshold --
# confirm against MultiModel.
model = MultiModel(
    data_file=CUR_DATA_PATH,
    cols=cols,
    case_code=codes,
    class_labels=["Unspecified Ia", "II"],
    min_class_size=40,
    transform_features=False,
)

Saving Multiclass Classifier output to directory /Users/marina/Documents/PhD/research/astro_research/code/environments/dist_env/lib/python3.8/site-packages/thex_data/../output/Multiclass_Classifier21


Constructing Class Hierarchy Tree...
Using data: /Users/marina/Documents/PhD/research/astro_research/code/dist_code/estimate/../../../data/catalogs/v8/THEx-v8.0-release.mags-xcalib.min-xcal.fits

Classes:
['Unspecified Ia', 'II']

Features:
['g_mag', 'r_mag', 'i_mag', 'z_mag', 'y_mag', 'W1_mag', 'W2_mag', 'H_mag', 'K_mag', 'J_mag', 'event_z']


		Class Counts
Unspecified Ia : 7104
II : 2312


In [3]:
# Number of resample/train/test repetitions performed by the evaluation loop.
model.num_runs = 2

# Single frame holding the model's features and labels side by side; this is
# what the test-set sampler is given.
thex_dataset = pd.concat([model.X, model.y], axis="columns")

In [8]:
from evaluation.sampling_test import *

In [7]:
# Build a timestamped output directory for this session.
# Timestamp layout: day_month_year__hour_minute_second (local time).
now = datetime.now()
dt_string = now.strftime("%d_%m_%Y__%H_%M_%S")
output_dir = "../output/" + dt_string

# os.makedirs creates a missing "../output" parent and, with exist_ok=True,
# tolerates the cell being re-run within the same second. os.mkdir raised
# FileNotFoundError / FileExistsError in those two situations.
os.makedirs(output_dir, exist_ok=True)

In [9]:

# Per-run performance records: one entry is appended to each list per iteration.
LSST_results, orig_results = [], []

for i in range(model.num_runs):
    print(f"\n\nIteration {i + 1}/{model.num_runs}")

    # Draw fresh test sets for this run.
    X_lsst, y_lsst, X_orig, y_orig = get_test_sets(thex_dataset, output_dir, i)

    # Training data is rebuilt from the full model data and both test sets.
    X_train, y_train = get_training_data(X_lsst, X_orig, model.X, model.y)

    # Put every feature frame into the same column order.
    X_lsst, X_orig, X_train = (
        frame[ordered_mags] for frame in (X_lsst, X_orig, X_train)
    )

    # Report the training-set size and per-class test counts for both test sets.
    print(f"\nTraining set size: {X_train.shape[0]!s}")
    for count_tag, count_label in (("Ia", "Unspecified Ia"),
                                   ("II", "Unspecified II")):
        lsst_count = get_cc(y_lsst, count_label)
        thex_count = get_cc(y_orig, count_label)
        print(f"{count_tag} test count, LSST: {lsst_count!s}, THEx: {thex_count!s}")

    # Fit on this run's training sample.
    model.train_model(X_train, y_train)

    # Score the freshly trained model on the LSST test set, then on the
    # original-sample test set.
    LSST_results.append(get_test_performance(X_lsst, y_lsst, model))
    orig_results.append(get_test_performance(X_orig, y_orig, model))



Iteration 1/2

Sampling Class: Unspecified Ia

Sampling Class: II

Training set size: 8932
Ia test count, LSST: 111, THEx: 111
II test count, LSST: 116, THEx: 83


Training Multivariate KDE per class

Training KDE for Unspecified Ia
bandwidth 0.04091224489795919
average log-likelihood: -4179.320927668121

Training KDE for II
bandwidth 0.06131836734693878
average log-likelihood: -3129.3888649070723
Brier score multiclass (loss): 0.09269844241406233



Iteration 2/2

Sampling Class: Unspecified Ia

Sampling Class: II

Training set size: 8940
Ia test count, LSST: 111, THEx: 111
II test count, LSST: 121, THEx: 88


Training Multivariate KDE per class

Training KDE for Unspecified Ia
bandwidth 0.04091224489795919
average log-likelihood: -4557.228856853799

Training KDE for II
bandwidth 0.06131836734693878
average log-likelihood: -2982.9895668308927
Brier score multiclass (loss): 0.1044923834019242



In [13]:
# Plot performance for each test set into its own sub-path of output_dir:
# first the LSST test set, then the randomly sampled original data.
# Note: y_lsst / y_orig hold the labels from the final loop iteration only,
# while the result lists contain one entry per run.
for plot_suffix, plot_labels, plot_results in (
    ("/lsst_test", y_lsst, LSST_results),
    ("/orig_test", y_orig, orig_results),
):
    plot_performance_c(model, plot_labels, output_dir + plot_suffix, plot_results)

# Combined view of both result sets.
plot_performance_together(model, y_lsst, LSST_results, orig_results, output_dir)

{'Unspecified Ia': 111, 'II': 122}


		Multiclass Classifier Balanced Purity
Unspecified Ia : 0.5775243273342661
II : 0.7713157680211216


		Multiclass Classifier Purity
Unspecified Ia : 0.5543167701863354
II : 0.7874736101337086


		Multiclass Classifier Completeness
Unspecified Ia : 0.8963963963963963
II : 0.3442622950819672


		Purity confidence intervals
Unspecified Ia : [0.554255900621118, 0.5543776397515529]
II : [0.7309218859957776, 0.8440253342716397]


		Completeness confidence intervals
Unspecified Ia : [0.8522522522522521, 0.9405405405405406]
II : [0.3121311475409836, 0.3763934426229508]

 Purity
Ia (unspec.) ($55.43\%\pm0.01\%$)
II ($78.75\%\pm5.66\%$)

Baselines
[0.5236051502145923, 0.47639484978540775]

 Completeness
Ia (unspec.) ($89.64\%\pm4.41\%$)
II ($34.43\%\pm3.21\%$)

Baselines
[0.5, 0.5]


  plt.show()



Confusion Matrix
[[0.8964 0.1036]
 [0.6557 0.3443]]


  plt.show()
  plt.show()
  plt.show()



Probability vs Class Rates for: Ia (unspec.)
[0.2027 0.1667 0.3571 0.2143 0.5912]


  plt.show()



Probability vs Class Rates for: II
[0.4088 0.7857 0.6429 0.8333 0.7973]
{'Unspecified Ia': 111, 'II': 122}


		Multiclass Classifier Balanced Purity
Unspecified Ia : 0.6008109923045408
II : 0.7054569101934154


		Multiclass Classifier Purity
Unspecified Ia : 0.5780246913580247
II : 0.7245884948243679


		Multiclass Classifier Completeness
Unspecified Ia : 0.8108108108108107
II : 0.45901639344262296


		Purity confidence intervals
Unspecified Ia : [0.5218864197530864, 0.634162962962963]
II : [0.657071101306635, 0.7921058883421008]


		Completeness confidence intervals
Unspecified Ia : [0.7931531531531532, 0.8284684684684683]
II : [0.3465573770491803, 0.5714754098360656]

 Purity

  plt.show()
  plt.show()



Ia (unspec.) ($57.8\%\pm5.61\%$)
II ($72.46\%\pm6.75\%$)

Baselines
[0.5236051502145923, 0.47639484978540775]

 Completeness
Ia (unspec.) ($81.08\%\pm1.77\%$)
II ($45.9\%\pm11.25\%$)

Baselines
[0.5, 0.5]

Confusion Matrix
[[0.8108 0.1892]
 [0.541  0.459 ]]


  plt.show()
  plt.show()
  plt.show()



Probability vs Class Rates for: Ia (unspec.)
[0.18   0.3939 0.3721 0.4032 0.6579]


  plt.show()



Probability vs Class Rates for: II
[0.3421 0.5968 0.6279 0.6061 0.82  ]


		Multiclass Classifier Balanced Purity
Unspecified Ia : 0.5775243273342661
II : 0.7713157680211216


		Multiclass Classifier Purity
Unspecified Ia : 0.5543167701863354
II : 0.7874736101337086


		Multiclass Classifier Completeness
Unspecified Ia : 0.8963963963963963
II : 0.3442622950819672


		Purity confidence intervals
Unspecified Ia : [0.554255900621118, 0.5543776397515529]
II : [0.7309218859957776, 0.8440253342716397]


		Completeness confidence intervals
Unspecified Ia : [0.8522522522522521, 0.9405405405405406]
II : [0.3121311475409836, 0.3763934426229508]


		Multiclass Classifier Balanced Purity
Unspecified Ia : 0.6008109923045408
II : 0.7054569101934154


		Multiclass Classifier Purity
Unspecified Ia : 0.5780246913580247
II : 0.7245884948243679


		Multiclass Classifier Completeness
Unspecified Ia : 0.8108108108108107
II : 0.45901639344262296


		Purity confidence intervals
Unspecified Ia : [0.521886419

  plt.show()


../output/17_08_2021__17_15_44
