In [None]:
import itertools
import pickle

import matplotlib.pyplot as plt
import numpy as np
import sklearn as sk
import sklearn.linear_model
import sklearn.metrics
import sklearn.model_selection
import sklearn.utils
from transformers import pipeline, AutoConfig, GPT2LMHeadModel, GPT2Tokenizer

In [None]:
# Load the saved model outputs: `preds` holds the per-class scores (reduced with
# argmax along axis 1 below) and `y` the reference labels they are scored against.
# An .npz archive keeps its file handle open until it is closed, so read both
# arrays inside a `with` block; `data` stays bound afterwards but is closed.
with np.load('results/results_neo_test.npz') as data:
    probas = data['preds']
    y = data['y']

In [None]:
# Load the object pickled in 'data_train.p'. Its keys are used further down as the
# class names on the confusion-matrix axes, so it must expose a `.keys()` method.
# NOTE(review): pickle.load can execute arbitrary code -- only unpickle trusted files.
with open('data_train.p', 'rb') as f:
    data_train = pickle.load(f)

In [None]:
# Load the object pickled in 'outputs.p' so it can be inspected in the next cell.
# NOTE(review): its structure is not visible from this notebook -- presumably the
# raw model outputs; confirm. As with any pickle, only load files you trust.
with open('outputs.p', 'rb') as f:
    outputs_raw = pickle.load(f)

In [None]:
# Show the loaded object as the cell output.
# NOTE(review): if it is large, display only a slice or summary to keep the notebook readable.
outputs_raw

## Calculate Preds

In [None]:
def MLE(probas):
    """Maximum-likelihood decoding: index of the largest entry in each row.

    `probas` is reduced along axis 1, so it must have at least two dimensions
    (in this notebook: presumably one row per sample, one column per class).
    Ties resolve to the first maximal index, as with `np.argmax`.
    """
    return np.argmax(probas, axis=1)

In [None]:
# Maximum-likelihood predictions: the highest-scoring class index for every row of `probas`.
preds = MLE(probas)

### MAP

In [None]:
def MAP(probas, prior):
    """Maximum-a-posteriori decoding: argmax of the prior-weighted scores per row.

    `prior` is multiplied element-wise into `probas` (broadcast across rows),
    then the index of the largest weighted entry along axis 1 is returned.
    The prior does not need to be normalised: scaling it by a positive
    constant leaves the argmax unchanged.
    """
    weighted = probas * prior
    return np.argmax(weighted, axis=1)

In [None]:
# Two candidate class priors for MAP decoding, one weight per class (five classes).
# They are left unnormalised; MAP only takes an argmax, so the overall scale is irrelevant.
# NOTE(review): the "manual" values are presumably hand-tuned -- confirm their origin.
prior_manual = np.array((5.0, 23.5, 3.5, 2.25, 3.7), dtype=np.float64)
# NOTE(review): these look like per-class sample counts -- confirm which split they come from.
prior_empirical = np.array((461, 624, 339, 95, 72), dtype=np.float64)

In [None]:
# MAP predictions under the empirical prior (keywords make the argument order explicit).
map_emp = MAP(probas=probas, prior=prior_empirical)

In [None]:
# MAP predictions under the manually chosen prior (keywords make the argument order explicit).
map_man = MAP(probas=probas, prior=prior_manual)

### Code for manual grid search

In [None]:
# Candidate values for each of the five class weights (w0..w4) scanned by the grid search below.
# With empty lists the search loop does nothing -- presumably disabled to keep re-runs fast.
# The trailing comments keep the ranges that were scanned; swap them back in to repeat the search.
range_0 = []  # np.arange(8, 16, .5)
range_1 = []  # np.arange(1, 6, .5)
range_2 = []  # np.arange(2, 7, .5)
range_3 = []  # np.arange(3, 8, .5)
range_4 = []  # np.arange(1, 3, .5)

In [None]:
# Exhaustive grid search over the five per-class weights used as the MAP prior.
# Every combination from range_0..range_4 is scored by macro-averaged F1 against `y`,
# and the best-scoring combination is kept in `best_weights` / `best_f1`.
# NOTE(review): `probas` and `y` are loaded from 'results_neo_test.npz'; if that is the
# held-out test split, weights tuned here give optimistic scores in the reports below --
# confirm, and prefer tuning on a validation split.
best_f1 = 0
best_weights = []

for w0 in range_0:
    print(f'w0: {w0}')  # coarse progress indicator: one line per outer value
    # product() walks the inner ranges in the same order as nested loops would
    # (rightmost range varies fastest), without four extra indentation levels.
    for w1, w2, w3, w4 in itertools.product(range_1, range_2, range_3, range_4):
        cur_weights = np.array([w0, w1, w2, w3, w4])
        cur_preds = MAP(probas, cur_weights)
        cur_f1 = sk.metrics.f1_score(y, cur_preds, average='macro')
        if cur_f1 > best_f1:  # strict '>' keeps the first combination on ties
            best_f1 = cur_f1
            best_weights = [w0, w1, w2, w3, w4]

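Best macro-F1 scores recorded from earlier runs of the search, with the weights `[w0, w1, w2, w3, w4]` that produced them:

* 0.38: [10, 3, 4, 5, 1]
* 0.40: [13.5, 3.5, 4.5, 5.0, 1.0]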

In [None]:
# Hard-code the best weights recorded from the grid search so the search does not
# have to be re-run before the report below.
best_weights = [13.5, 3.5, 4.5, 5.0, 1.0]

In [None]:
# Report MAP predictions obtained with the grid-searched class weights.
# MAP's signature is (probas, prior): scores first, weights second.
# NOTE(review): `report` is defined in a later cell ("Classification Reports"); on a
# fresh kernel that cell must run first, or its definition should be moved above this one.
report(y, MAP(probas, best_weights))

## Classification Reports

In [None]:
def report(y, preds, save_name=None, class_names=None):
    """Print a classification report and plot a row-normalised confusion matrix.

    Parameters
    ----------
    y : array-like
        Reference class labels.
    preds : array-like
        Predicted class labels, aligned with ``y``.
    save_name : str or path-like, optional
        If truthy, the confusion-matrix figure is also written to this path
        with a tight bounding box. ``None`` (or the legacy ``False``) skips saving.
    class_names : sequence, optional
        Tick labels for the confusion matrix. Defaults to the keys of the
        notebook-level ``data_train`` object.
        NOTE(review): this assumes the key order of ``data_train`` matches the
        order of the label values in ``y`` / ``preds`` -- confirm.

    Returns
    -------
    None
        The report is printed and the figure is shown via ``plt.show()``.
    """
    if class_names is None:
        # Materialise the keys view: `display_labels` is documented as array-like.
        class_names = list(data_train.keys())

    print(sk.metrics.classification_report(y, preds, zero_division=0))
    disp = sk.metrics.ConfusionMatrixDisplay.from_predictions(
        y,
        preds,
        display_labels=class_names,
        xticks_rotation=45,
        normalize='true',
    )
    if save_name:
        # Save through the display's own figure instead of pyplot's implicit
        # "current figure", so the right figure is written even if others are open.
        disp.figure_.savefig(save_name, bbox_inches='tight')
    plt.show()

In [None]:
# Baseline: report for the plain argmax (maximum-likelihood) predictions.
print("MLE:")
report(y, preds)

In [None]:
# Report for MAP predictions under the empirical prior; the heading names the
# prior so this output can be told apart from the manual-prior report.
print("MAP (empirical prior):")
report(y, map_emp)

In [None]:
# Report for MAP predictions under the manually chosen prior; the heading names
# the prior so this output can be told apart from the empirical-prior report.
print("MAP (manual prior):")
report(y, map_man)