# Explore Results

In [57]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell executes, so edits to imported project code
# (e.g. `src.sklearn_utils`) take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
import ast
import os
import pickle
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import multilabel_confusion_matrix

from src.sklearn_utils import predict_fn, plot_one_vs_rest_success_rates, plot_confusion_matrix, compute_cross_entropy_loss

In [59]:
# The artifacts folder sits next to the directory the notebook is run from.
artifacts_dir = Path.cwd().parent.joinpath('artifacts')

# Deserialize the saved model from the artifacts folder.
model_path = artifacts_dir.joinpath('gradient_boosting-20220704-153024.joblib')
model = joblib.load(model_path)



In [60]:
# Display the cross-validation results stored on the loaded model: fit/score
# timings and per-split test scores for each `max_depth` candidate.
model.cv_results_

{'mean_fit_time': array([ 21.69707355,  64.43889155, 132.12126284]),
 'std_fit_time': array([ 0.50096234,  1.39458765, 49.39470151]),
 'mean_score_time': array([0.06359849, 0.0473938 , 0.04681373]),
 'std_score_time': array([0.01985951, 0.00117783, 0.00224644]),
 'param_clf__base_estimator__max_depth': masked_array(data=[2, 4, 6],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'clf__base_estimator__max_depth': 2},
  {'clf__base_estimator__max_depth': 4},
  {'clf__base_estimator__max_depth': 6}],
 'split0_test_score': array([0.07306397, 0.07205387, 0.07474747]),
 'split1_test_score': array([0.05938567, 0.05699659, 0.05836177]),
 'split2_test_score': array([0.06622517, 0.06390728, 0.06523179]),
 'split3_test_score': array([0.05785953, 0.05785953, 0.05719064]),
 'split4_test_score': array([0.0620339 , 0.06372881, 0.06237288]),
 'mean_test_score': array([0.06371365, 0.06290922, 0.06358091]),
 'std_test_score': array([0.00546919, 0

In [61]:
# Load the label binarizer saved alongside the model. Later cells rely on its
# `classes_` attribute and `inverse_transform` method.
# NOTE: unpickling can execute arbitrary code; only load artifacts produced by
# a trusted training run.
# The `with` block guarantees the file handle is closed once loading finishes.
with open(artifacts_dir / 'gradient_boosting-20220704-153024-binarizer.pkl', 'rb') as binarizer_file:
    mlb = pickle.load(binarizer_file)

In [62]:
# Load the held-out test set that was saved alongside the model.
df = pd.read_csv(artifacts_dir / 'gradient_boosting-20220704-153024-test.csv')

# `sectors_list` is stored in the CSV as the string repr of a Python list,
# e.g. "['A', 'B', 'C']" -> ['A', 'B', 'C'].
# ast.literal_eval only parses Python literals, so unlike eval() it cannot
# execute arbitrary code embedded in the file.
df['sectors_list'] = df['sectors_list'].apply(ast.literal_eval)

# Keep only the tags the binarizer knows about. The lookup set is built once
# instead of rebuilding `mlb.classes_.tolist()` for every tag of every row.
known_classes = set(mlb.classes_.tolist())
df["sectors_list"] = df["sectors_list"].apply(
    lambda tags: [tag for tag in tags if tag in known_classes]
)

In [63]:
# Run the project helper on the test frame. `y_true` and `y_pred` are used
# below as the true and predicted label indicators.
# NOTE(review): `hl` is presumably the Hamming loss computed by predict_fn —
# confirm against src.sklearn_utils.
y_true, y_pred, hl = predict_fn(df, mlb, model)

In [64]:
# Load the baseline model used for comparison in the plot below.
# NOTE(review): the file name suggests a stratified dummy classifier — confirm.
naive_path = artifacts_dir / 'naive-stratified.joblib'
naive_model = joblib.load(naive_path)

In [65]:
# Baseline predictions on the same test frame, to compare against `y_pred`.
y_pred_naive = naive_model.predict(df)

In [66]:
# Plot the model's predictions against the naive baseline, labelled by class.
# NOTE(review): the exact metric is defined in
# src.sklearn_utils.plot_one_vs_rest_success_rates — confirm there.
plot_one_vs_rest_success_rates(y_true, y_pred, y_pred_naive, classes=mlb.classes_)

# Confusion Matrices

In [67]:
# One 2x2 confusion matrix per label. scikit-learn lays each one out as
# [[TN, FP], [FN, TP]].
mlbcm = multilabel_confusion_matrix(y_true, y_pred)

# TODO: Fix these confusion matrices. They are not correct.
# Plot every per-label matrix, passing the matching class name to the helper.
for ix in range(len(mlbcm)):
    matrix = mlbcm[ix]
    class_label = mlb.classes_[ix]
    fig = plot_confusion_matrix(matrix, class_label)
    fig.show()

# Sort by loss

There are two potential ways of sorting by loss. First, we can sort by total loss, i.e., look at the loss across all labels for each instance. Second, we can look at the worst-performing instances for each individual label.

In [70]:
# Predicted probabilities for the test frame; used below to compute a
# cross-entropy loss for each instance.
probas=model.predict_proba(df)

In [71]:
# Cross-entropy is a proper scoring rule that measures how much a predicted probability distribution differs from the true distribution. Sorting instances by cross-entropy loss may surface informative failure cases. See https://stats.stackexchange.com/questions/312780/why-is-accuracy-not-the-best-measure-for-assessing-classification-models



In [86]:
# Cross-entropy loss of every test instance, computed from its true labels and
# predicted probabilities.
instance_cross_entropy_losses = [
    compute_cross_entropy_loss(y_true[i], probas[i]) for i in range(len(y_true))
]

# np.argsort sorts ascending, so the last index is the highest-loss instance.
sorted_indices = np.argsort(instance_cross_entropy_losses)
df_sorted = df.iloc[sorted_indices]
ground_truth_sorted = mlb.inverse_transform(y_true[sorted_indices])
predicted_sorted = mlb.inverse_transform(y_pred[sorted_indices])

# The worst example is the final entry of each sorted collection.
worst_example_ground_truth = ground_truth_sorted[-1]
worst_example_predicted = predicted_sorted[-1]
worst_example_row = df_sorted.iloc[-1]

# The second label previously repeated "ground truth" although it prints the
# predicted labels; it now reads "predicted".
print(
    f"Worst example ground truth: {worst_example_ground_truth} \n\n"
    f"Worst example predicted: {worst_example_predicted}\n\n"
    f" Worst example description text: \n\n{worst_example_row['description_text']}\n\n"
    f" Worst example full_text: \n\n{worst_example_row['full_text']}"
)


Worst example ground truth: ('Agriculture', 'Health', 'Industry', 'Transportation') 

Worst example ground truth: ('Economy-wide',)

 Worst example description text: 

The Myanmar Sustainable Development Plan (MSDP) has been designed as a living document that presents practical and implementable pathways toward addressing development challenges. The plan provides a long-term vision founded upon the objective of giving coherence to the policies and institutions necessary to achieve inclusive and transformational economis growth.  The plan is structured around three pillars ('A peaceful, Prosperous and Democratic Myanmar'), five goals (1) Peace, National reconciliation, Security and Good Governance; 2) Economic Stability and Strengthened, Macroeconomic Management; 3) Job Creation and Private Sector Led Growth; 4) Human resources and Social Development for a 21st Century Society; 5) Natural Resources and the Environment for Posterity of the Nation), 28 strategies and 251 action plans. 

 