In [1]:
import sys
import dill

import torch

# Monkey patch torch.load so that checkpoints saved from CUDA tensors are
# always mapped onto the CPU when they are deserialised.
#
# The wrapper forwards every extra positional/keyword argument to the real
# torch.load, so callers that pass e.g. `weights_only=` or `pickle_module=`
# keep working. The guard makes the cell idempotent: re-running it must not
# wrap the wrapper again or lose the reference to the original function.
if not getattr(torch.load, '_forces_cpu', False):
    base_load = torch.load

    def _load_on_cpu(f, *args, **kwargs):
        """Call the original ``torch.load`` with ``map_location`` forced to ``'cpu'``."""
        if args:
            # map_location is torch.load's second positional parameter.
            args = ('cpu',) + args[1:]
            kwargs.pop('map_location', None)
        else:
            kwargs['map_location'] = 'cpu'
        return base_load(f, *args, **kwargs)

    _load_on_cpu._forces_cpu = True
    torch.load = _load_on_cpu

# Make the project's `src` package importable without duplicating the entry
# when the cell is executed more than once.
if '../src' not in sys.path:
    sys.path.append('../src')

In [2]:
import json
import numpy as np
import pandas as pd

# Load the cached test-split columns; later cells read `text` and
# `emotion_index` from this frame.
df = pd.read_parquet(
    '../preprocessed/vent-split-robust-cache/test-cols.parquet'
)

In [3]:
# Two-column table pairing every emotion (`name`) with its parent category
# (`category_name`).
full_cats = pd.read_csv('../preprocessed/category_names.csv')[['category_name', 'name']]

# Category name -> position of that category in sorted order.
top_cats = {
    category: position
    for position, category in enumerate(sorted(full_cats.category_name.unique()))
}

# Emotion name -> index of its parent category. If an emotion name appears
# more than once, the last occurrence in the file wins.
cat_for = {
    emotion: top_cats[category]
    for category, emotion in zip(full_cats['category_name'], full_cats['name'])
}

# Emotion position (in sorted order of the emotion names) -> category index.
# NOTE(review): assumes `df.emotion_index` was built with this same sorted
# ordering of emotion names — confirm against the preprocessing code.
emo_mapping = {
    position: cat_for[emotion]
    for position, emotion in enumerate(sorted(full_cats['name'].unique()))
}

# Attach the category index of every sample's emotion to the test frame.
df['category_index'] = [emo_mapping[emotion_idx] for emotion_idx in df.emotion_index.tolist()]

# Data and model preparation

In [4]:
# Hashes naming the stored artefacts (config JSON and pickled model) of the
# two available experiments.
vent_bert_lstm_hash = '0f3700bb5aa13110dc2f5cbedc09701f'
vent_tfidf_logreg_hash = '1bc7b487f61026fdfefc8afb524589cf'

# Pick the experiment to analyse; `is_bert` records whether it is the
# BERT + LSTM one.
experiment_hash = vent_bert_lstm_hash
is_bert = experiment_hash == vent_bert_lstm_hash

# Use context managers so both file handles are closed deterministically
# instead of being left to the garbage collector.
with open(f'../output/Vent/replica-full/{experiment_hash}.json') as config_file:
    config = json.load(config_file)

# Per-label decision thresholds stored with the experiment results.
# NOTE(review): index 1 presumably selects the threshold vector out of a
# (score, thresholds) pair — confirm against the training output format.
thresholds = np.asarray(config['results']['thresholds'][1])

# SECURITY: dill, like pickle, can execute arbitrary code while loading.
# Only load model files that come from a trusted source.
with open(f'../models/Vent/{experiment_hash}.pkl', 'rb') as model_file:
    cats, extractor, model = dill.load(model_file)

In [None]:
from utils.generators import SizedCallableWrapper, SizedBatchWrapper

# Sizes derived from the loaded experiment and the category table.
NUM_LABELS = len(thresholds)
NUM_CATEGORIES = len(top_cats)

# Evaluation budget: cap on the number of test samples and the batch size
# used when feeding them to the model.
NUM_SAMPLES = 1000000
BATCH_SIZE = 256

# Keep only the first NUM_SAMPLES texts and their emotion indices.
inputs = df.text.iloc[:NUM_SAMPLES].tolist()
labels = df.emotion_index.iloc[:NUM_SAMPLES].tolist()

# Batch the raw texts, wrap the batches with the feature extractor, and run
# the model over the result.
input_batches = SizedBatchWrapper(inputs, batch_size=BATCH_SIZE)
input_vectors = SizedCallableWrapper(input_batches, extractor)
output = model.predict(input_vectors)

# A label is active (1) when its score exceeds that label's threshold.
output_labels = (output > thresholds) * 1

Predict:  43%|████▎     | 1628/3807 [11:23:01<14:45:32, 24.38s/it]

# Category-level classification report

In [None]:
from utils.evaluate import classification_report

dataset_samples = len(output)
category_shape = (dataset_samples, NUM_CATEGORIES)

# Project label-level predictions onto categories: a category is predicted
# for a sample as soon as any of its labels is active.
category_output = np.zeros(category_shape)
for sample, predicted_label in zip(*np.nonzero(output_labels)):
    category_output[sample, emo_mapping[predicted_label]] = 1

# One-hot ground truth: each sample belongs to exactly one category.
category_truth = np.zeros(category_shape)
true_categories = df.category_index.tolist()[:dataset_samples]
category_truth[np.arange(dataset_samples), true_categories] = 1

report = classification_report(category_truth, category_output, sorted(top_cats))

# Print per-category metrics while accumulating support-weighted sums.
# NOTE(review): the aggregates below are support-weighted means of the
# per-category precision/recall. They are printed as "micro" figures;
# confirm that this is the intended definition, particularly for precision.
row_template = '{}\tPrecision: {:.3f}\tRecall: {:.3f}\tF1-score: {:.3f}\tSupport: {:.0f}'
total_support = 0
total_prec = 0
total_rec = 0
for category, metrics in report['labels'].items():
    precision = metrics['precision']
    recall = metrics['recall']
    f1_score = metrics['f1']
    support = metrics['support']
    print(row_template.format(category, precision, recall, f1_score, support))
    total_prec += precision * support
    total_rec += recall * support
    total_support += support

total_prec = total_prec / total_support
total_rec = total_rec / total_support

print('')
print('Mean macro F1-score: {:.2f}'.format(report['macro_f1']))
print('Mean micro F1-score: {:.2f}'.format(report['micro_f1']))
print('Mean micro Precision: {:.2f}'.format(total_prec))
print('Mean micro Recall: {:.2f}'.format(total_rec))

# Confusion matrix & Hierarchical Clustering of label activations

In [None]:
# Rows are the user-provided label, columns the labels the model activated.
label_confusion_matrix = np.zeros((NUM_LABELS, NUM_LABELS))

# Count every active prediction under the true label of its sample.
for sample, predicted in zip(*np.nonzero(output_labels)):
    label_confusion_matrix[labels[sample], predicted] += 1

# Normalise each row to sum to 1; rows with no activations are left as zeros
# to avoid dividing by zero.
row_totals = label_confusion_matrix.sum(axis=1)
nonempty_rows = row_totals > 0
label_confusion_matrix[nonempty_rows] /= row_totals[nonempty_rows, None]

label_confusion_matrix

In [None]:
from matplotlib import pyplot as plt

# Draw the row-normalised matrix into a large figure; fignum=0 makes matshow
# reuse the figure created here instead of opening a new one.
fig = plt.figure(figsize=(16, 16), dpi=200)
plt.matshow(label_confusion_matrix, fignum=0)

ax = fig.axes[0]
tick_positions = np.arange(NUM_LABELS)

ax.set_title('Actual vs Predicted Label Matrix')
ax.set_xlabel('Predicted labels for comments under the given label.')
ax.set_ylabel('User-provided labels.')

# One tick per label on both axes, named after the model's label list.
ax.set_yticks(tick_positions)
ax.set_yticklabels(cats)
ax.set_xticks(tick_positions)
ax.set_xticklabels(cats, rotation='vertical')

# Put the predicted-label axis (ticks and caption) on top of the matrix.
ax.xaxis.tick_top()
ax.xaxis.set_label_position('top')
plt.show()

In [None]:
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering

def plot_dendrogram(model, **kwargs):
    """Build a SciPy linkage matrix from a fitted clustering model and plot it.

    Args:
        model: Fitted hierarchical clustering estimator exposing ``children_``,
            ``distances_`` and ``labels_`` (the notebook passes a fitted
            ``AgglomerativeClustering``).
        **kwargs: Forwarded to ``scipy.cluster.hierarchy.dendrogram``. An
            optional ``labels`` entry overrides the leaf labels; when it is
            absent or ``None`` the notebook-level ``cats`` list is used, as
            before.

    Returns:
        The dictionary returned by ``scipy.cluster.hierarchy.dendrogram``.
    """
    # Resolve leaf labels lazily so the helper does not need the global
    # `cats` when the caller supplies its own labels.
    labels = kwargs.pop('labels', None)
    if labels is None:
        labels = cats

    # Count the samples under each merge node: an index below n_samples is an
    # original sample (one leaf); anything else refers to an earlier merge,
    # whose count has already been computed.
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    # Columns: the two merged children, the merge distance, the sample count.
    linkage_matrix = np.column_stack([model.children_, model.distances_,
                                      counts]).astype(float)

    # Plot the corresponding dendrogram
    return dendrogram(linkage_matrix, labels=labels, **kwargs)

# Map each normalised activation x in [0, 1] to 2 / (x + 1) - 1, which sends
# 1 to 0 and 0 to 1, and cluster the labels on the transformed rows.
# distance_threshold=0 with n_clusters=None builds the full merge tree.
activation_distance = 2 / (label_confusion_matrix + 1.0) - 1
dend = AgglomerativeClustering(distance_threshold=0, n_clusters=None).fit(activation_distance)

plt.figure(dpi=240, figsize=(9, 2))
plt.title('Actual vs Predicted Emotion Hierarchical Clustering')
plt.yticks([])
# NOTE(review): p=88 presumably equals the number of labels so that no level
# of the tree is truncated — confirm, or derive it from NUM_LABELS.
plot_dendrogram(dend, p=88, truncate_mode='level')
plt.show()

# Label confusion table & predictions

For every label, list the five labels the model predicts most often for it, then show sample predictions for some of the input texts.

In [None]:
# All labels ranked by the total normalised activation they receive across
# every user-provided label, most activated first. This has to be printed
# explicitly: a notebook only displays a bare expression when it is the last
# statement of the cell, and this one is not.
print(', '.join([cats[x] for x in label_confusion_matrix.T.sum(axis=1).argsort()[::-1]]))

# For each user-provided label, the five labels the model activates most
# often, most frequent first. The loop variable is deliberately not called
# `labels`, to avoid confusion with the notebook-level `labels` list.
print('\n'.join([f'{cats[i]}: {", ".join([cats[l] for l in top_labels])}' 
                 for i, top_labels in enumerate(label_confusion_matrix.argsort()[:,::-1][:, :5])]))

In [None]:
# Only the first 100 rows are shown, so slice before sorting rather than
# argsorting the whole score matrix and converting it all to Python lists.
# Each row is sorted independently, so the rows that remain are unchanged.
# The five highest-scoring labels per text are listed in ascending score
# order (the strongest prediction comes last).
predictions = (', '.join([cats[idx] for idx in values[-5:]]) for values in output[:100].argsort().tolist())

# zip stops at the shorter iterable, i.e. after the sampled predictions.
for text, predicted in zip(inputs, predictions):
    print(f'{predicted} -- {text}')