In [8]:
%load_ext autoreload
%autoreload 2

import numpy as np
from config import *
from util import *
from preprocessing import *
import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Load the cached log-mel feature array and its label array from the
# intermediate-data directory (same file names, same load order as before).
X, Y = (
    np.load(INTERMEDIATE / npy_name)
    for npy_name in ('logmel.npy', 'logmel_labels.npy')
)

In [None]:
def save_metrics(LB_metrics, working_dir, sampling_name='Random Sampling (oversampled)'):
    """Pickle the collected metrics and save/show the PR, AP and mAP plots.

    Parameters
    ----------
    LB_metrics : sequence of ``(lb, metrics)`` pairs
        ``lb`` becomes the x-value of the AP/mAP curves; ``metrics`` is indexed
        as ``metrics[label]['auc_pr']`` for every ``label`` in ``LABELS``.
        Must be re-iterable (e.g. a list), since it is traversed several times.
    working_dir : path-like
        Directory that receives ``metrics.pkl`` and every ``*.png`` figure.
        It is created (including parents) when it does not exist yet.
    sampling_name : str, optional
        Strategy name interpolated into the AP/mAP titles and file names.
        The default reproduces the previously hard-coded wording.

    Notes
    -----
    The class-wise PR curves read ``test_Y`` and ``pred_Y`` from the enclosing
    notebook namespace and index them as ``[:, i]`` in the iteration order of
    ``DEFAULT_TOKENS``.
    NOTE(review): neither array is defined in the visible cells -- confirm they
    exist before calling, otherwise this raises ``NameError``.
    """
    from pathlib import Path

    def plot_pr(name, labels, predictions, **kwargs):
        """Draw one precision/recall curve on the current axes."""
        prec, rec, _ = precision_recall_curve(labels, predictions)
        # Axis labels match the plotted order: precision on x, recall on y.
        plt.plot(prec, rec, label=name, linewidth=2, **kwargs)
        plt.xlabel('Precision')
        plt.ylabel('Recall')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.grid(True)

    working_dir = Path(working_dir)
    working_dir.mkdir(parents=True, exist_ok=True)

    # Save the metrics here as well. pickle.dump needs a file opened for
    # binary *writing*; the former 'rb' mode could never succeed.
    with open(working_dir / 'metrics.pkl', 'wb') as f:
        pickle.dump(LB_metrics, f)

    # Class-wise PR curves
    label_order = DEFAULT_TOKENS.keys()
    for i, name in enumerate(label_order):
        plot_pr(name, test_Y[:, i], pred_Y[:, i])
        plt.savefig(working_dir / f'{name} PR Curve.png')
        plt.show()
        # Close explicitly so curves do not pile up on one figure when the
        # backend does not discard the figure on show().
        plt.close()

    # AP curves: collect each label's series once and reuse it for the mAP.
    x = [lb for lb, _ in LB_metrics]
    ap_curves = [[m[l]['auc_pr'] for _, m in LB_metrics] for l in LABELS]
    for l, y in zip(LABELS, ap_curves):
        title = f'AP of {l} for {sampling_name}'
        plt.plot(x, y)
        plt.ylim(0, 1)
        plt.title(title)
        plt.savefig(working_dir / f'{title}.png')
        plt.show()
        plt.close()

    # mAP curve: mean over labels. Rows are sized from LABELS rather than a
    # hard-coded 4, so a different label count neither skews the mean with
    # all-zero rows nor overflows the array.
    ap_matrix = np.array(ap_curves, dtype=float).reshape(len(ap_curves), len(x))

    # The mean is over all labels, so the title must not name a single label
    # (the old code interpolated the leaked loop variable here).
    title = f'mAP for {sampling_name}'
    plt.plot(x, ap_matrix.mean(axis=0))
    plt.ylim(0, 1)
    plt.title(title)
    plt.savefig(working_dir / f'{title}.png')
    plt.show()
    plt.close()



In [None]:
# Output directory for the least-confidence (oversampled) active-learning run.
working_dir = INTERMEDIATE.joinpath('AL', 'LC_oversampled')

def LC_sampling(classifier, X_unlabelled, n_samples):
    """Pick the pool samples the classifier is least confident about.

    Parameters
    ----------
    classifier : object
        Anything exposing ``predict_proba(X)`` that returns a 2-D array-like
        (one row per sample).
    X_unlabelled : array-like
        Unlabelled pool, passed to ``classifier.predict_proba`` unchanged.
    n_samples : int
        Number of indices to return; fewer come back if the pool is smaller.

    Returns
    -------
    numpy.ndarray
        Row indices into ``X_unlabelled``, most uncertain first.
    """
    probabilities = np.asarray(classifier.predict_proba(X_unlabelled))
    # Least confidence = 1 - max probability per row; larger means less certain.
    least_confidence = 1 - np.max(probabilities, axis=1)
    # Rank in DESCENDING uncertainty. The previous ascending argsort returned
    # the most confident samples, the opposite of least-confidence sampling.
    # A stable sort keeps pool order among ties.
    ranked_indices = np.argsort(-least_confidence, kind='stable')
    return ranked_indices[:n_samples]

# Launch the simulation with LC_sampling as the query strategy; the metrics
# pickle path points into this run's working directory.
metrics = AL_resnet16_simulation(
    LC_sampling,
    X,
    Y,
    metrics_pkl=working_dir / 'metrics.pkl',
    batch=32,
    n_queries=200,
    oversample=True,
)


