## Classification results

This notebook contains functions to measure the performance of a classifier applied over a set of annotations.

The output of the classification files is expected to be compatible with the output of the UKPLab models. Each line contains the token, the true label, and predicted label of one classification example, in that order, separated by tabs. The first row has the columns names. For example:

`word_n    I-Premise:2:Support    I-Premise:-1:Support`

Training results must be stored in tsv format with the following columns: epoch, modelName, dev_score, test_score, max_dev_score, max_test_score

In [1]:
import numpy
import matplotlib.pyplot as plt
import pandas
import pickle
import os
import re
import seaborn as sns
from sklearn import metrics
sns.set_style('white')
sns.set_palette('colorblind')

In [2]:
CC_EXPERIMENTS_DIRS = [
    ## Full partitions
#     '../../results/ukpnets/paragraph/18-08-17-20-19/',  # Time pre-attention no act
#    '../../results/ukpnets/paragraph/18-08-18-14-19/',  # Baseline, same as 18-06-07-09-40   !!!!
#     '../../results/ukpnets/paragraph/18-08-24-15-47/',  # Feature wise unknown activation
    ## Exploration 1 / Timewise sigmoid
#     '../../results/ukpnets/paragraph/18-08-29-23-12/',
#     '../../results/ukpnets/paragraph/18-08-30-01-28/',
    ##
    ## Exploration 2 / Timewise sigmoid
#     '../../results/ukpnets/paragraph/18-08-30-02-46/',  # Best result
#     '../../results/ukpnets/paragraph/18-08-30-04-10/',
#     '../../results/ukpnets/paragraph/18-08-30-05-00/',
#     '../../results/ukpnets/paragraph/18-08-30-06-05/',
#     '../../results/ukpnets/paragraph/18-08-30-07-10/',
    '/home/mteruel/am/results/ukpnets/paragraph/18-08-30-18-51/', # Full eval timewise sigmoid
    ## Exploration 2 / Featurewise sigmoid
#     '../../results/ukpnets/paragraph/18-08-31-00-48/',
#     '../../results/ukpnets/paragraph/18-08-31-02-46/',
#     '../../results/ukpnets/paragraph/18-08-31-05-55/',
#     '../../results/ukpnets/paragraph/18-08-31-08-06/',
#     '../../results/ukpnets/paragraph/18-08-31-10-26/',
    ##
    '/home/mteruel/am/results/ukpnets/paragraph/18-08-31-14-58/',  # Full eval featurewise sigmoid
    ##
#    '../../results/ukpnets/paragraph/18-09-01-14-03/',
 #   '../../results/ukpnets/paragraph/18-09-01-16-40/',
#    '../../results/ukpnets/paragraph/18-09-01-18-46/',
#    '../../results/ukpnets/paragraph/18-09-01-19-56/',
#    '../../results/ukpnets/paragraph/18-09-01-22-00/',
]

## Classification Results - Dev and Test

In [3]:
def labels_single_file(filename):
    result = pandas.read_csv(filename, sep='\t')
    return result

In [4]:
def prediction_filenames(dirname):
    return [run_prediction for run_prediction in os.listdir(dirname)
            if os.path.isfile(os.path.join(dirname, run_prediction)) and 'predictions' in run_prediction]

In [5]:
def metrics_single_classifier(classifier_dirpath, dataset_name='ukp'):
    """Reads all partitions for a single classifier."""
    result = pandas.DataFrame(columns=[
        'Classifier', 'Partition', 'Dataset',
        'Accuracy', 'Precision', 'Recall', 'F1-Score', 'Support'])
    for index, prediction_file in enumerate(prediction_filenames(classifier_dirpath)):
        predictions = labels_single_file(os.path.join(classifier_dirpath, prediction_file))
        accuracy = metrics.accuracy_score(predictions['True'], predictions.Predicted)
        precision, recall, f1, _ = metrics.precision_recall_fscore_support(
            predictions['True'], predictions.Predicted, average='weighted', warn_for=[])
        support = len(predictions)
        if 'partition' in prediction_file: 
            partition = re.search('.*partition(\d+).*', prediction_file).group(1)
        else:
            partition = 0
        dataset = re.search('.*{}_(\w+).conll'.format(dataset_name), prediction_file).group(1)
        classifier = os.path.basename(os.path.normpath(classifier_dirpath))
        result.loc[index] = [classifier, partition, dataset, accuracy, precision, recall, f1, support]
    return result

In [6]:
def experiments_metrics(experiments_dirs, dataset_name='ukp'):
    """Reads metrics for all classifiers"""
    classifier_metrics = []
    for classifier_path in experiments_dirs:
        classifier_metrics.append(metrics_single_classifier(classifier_path, dataset_name))
    return pandas.concat(classifier_metrics)

In [7]:
METRIC_COLS = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

## COMPONENT CLASSIFICATION

### Classifier summary`

## Learning rate

In [8]:
def learning_single_file(filename):
    result = pandas.read_csv(
        filename, sep='\t', header=None,
        names=['epoch', 'model_name', 'dev_score', 'test_score', 'max_dev_score', 'max_test_score'])
    return result

In [9]:
def learning_filenames(dirname):
    return [learning_metrics for learning_metrics in os.listdir(dirname)
            if os.path.isfile(os.path.join(dirname, learning_metrics)) and 'results' in learning_metrics]

In [10]:
def learning_metrics_single_classifier(classifier_dirpath):
    """Reads all partitions for a single classifier."""
    result = []
    for index, results_file in enumerate(learning_filenames(classifier_dirpath)):
        learning_metrics = learning_single_file(os.path.join(classifier_dirpath, results_file)).drop(
            columns=['model_name', 'max_dev_score', 'max_test_score'])
        learning_metrics = learning_metrics.set_index(['epoch']).stack().reset_index().rename(
            columns={0: 'Metric Value', 'level_1': 'Dataset'})
        learning_metrics['Partition'] = re.search('.*partition(\d+).*', results_file).group(1)
        learning_metrics['Classifier'] = os.path.basename(os.path.normpath(classifier_dirpath))
        result.append(learning_metrics)
    return pandas.concat(result)

In [11]:
def learning_metrics(experiment_dirs):
    """Reads learning metrics for all classifiers"""
    classifier_learning_metrics = []
    for classifier_path in experiment_dirs:
        classifier_learning_metrics.append(learning_metrics_single_classifier(classifier_path))
    return pandas.concat(classifier_learning_metrics)

---

# Essays

In [18]:
ESSAYS_EXPERIMENTS_DIRS = [
    ## Exploration 2 / Timewise sigmoid
'/home/mteruel/am/results/essays2/18-11-11-13-00',
'/home/mteruel/am/results/essays2/18-11-11-13-09',
'/home/mteruel/am/results/essays2/18-11-11-13-20',
'/home/mteruel/am/results/essays2/18-11-11-13-30',
'/home/mteruel/am/results/essays2/18-11-11-13-45',
'/home/mteruel/am/results/essays2/18-11-11-13-55',
'/home/mteruel/am/results/essays2/18-11-11-14-07',

]

In [19]:
es_results = experiments_metrics(ESSAYS_EXPERIMENTS_DIRS, 'essays').set_index('Classifier')
es_results[es_results.Dataset == 'dev'][METRIC_COLS]

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1-Score
Classifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
18-11-11-13-00,0.670899,0.665013,0.670899,0.663706
18-11-11-13-09,0.687827,0.685012,0.687827,0.677986
18-11-11-13-20,0.653224,0.651541,0.653224,0.650573
18-11-11-13-30,0.684093,0.682142,0.684093,0.680774
18-11-11-13-45,0.664426,0.656226,0.664426,0.657778
18-11-11-13-55,0.681603,0.671461,0.681603,0.669277
18-11-11-14-07,0.697038,0.68708,0.697038,0.682246


### Confusion matrix

In [20]:
def print_confusion_matrix(confusion_matrix, class_names, figsize = (10,7), fontsize=14):
    """Prints a confusion matrix, as returned by sklearn.metrics.confusion_matrix, as a heatmap.
    
    Args:
        confusion_matrix: numpy.ndarray
            The numpy.ndarray object returned from a call to sklearn.metrics.confusion_matrix. 
            Similarly constructed ndarrays can also be used.
        class_names: list
            An ordered list of class names, in the order they index the given confusion matrix.
        figsize: tuple
            A 2-long tuple, the first value determining the horizontal size of the ouputted figure,
            the second determining the vertical size. Defaults to (10,7).
        fontsize: int
            Font size for axes labels. Defaults to 14.
        
    Returns:
        The resulting confusion matrix figure
    """
    df_cm = pandas.DataFrame(
        confusion_matrix, index=class_names, columns=class_names, 
    )
    fig = plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt="d")
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")
    heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
    heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return fig

In [21]:
def plot_confusion_matrix(classifier_name, partition=0):
    for classifier_dirname in CC_EXPERIMENTS_DIRS + REL_EXPERIMENTS_DIRS:
        if not classifier_name in classifier_dirname:
            continue
        prediction_files = prediction_filenames(classifier_dirname)
        prediction_file = None
        for possible_prediction_file in prediction_files:
            if 'partition' + str(partition) in possible_prediction_file:
                prediction_file = possible_prediction_file
                break
        if prediction_file is None:
            raise ValueError('No prediction for the given partition')
        predictions = labels_single_file(os.path.join(classifier_dirname, prediction_file))
        labels = numpy.unique(numpy.concatenate([predictions['True'].values, predictions.Predicted.values]))
        print(metrics.classification_report(predictions['True'], predictions.Predicted, labels=labels))
        cm = metrics.confusion_matrix(predictions['True'], predictions.Predicted, labels=labels)
        print(cm.sum())
        print_confusion_matrix(cm, labels)
        return None
    raise ValueError('Classifier not found')

In [16]:
plot_confusion_matrix('18-06-07-09-40', partition=5)

NameError: name 'REL_EXPERIMENTS_DIRS' is not defined

In [17]:
plot_confusion_matrix('18-06-01-06-57', partition=5)

NameError: name 'REL_EXPERIMENTS_DIRS' is not defined