## Precision / Recall / F1 script

A script to calculate precision, recall, and F1 score. The script also calculates the compression ratio and the string-for-string match rate. It provides different read functions depending on which model is being evaluated.

In [None]:
import pickle
import numpy as np
from sklearn.metrics import confusion_matrix
from LSTM_reconstruct import remove_padding
from LSTM_reconstruct import LSTM_reconstruct
from BERT_reconstruct import BERT_reconstruct
from BERT_reconstruct import BERT_rules_reconstruct
from nltk import word_tokenize

In [None]:
# Read the test data

def read_data(test_file):
    """Load and return the pickled object stored at ``test_file``.

    The file is opened in binary mode and is closed again before the
    function returns.

    NOTE: unpickling can execute arbitrary code, so only point this at
    files you produced yourself or otherwise trust.
    """
    with open(test_file, 'rb') as handle:
        return pickle.load(handle)

In [None]:
# Path to the pickled test output of the model under evaluation. It must be
# filled in before this cell is run: read_data opens it in binary mode.
test_file = "" # Path to test output
# Unpickled test output; the cells below index it by key
# (e.g. 'predicted_labels', 'predictions').
test_data = read_data(test_file)

In [None]:
# Use this cell when test_data holds the output of a BERT model.
# Labels and predictions are copied as they are (no padding removal here).

non_pad_labels = list(test_data['true_labels'])
non_pad_preds = list(test_data['predicted_labels'])

(
    pred_reconstructions,
    target_reconstructions,
    originals,
) = BERT_reconstruct(test_data)

In [None]:
# Use this cell when test_data holds the output of a rules-based model.
# Labels and predictions are copied as they are.

non_pad_labels = list(test_data['true_labels'])
non_pad_preds = list(test_data['predicted_labels'])

In [None]:
# If the output is from a BERT_rules ensemble model

# Second pickle, loaded with the same reader as test_data. The path must be
# filled in before this cell is run.
rules_data = read_data("") # Path to rules output

# The helper receives both loaded outputs and its result is unpacked into
# three values: predicted reconstructions, target reconstructions, originals.
# NOTE(review): this cell does not set non_pad_labels / non_pad_preds, which
# the accuracy cell reads - presumably one of the other model cells has to be
# run first; confirm.
pred_reconstructions, target_reconstructions, originals = BERT_rules_reconstruct(test_data, rules_data)

In [None]:
# Use this cell when test_data holds the output of an LSTM model.
# The label sequences go through remove_padding, and every
# prediction/target pair goes through LSTM_reconstruct.

true_labels = list(test_data['target_labels'])
predicted_labels = list(test_data['predicted_labels'])

non_pad_labels, non_pad_preds = remove_padding(true_labels, predicted_labels)

pred_reconstructions = []
target_reconstructions = []

for example_idx, prediction in enumerate(test_data['predictions']):
    target = test_data['targets'][example_idx]
    pred_text, target_text = LSTM_reconstruct(prediction, target)
    pred_reconstructions.append(pred_text)
    target_reconstructions.append(target_text)

In [None]:
# Label-level accuracy over every sequence, then the confusion matrix
# over all labels flattened into one array.

correct = 0
total = 0
for seq_idx, seq_labels in enumerate(non_pad_labels):
    for pos, gold in enumerate(seq_labels):
        total += 1
        if gold == non_pad_preds[seq_idx][pos]:
            correct += 1

print("Accuracy:")
print(correct/total)

labels_concat = np.concatenate(non_pad_labels)
preds_concat = np.concatenate(non_pad_preds)

# Confusion matrix (left as the last expression so the notebook shows it)

confusion_matrix(labels_concat, preds_concat)

In [None]:
# Precision, recall and F1 for retained tokens.
# The counts are copied by hand from the confusion matrix output
# (because of occasional label noise), so update them after every new run.

true_pos = 939
false_pos = 272
false_neg = 491

precision = true_pos / (true_pos + false_pos)
recall = true_pos / (true_pos + false_neg)
f1 = (2*precision*recall)/(precision+recall)

for score in (precision, recall, f1):
    print(score)

In [None]:
# String for string match (LSTM): count the examples whose prediction is
# equal to its target, then print the count and the percentage.

total = len(test_data['predictions'])
matches = sum(
    1
    for idx in range(total)
    if test_data['predictions'][idx] == test_data['targets'][idx]
)
print("Total matches:")
print(matches)
print(matches/(total/100))

In [None]:
# String for string match (BERT): count the reconstructed predictions that
# are equal to the reconstructed target, then print count and percentage.

total = len(pred_reconstructions)
matches = sum(
    1
    for idx in range(total)
    if pred_reconstructions[idx] == target_reconstructions[idx]
)
print("Total matches:")
print(matches)
print(matches/(total/100))

In [None]:
# Compression ratio BERT: mean over all examples of
# (tokens in the predicted reconstruction) / (tokens in the original).

crs = 0

for idx, reconstruction in enumerate(pred_reconstructions):
    n_original = len(word_tokenize(originals[idx]))
    n_predicted = len(word_tokenize(reconstruction))
    crs += n_predicted/n_original

# Last expression of the cell: the mean ratio
crs/len(pred_reconstructions)

In [None]:
# Compression ratio Rules: mean over all examples of
# (tokens in the prediction) / (tokens in the original).

crs = 0
n_examples = len(test_data['originals'])

for i, original in enumerate(test_data['originals']):
    token_original = word_tokenize(original)
    token_prediction = word_tokenize(test_data['predictions'][i])
    crs += len(token_prediction)/len(token_original)

# Divide by the number of examples summed above, not by
# len(pred_reconstructions): the rules cell never defines
# pred_reconstructions, so that name is either undefined here or left over
# from another model's cell.
crs/n_examples

In [None]:
# Recover originals for LSTM
#
# Each entry of test_data['wholeInput'] is converted with str() and is kept
# only when the result looks like a bytes literal (b'...' or b"..."); the
# b-prefix and the quotes are then stripped. Empty entries and entries
# without that wrapper are skipped.
# NOTE(review): str(...)[2:-1] keeps the escape sequences of the bytes repr;
# presumably the entries are bytes objects and could be decoded instead -
# confirm the type of 'wholeInput' before changing this.

originals = []
# Predictions taken at the same index as each kept original, so that a
# skipped entry cannot shift the original/prediction pairing below.
kept_predictions = []

for i in range(len(test_data['predictions'])):
    original = test_data['wholeInput'][i]
    if len(original) == 0:
        continue
    text = str(original)
    if (text[0:2], text[-1]) in (("b'", "'"), ('b"', '"')):
        originals.append(text[2:-1])
        kept_predictions.append(pred_reconstructions[i])

# Compression ratio LSTM: mean over the kept pairs of
# (tokens in the predicted reconstruction) / (tokens in the original).

crs = 0

for original, prediction in zip(originals, kept_predictions):
    token_original = word_tokenize(original)
    token_prediction = word_tokenize(prediction)
    crs += len(token_prediction)/len(token_original)

# Average over the pairs that were actually summed; dividing by
# len(pred_reconstructions) would understate the mean whenever an entry
# was skipped above.
crs/len(originals)