# Libraries

In [1]:
import random, time, operator

import numpy as np

from sklearn.metrics import accuracy_score, f1_score

import torch
import torch.nn as nn
import torchtext
import torch.nn.functional as F
#from torchtext.data import Field, BucketIterator, Iterator, TabularDataset
from torchtext.legacy.data import Field, BucketIterator, Iterator, TabularDataset # Needed for running this on my laptop
import torch.optim as optim

# Meta variables

In [2]:
device = torch.device('cuda:1')
#device = torch.device('cpu')

my_data_directory = "../data/" # where to store files on mltgpu
my_models_directory = "../models/"

mini_testing = False
my_train_file = "mini_train.csv" if mini_testing == True else "train.csv"
my_test_file  = "mini_test.csv" if mini_testing == True else "test.csv"

dir_for_evaluations = "../evals/" # my settings on MLTGPU

# Dataloader

In [3]:
batch_size = 64

In [4]:
def dataloader(directory  = my_data_directory,
               train_file = my_train_file,
               test_file  = my_test_file,
               batch      = batch_size):
    
    num_whitespacer = lambda x: [int(e) for e in x.split(" ")]
    
    SENTENCE = Field(lower = True, 
                     batch_first = True, 
                     init_token = "<start>", 
                     eos_token = "<end>")
    
    PREDICATE = Field(tokenize = num_whitespacer, # Here might be some problems ...
                      batch_first = True, 
                      pad_token = 0,
                      use_vocab = False,
                      init_token = 0, 
                      eos_token = 0) 
    
    SRLABEL = Field(batch_first = True, 
                    init_token = "<start>", 
                    eos_token = "<end>")
    
    my_fields = [("sentence", SENTENCE),
                 ("predicate", PREDICATE),
                 ("srlabel", SRLABEL)]
    
    train, test = TabularDataset.splits(path   = directory,
                                        train  = train_file,
                                        test   = test_file,
                                        format = 'csv',
                                        fields = my_fields,
                                        csv_reader_params = {'delimiter':'\t',
                                                             'quotechar':'¤'}) # Seems not to be in data
    SENTENCE.build_vocab(train)
    SRLABEL.build_vocab(train)  

    train_iter, test_iter = BucketIterator.splits((train, test),
                                                  batch_size        = batch,
                                                  sort_within_batch = True,
                                                  sort_key          = lambda x: len(x.sentence),
                                                  shuffle           = True,
                                                  device            = device)

    return train_iter, test_iter, SENTENCE.vocab, SRLABEL.vocab

In [5]:
train, test, vocab, labels = dataloader()

# Simple Bidirectional LSTM SR Labeler

In [6]:
class SimpleBiLSTM(nn.Module):
    def __init__(self, voc_size, embedding_size, n_labels, n_layers, dropout):  
        super(SimpleBiLSTM, self).__init__()
        
        self.embeddings = nn.Embedding(voc_size, embedding_size)
        self.sp_pair = embedding_size + 1 # emedded sentence + predicate vector
        
        self.rnn = nn.LSTM(self.sp_pair, n_labels, # input DIM --> output DIM
                           num_layers = n_layers, 
                           bidirectional=True, 
                           batch_first=True, 
                           dropout = dropout)
        
    def forward(self, sentences, pred_vec, softmax = False):
        
        embeddings = self.embeddings(sentences)
        pred_vec = pred_vec.unsqueeze(2)        
        sentence_pred_pair = torch.cat((embeddings, pred_vec), dim=2)
        contextualized_embedding, *_ = self.rnn(sentence_pred_pair)
        
        if softmax == True:
            return F.softmax(contextualized_embedding, dim=2)
        else:
            return contextualized_embedding

# Training

## Training function

In [7]:
def trainer(model, # Must be an instance of a model!
            name_of_model,
            learning_rate,
            epochs,
            data,
            clip_grad = None,
            ignore_pad = False,
            val_data = None,
            save_model = False,
            directory = my_models_directory,
            my_loss_function = nn.CrossEntropyLoss,
            my_optimizer = optim.Adam
           ):
    """ Specifices a general training procedure for a model. 
        Note: trainer() requires an instantiated model as model argument. 
    """
    
    optimizer = my_optimizer(model.parameters(), lr=learning_rate)    
    
    model.to(device)
    model.train()
    
    if ignore_pad:
        pad_idx = labels.stoi["<pad>"]
        loss_function = my_loss_function(ignore_index=pad_idx) # We ignore pad token in loss calculation
    else:
        loss_function = my_loss_function()
    
    for epoch in range(epochs):
        print(f"Epoch: {epoch+1} (out of {epochs}).")
        epoch_loss = 0
        
        for i, batch in enumerate(data):
            print("Batch: ", i, end="\r")
            optimizer.zero_grad # reset gradients
            
            sentence = batch.sentence
            predicate = batch.predicate
            targets = batch.srlabel
            
            b=sentence.shape[0] # !
            sequence_length = sentence.shape[1] # !
            l = targets.shape[1] # !
                        
            output = model(sentence, predicate)
            d = output.shape[2] # !
            
            #print("Output:", output.shape)
            #print("Target:", targets.shape)
            
            loss = loss_function(output.reshape(b*sequence_length, d), # !
                                 targets.reshape(b*sequence_length))
            
            epoch_loss += loss.item()
            
            if clip_grad != None:
                nn.utils.clip_grad_norm_(model.parameters(), clip_grad) # to handle exploding gradients
            
            loss.backward() # compute gradients
            optimizer.step() # update parameters
            #break
            
        print(f"Total loss for Epoch {epoch+1}: {epoch_loss}.")
            
        if val_data != None:
            model.eval()
            # Here we could do some evaluation of model progress, but I have ignored this for now. 
            model.train()
            
    if save_model == True:
        torch.save(model, directory+name_of_model+".pt")

## Hyperparameters

In [9]:
vocab_size = len(vocab)
number_of_labels = len(labels)
print("Size of vocabulary:", vocab_size)
print("Number of labels:", number_of_labels)
epochs = 10
my_learning_rate = 0.01
my_emedding_size = 512

my_dropout = 0.2
number_of_layers = 4

Size of vocabulary: 34068
Number of labels: 87


## Calling and training model

In [10]:
SimpleModel = SimpleBiLSTM(vocab_size, my_emedding_size, number_of_labels, number_of_layers, my_dropout)

In [11]:
SimpleModel.parameters

<bound method Module.parameters of SimpleBiLSTM(
  (embeddings): Embedding(34068, 512)
  (rnn): LSTM(513, 87, num_layers=4, batch_first=True, dropout=0.2, bidirectional=True)
)>

In [12]:
protoname = f"simple_b{batch_size}ep{epochs}ly{number_of_layers}em{my_emedding_size}do{str(my_dropout)[2:]}lr{str(my_learning_rate)[2:]}"
model_name = f"{protoname}_minisample" if mini_testing else f"{protoname}_csample"

trainer(SimpleModel, model_name, my_learning_rate, epochs, train, save_model=True)

Epoch: 1 (out of 10).
Total loss for Epoch 1: 3193.007613182068.
Epoch: 2 (out of 10).
Total loss for Epoch 2: 3189.3753547668457.
Epoch: 3 (out of 10).
Total loss for Epoch 3: 3189.3469820022583.
Epoch: 4 (out of 10).
Total loss for Epoch 4: 3189.3746695518494.
Epoch: 5 (out of 10).
Total loss for Epoch 5: 3189.405362844467.
Epoch: 6 (out of 10).
Total loss for Epoch 6: 3189.301868915558.
Epoch: 7 (out of 10).
Total loss for Epoch 7: 3189.28564953804.
Epoch: 8 (out of 10).
Total loss for Epoch 8: 3189.4384977817535.
Epoch: 9 (out of 10).
Total loss for Epoch 9: 3189.3170397281647.
Epoch: 10 (out of 10).
Total loss for Epoch 10: 3189.4088864326477.


# Evaluation

In [13]:
lst_labels = [labels.itos[x] for x in range(len(labels))]

In [14]:
def metrics(prediction, 
            truth 
            #labels = lst_labels
           ):
    """ Calculates accuracy and F1, given two sequences (lists, arrays) of labels. Since, 
        these metrices here are used for multi-label classification, two versions 
        of F1 are calculated: "macro" and "weigthed", where the former is the mean of F1 for
        each label, and the latter is the mean weigthed by support (the number of true 
        instances for each label), which account for label imbalance.
    """
    
    accuracy = accuracy_score(truth, prediction)
    f1_macro = f1_score(truth, prediction, average = "macro") # Calculate metrics for each label, and find their unweighted mean. Does not take label imbalance into account.
    f1_weighted = f1_score(truth, prediction, average = "weighted") # Calculate metrics globally by counting the total true positives, false negatives and false positives.
    
    return accuracy, f1_macro, f1_weighted

def mean(array):
    """ Calculates the mean and standard deviation of an aray of numbers.
    """
    mean = np.mean(array)
    std  = np.std(array)
    return mean, std

In [15]:
class Evaluation:
    """ For storing and handling information from the evaluation of model(s).
    """
    
    def __init__(self, name):
        self.name = name
        
        self.pooled_acc         = "Not yet defined"
        self.pooled_f1_macro    = "Not yet defined"
        self.pooled_f1_weighted = "Not yet defined" 
        
        self.mean_acc           = ("Not yet defined", "Not yet defined")
        self.mean_f1_macro      = ("Not yet defined", "Not yet defined")
        self.mean_f1_weighted   = ("Not yet defined", "Not yet defined")
        
        self.corr_l_acc         = "Not yet defined"
        self.corr_l_f1_macro    = "Not yet defined"
        self.corr_l_f1_weighted = "Not yet defined"
        
        self.confusion = {"Not yet defined": {"Not yet defined": "Not yet defined"}}
        self.metrics_dict = {"accuracy": ["Not yet defined", "Not yet defined"], 
                             "f1_macro": ["Not yet defined", "Not yet defined"], 
                             "f1_weighted": ["Not yet defined", "Not yet defined"]}

    def best_case(self, metric):
        """ Returns the file which has the best performance score with respect 
            to a metric.
        """
        m_list = self.metrics_dict[metric]
        zic_zac = False if metric == "mse" else True
        m_list.sort(key=operator.itemgetter(1), reverse=zic_zac)
        return m_list[0][0]
    
    def best_cases(self, metric, n):
        """ Returns a list of the N files which has the best performance score 
            with respect to a metric.
        """
        m_list = self.metrics_dict[metric]
        zic_zac = False if metric == "mse" else True
        m_list.sort(key=operator.itemgetter(1), reverse=zic_zac)
        files, values = zip(*m_list)
        return list(files[:n])
    
    def worst_case(self, metric):
        """ Returns the file which has the best performance score with respect 
            to a metric.
        """
        m_list = self.metrics_dict[metric]
        zic_zac = True if metric == "mse" else False
        m_list.sort(key=operator.itemgetter(1), reverse=zic_zac)
        return m_list[0][0]

    def worst_cases(self, metric, n):
        """ Returns a list of the N files which has the best performance score 
            with respect to a metric.
        """
        m_list = self.metrics_dict[metric]
        zic_zac = True if metric == "mse" else False
        m_list.sort(key=operator.itemgetter(1), reverse=zic_zac)
        files, values = zip(*m_list)
        return list(files[:n])
 
    def summary(self):
        """ Summarises an evaluation. Returns string."""
        summary  = "\n".join([f"Model {self.name} performs as follows:", 
                      f"Pooled Accuracy: {self.pooled_acc}",
                      f"Pooled F1_macro: {self.pooled_f1_macro}",
                      f"Pooled F1_weighted: {self.pooled_f1_weighted}",
                              
                      f"Mean Accuracy: {self.mean_acc[0]} (std = {self.mean_acc[1]})",
                      f"Mean F1_macro: {self.mean_f1_macro[0]} (std = {self.mean_f1_macro[1]})",
                      f"Mean F1_weighted: {self.mean_f1_weighted[0]} (std = {self.mean_f1_weighted[1]})",
                      
                      f"Correlation sentence length and accuracy: {self.corr_l_acc}",
                      f"Correlation sentence length and F1_macro: {self.corr_l_f1_macro}",
                      f"Correlation sentence length and F1_weighted: {self.corr_l_f1_weighted}"]) 
        return summary
    
    def confusion_matrix(self):
        """ Returns and prints a confusion matrix. 
        """
        
        srl_labels = list(self.confusion.keys())
        
        matrix = [[""] + srl_labels] # headings
        for l in srl_labels:
            row = [l]
            for k in srl_labels:
                row.append(str(self.confusion[l][k]))
            matrix.append(row)
            
        #matrix_txt = [[str(cell) for cell in row] for row in matrix]
        
        txt = "\n".join(["\t".join(row) for row in matrix])
        
        #print(txt)
        return txt
    
    def save(self, metric, directory=dir_for_evaluations):
        """ Writes the summary of an evaluation to a text file (at some diectory)."""
        
        summary = self.summary()
        confusion_matrix = self.confusion_matrix()
        best_sentences = "\n".join([f"Best sentences ({metric}):"] + self.best_cases(metric, 5))
        worst_sentences = "\n".join([f"Worst sentences ({metric}):"] + self.worst_cases(metric, 5))
        
        output_to_save = summary + "\n" + confusion_matrix + "\n" + best_sentences + "\n" + worst_sentences
        
        with open(f"{directory}{self.name}_{metric}.txt", "w") as e:
            e.write(output_to_save)
    
    def print_summary(self):
        """ Prints out the summary of an evaluation.
        """
        summary = self.summary()
        print(summary)
        
    def print_confusion_matrix(self):
        """ Prints out the confusion matrix.
        """
        c_matrix = self.confusion_matrix()
        print(c_matrix)

In [16]:
def evaluator(model, name, test_data = test, srl_labels = lst_labels, detach_me=False):
    """ 
    """
    t1 = time.perf_counter()
    
    model.eval()
    
    evaluation = Evaluation(name)
    
    prediction_pooled = [] # to collect all predictions
    truth_pooled = []      # to collect all true labels
    seq_lengths = []       # to collect the length of sentences
    confusion = {label: {label: 0 for label in srl_labels} for label in srl_labels} # for confusion matrix
    metrics_calc = {"accuracy": [], "f1_macro": [], "f1_weighted": []} # to collect accuracy and f1 for every sentence
    
    #i=1 # in order to print out progress
    for batch in test_data:
        sentence = batch.sentence
        predicate = batch.predicate
        truth = batch.srlabel
            
        if detach_me == True: # to avoid some CUDA memory shortage issues
            prediction = model(sentence, predicate).detach().to("cpu")
            truth = batch.srlabel.detach().to("cpu")
        
        else:
            prediction = model(sentence, predicate) 
        
        batched_pred_labels = prediction.argmax(2) 
        batched_true_labels = truth
        
        bsz = batched_pred_labels.shape[0]
        
        for b in range(bsz):
            lst_sent    = [vocab.itos[token] for token in sentence[b]]
            str_sent    = " ".join(lst_sent) 
            seq_len     = len([x for x in sentence[b] if vocab.itos[x] not in ["<pad>", "<sos>", "<eos>"]])
            pred_labels = batched_pred_labels[b].tolist()
            true_labels = batched_true_labels[b].tolist()
            lst_lab     = [labels.itos[token] for token in pred_labels]
            annot_sent  = " ".join([f"{w}/{sr}" for w, sr in zip(lst_sent, lst_lab)])
            
            #accuracy, f1_macro, f1_micro, X = metrics(true_labels, pred_labels)
            accuracy, f1_macro, f1_weighted = metrics(true_labels, pred_labels)
            
            prediction_pooled.extend(pred_labels)
            truth_pooled.extend(true_labels)
            seq_lengths.append(seq_len)
            
            for p, t in zip(pred_labels, true_labels):
                confusion[srl_labels[p]][srl_labels[t]] += 1
                
            for m, v in zip(["accuracy", "f1_macro", "f1_weighted"], [accuracy, f1_macro, f1_weighted]):
                metrics_calc[m].append( (f"{str_sent}\n{annot_sent}", v) )
    
    #print(prediction_pooled)
    
    #pooled_accuracy, pooled_f1_macro, pooled_f1_micro, X = metrics(truth_pooled, prediction_pooled)
    pooled_accuracy, pooled_f1_macro, pooled_f1_weighted = metrics(truth_pooled, prediction_pooled)

    lst_accuracy    = list(zip(*metrics_calc["accuracy"]))[1]
    lst_f1_macro    = list(zip(*metrics_calc["f1_macro"]))[1]
    lst_f1_weighted = list(zip(*metrics_calc["f1_weighted"]))[1]

    evaluation.pooled_acc         = pooled_accuracy
    evaluation.pooled_f1_macro    = pooled_f1_macro
    evaluation.pooled_f1_weighted = pooled_f1_weighted 

    evaluation.mean_acc         = mean(lst_accuracy)
    evaluation.mean_f1_macro    = mean(lst_f1_macro)
    evaluation.mean_f1_weighted = mean(lst_f1_weighted)
    
    evaluation.corr_l_acc         = np.corrcoef(lst_accuracy, seq_lengths)[0][1] # double zero indices due to output of numpy.corrcoef
    evaluation.corr_l_f1_macro    = np.corrcoef(lst_f1_macro, seq_lengths)[0][1]
    evaluation.corr_l_f1_weighted = np.corrcoef(lst_f1_weighted, seq_lengths)[0][1]

    evaluation.confusion    = confusion
    evaluation.metrics_dict = metrics_calc
    
    t2 = time.perf_counter()
    passed_time = t2 - t1
    print("Done! ({} m., {} s.)".format(int(passed_time/60), int(passed_time%60)))
    
    return evaluation

In [17]:
simsrl_evaluation = evaluator(SimpleModel, model_name, detach_me = True)

Done! (1 m., 9 s.)


In [18]:
simsrl_evaluation.print_summary()

Model simple_b64ep10ly4em512do2lr01_csample performs as follows:
Pooled Accuracy: 0.008698447480246363
Pooled F1_macro: 0.00023306586396465879
Pooled F1_weighted: 0.01724687393338475
Mean Accuracy: 0.005419621675712997 (std = 0.016489233733082368)
Mean F1_macro: 0.0012722763876300662 (std = 0.003942717454644402)
Mean F1_weighted: 0.010294713987881536 (std = 0.02972268111821122)
Correlation sentence length and accuracy: 0.22561381824654003
Correlation sentence length and F1_macro: 0.22123422121514558
Correlation sentence length and F1_weighted: 0.24148233958945806


In [None]:
simsrl_evaluation.print_confusion_matrix()

In [19]:
simsrl_evaluation.best_case("f1_macro")

'<start> published 1989 . <end> <pad> <pad>\n<start>/<pad> published/<pad> 1989/<pad> ./<pad> <end>/<pad> <pad>/<pad> <pad>/<pad>'

In [None]:
simsrl_evaluation.best_cases("accuracy", 3)

In [20]:
simsrl_evaluation.worst_case("f1_macro")

'<start> get the wheel .. . <end>\n<start>/<pad> get/<pad> the/<pad> wheel/<pad> ../<pad> ./<pad> <end>/<pad>'

In [None]:
simsrl_evaluation.worst_cases("accuracy", 3)

In [None]:
simsrl_evaluation.save("accuracy")
simsrl_evaluation.save("f1_macro")
simsrl_evaluation.save("f1_weighted")

# Model 2: seq2seq

In [None]:
class SRL_Encoder(nn.Module):
    def __init__(self, voc_size, embedding_size, hidden_size):  
        super(SRL_Encoder, self).__init__()
        
        self.embeddings = nn.Embedding(voc_size, embedding_size)
        self.sp_pair = embedding_size + 1 # emedded sentence + predicate vector
        self.rnn = nn.LSTM(self.sp_pair, hidden_size, bidirectional=True, batch_first=True)
        
    def forward(self, sentences, pred_vec):
        
        embeddings = self.embeddings(sentences)
        pred_vec = pred_vec.unsqueeze(2)        
        sentence_pred_pair = torch.cat((embeddings, pred_vec), dim=2)
        contextualized_embedding, (hidden_final, cell_final) = self.rnn(sentence_pred_pair)
        
        return contextualized_embedding, (hidden_final, cell_final)
    
    def initHidden(self): # ?
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
# attention?

In [None]:
class SRL_Decoder(nn.Module):
    def __init__(self, xxx, xxx, hidden_size):  
        super(SRL_Decoder, self).__init__()
        
        self.embeddings = nn.Embedding(voc_size, embedding_size)
        self.rnn = nn.LSTM(xxx, hidden_size, bidirectional=True, batch_first=True)
        
    def forward(self, sentences, pred_vec):
        
        embeddings = self.embeddings(sentences)
        pred_vec = pred_vec.unsqueeze(2)        
        sentence_pred_pair = torch.cat((embeddings, pred_vec), dim=2)
        contextualized_embedding, (hidden_final, cell_final) = self.rnn(sentence_pred_pair)
        
        
        # from end_hidden_state dim --> n_labels, by linear layes
        # decoder will be called n_len(target sequence) times
        
        return contextualized_embedding, (hidden_final, cell_final)
    
    def initHidden(self): # ?
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [None]:
#training

previous_labels = ["start"]

c, (e_hidden, e_cell) = encoder(sentence, pred)



for w in len(target_seq):
    next_label, hidden, cell = decoder(hidden, cell, previous_labels)
    previous_labels.append(nex_label)
    
    loss = cross_entophy(next_label, actual_label)

# Know your enemies; keep until ...

In [None]:

for x in train:
    output = my_model(x.sentence, x.predicate)
    print("op", output.shape)
    soft = F.softmax(output, dim=2)
    print(torch.argmax(soft, dim=2))
    #print("sm", soft.shape)
    #print(torch.sum(soft, dim=2).shape)
