In [1]:
import json
from pathlib import Path
from jsonargparse import CLI
from sentence_transformers import SentenceTransformer

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import f1_score
import numpy as np
from sklearn.model_selection import train_test_split
import random
import torch.nn.functional as F
import torchtext
import matplotlib.pyplot as plt
from torch.nn.utils.rnn import pad_sequence

In [2]:
def flatten(list_of_list):
    """Concatenate the items of every sub-iterable into one flat list.

    Only one level of nesting is removed; the order of the items is kept.
    """
    flat = []
    for sublist in list_of_list:
        flat.extend(sublist)
    return flat

# Data directories, relative to the working directory; the per-transcription
# JSON files of the training set are read from `path_to_training`.
path_to_training, path_to_test = Path("training"), Path("test")

In [3]:
#####
# training and test sets of transcription ids
#####
# Every meeting id is expanded into four transcription ids (suffixes a-d).
# Three of the expanded training ids are left out of the training set.
training_set = [
    transcription_id
    for transcription_id in flatten([
        [meeting + part for part in 'abcd']
        for meeting in [
            'ES2002', 'ES2005', 'ES2006', 'ES2007', 'ES2008', 'ES2009', 'ES2010',
            'ES2012', 'ES2013', 'ES2015', 'ES2016',
            'IS1000', 'IS1001', 'IS1002', 'IS1003', 'IS1004', 'IS1005', 'IS1006', 'IS1007',
            'TS3005', 'TS3008', 'TS3009', 'TS3010', 'TS3011', 'TS3012',
        ]
    ])
    if transcription_id not in ('IS1002a', 'IS1005d', 'TS3012c')
]

test_set = flatten([
    [meeting + part for part in 'abcd']
    for meeting in [
        'ES2003', 'ES2004', 'ES2011', 'ES2014',
        'IS1008', 'IS1009',
        'TS3003', 'TS3004', 'TS3006', 'TS3007',
    ]
])

In [4]:
# Labels for all training utterances, concatenated in `training_set` order.
y_training = []
# Speaker codes; the position in this list is the position of the 1 in the
# one-hot speaker vector.
list_speaker = ['PM','ME','ID','UI']

# The encoding is given explicitly: without it `open` falls back to the
# platform default, which is not UTF-8 on every system.
with open("training_labels.json", "r", encoding="utf-8") as file:
    training_labels = json.load(file)

# Utterance texts and one-hot speaker vectors, index-aligned with y_training.
X_training_text = []
X_training_speaker = []
for transcription_id in training_set:
    with open(path_to_training / f"{transcription_id}.json", "r", encoding="utf-8") as file:
        transcription = json.load(file)

    # Features and labels are later indexed in lock-step, so a transcription
    # whose label count differs from its utterance count would silently shift
    # every label that follows it. Fail loudly instead.
    labels = training_labels[transcription_id]
    if len(labels) != len(transcription):
        raise ValueError(
            f"{transcription_id}: {len(transcription)} utterances "
            f"but {len(labels)} labels"
        )

    for utterance in transcription:
        X_training_text.append(utterance["text"])
        speaker_one_hot = [0] * len(list_speaker)
        # `.index` raises ValueError for a speaker code not in list_speaker.
        speaker_one_hot[list_speaker.index(utterance["speaker"])] = 1
        X_training_speaker.append(speaker_one_hot)

    y_training += labels

In [5]:
def split_sentence(sentence):
    """Lower-case `sentence` and split it into whitespace-separated tokens.

    The characters ``.``, ``,``, ``;`` and ``?`` are surrounded with spaces
    first, so each of them comes out as a token of its own.
    """
    for mark in ".,;?":
        sentence = sentence.replace(mark, f" {mark} ")
    lowered = sentence.lower()
    return lowered.split()

In [6]:
# Load the 50-dimensional GloVe "6B" word vectors through torchtext.
# NOTE: as the progress output of this cell shows, the first run downloads
# glove.6B.zip (862M) into `.vector_cache`, which can take a long time.
glove = torchtext.vocab.GloVe(name="6B", dim=50)

.vector_cache\glove.6B.zip:   1%|          | 8.94M/862M [00:16<26:06, 545kB/s]    


KeyboardInterrupt: 

In [None]:
def vectorize_text(glove_vector):
    """Convert the training utterances to vocabulary indices and split them.

    Reads the notebook-level `X_training_text`, `X_training_speaker` and
    `y_training`. Tokens absent from `glove_vector.stoi` are dropped, and an
    utterance left without any known token is skipped altogether. Out of every
    five consecutive positions, the first four go to the training split and
    the fifth goes to the validation split.

    Returns:
        `(train, valid, train_speaker, valid_speaker, y_train, y_val)`:
        `train` / `valid` are lists of 1-D index tensors, the speaker items
        are float32 tensors and the label items are tensors.
    """
    vocab = glove_vector.stoi
    # One (sentences, speakers, labels) triple per split.
    train_part = ([], [], [])
    valid_part = ([], [], [])

    for position, label in enumerate(y_training):
        known_ids = [
            vocab[token]
            for token in split_sentence(X_training_text[position])
            if token in vocab
        ]
        if len(known_ids) == 0:
            continue
        sentences, speakers, labels = valid_part if position % 5 == 4 else train_part
        sentences.append(torch.tensor(known_ids))
        speakers.append(X_training_speaker[position])
        labels.append(label)

    train, train_speaker, y_train = train_part
    valid, valid_speaker, y_val = valid_part
    return (
        train,
        valid,
        torch.tensor(train_speaker, dtype=torch.float32),
        torch.tensor(valid_speaker, dtype=torch.float32),
        torch.tensor(y_train),
        torch.tensor(y_val),
    )

# Index the utterances with the GloVe vocabulary and split them into training
# and validation parts (token-index lists, speaker tensors, label tensors).
train, valid, train_speaker, valid_speaker, y_train, y_val = vectorize_text(glove)

In [None]:
class Linear(nn.Module):
    """Two-layer feed-forward classifier over a fixed window of word embeddings.

    Every utterance is cut or padded to `max_len` token indices, embedded with
    pretrained vectors, flattened, and passed through
    ``Linear -> ReLU -> Linear -> sigmoid``, giving one probability per
    utterance.

    Args:
        hidden_dim: Width of the hidden layer.
        max_len: Number of token positions fed to the network. Defaults to 67,
            the value that used to be hard-coded.
        embeddings: 2-D float tensor of pretrained vectors, one row per
            vocabulary index. Defaults to the notebook-level `glove.vectors`.
    """

    # Index used to fill missing positions; the same value `sentence_batcher`
    # passes to `pad_sequence` as `padding_value`.
    PAD_INDEX = 0

    def __init__(self, hidden_dim, max_len=67, embeddings=None):
        super().__init__()

        if embeddings is None:
            embeddings = glove.vectors

        self.max_len = max_len
        self.emb = nn.Embedding.from_pretrained(embeddings)
        # Input width follows the embedding size instead of a literal 50.
        self.fc1 = nn.Linear(max_len * embeddings.shape[1], hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, text, speaker):
        """Return a `(batch, 1)` tensor of probabilities.

        Args:
            text: 2-D integer tensor of token indices, `(batch, seq_len)`.
                Any `seq_len` is accepted: longer rows are truncated to
                `max_len`, shorter rows are right-padded with `PAD_INDEX`.
            speaker: Accepted for interface compatibility with the training
                loop; this model does not use it.
        """
        seq_len = text.size(1)
        if seq_len > self.max_len:
            text = text[:, :self.max_len]
        elif seq_len < self.max_len:
            # Without this padding the flatten below would mix rows of
            # different utterances (or fail) whenever a batch is shorter
            # than max_len.
            filler = text.new_full((text.size(0), self.max_len - seq_len), self.PAD_INDEX)
            text = torch.cat([text, filler], dim=1)

        embedded = self.emb(text).flatten(start_dim=1)

        out = torch.relu(self.fc1(embedded))
        return torch.sigmoid(self.fc2(out))

In [None]:
def sentence_batcher(sentences, speakers, y, batch_size=64, drop_last=False):
    """Wrap the sentences, speaker vectors and labels in a shuffling DataLoader.

    The variable-length index tensors in `sentences` are right-padded with 0
    to a common length (batch dimension first) before being combined with
    `speakers` and `y` into one dataset.
    """
    token_matrix = pad_sequence(sentences, batch_first=True, padding_value=0)
    return DataLoader(
        TensorDataset(token_matrix, speakers, y),
        batch_size=batch_size,
        shuffle=True,
        drop_last=drop_last,
    )

In [None]:
# Training batches: incomplete final batch dropped so every step sees 64 examples.
train_loader = sentence_batcher(train, train_speaker, y_train, batch_size=64, drop_last=True)
# Validation keeps every example. Because the loader shuffles, dropping the
# last batch would score each epoch on a different, incomplete subset and make
# the F1 values not comparable across epochs.
val_loader = sentence_batcher(valid, valid_speaker, y_val, batch_size=64, drop_last=False)

In [None]:
# Best validation F1 seen so far; passed to and returned by train_model.
best_f1 = 0


def train_model(model, lr, best_f1, num_epochs=50):
    """Train `model` with Adam and binary cross-entropy, reporting validation F1.

    Uses the notebook-level `train_loader` and `val_loader`. After each epoch
    the validation F1 is printed for the decision thresholds 0.3, 0.20 and
    0.4 (in that order). The score at 0.3 is recorded in the history and
    drives checkpointing: whenever it exceeds `best_f1`, the model's
    state dict is written to 'best_model.pth'.

    Args:
        model: Module called as `model(text, speaker)` and returning
            probabilities with a trailing dimension of size 1.
        lr: Learning rate for Adam.
        best_f1: Score a new epoch has to beat before the checkpoint is
            overwritten.
        num_epochs: Number of passes over `train_loader` (default 50).

    Returns:
        `(history_val_f1, best_f1)`: the per-epoch F1 at threshold 0.3 and the
        updated best score.
    """
    loss_criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Reporting order matters: the first threshold is the one used for the
    # history and for model selection.
    thresholds = (0.3, 0.20, 0.4)
    history_val_f1 = []
    for epoch in range(num_epochs):
        model.train()
        loss_sum = 0.0
        num_batches = 0
        for text, speaker, y in train_loader:
            optimizer.zero_grad()
            outputs = model(text, speaker)
            loss = loss_criterion(outputs.squeeze(1), y.float())
            loss.backward()
            optimizer.step()
            # .item() gives a plain float, so the running sum does not keep
            # the autograd graphs of every batch alive.
            loss_sum += loss.item()
            num_batches += 1
        # Guard against an empty loader rather than dividing by zero.
        loss_tot = loss_sum / num_batches if num_batches else float("nan")

        model.eval()

        true_labels = []
        predicted_probs = []

        with torch.no_grad():
            for text, speaker, y in val_loader:
                outputs = model(text, speaker)
                # Drop the trailing size-1 dimension so one scalar is stored
                # per example.
                predicted_probs.extend(outputs.squeeze(1).cpu().numpy())
                true_labels.extend(y.cpu().numpy())

        scores = [
            f1_score(true_labels, [1 if prob > threshold else 0 for prob in predicted_probs])
            for threshold in thresholds
        ]

        for score in scores:
            print(f'{epoch} val Set Evaluation - F1 Score: {score}' + f'     loss : {loss_tot}')
        print("")

        selection_f1 = scores[0]
        history_val_f1.append(selection_f1)

        if selection_f1 > best_f1:
            best_f1 = selection_f1
            torch.save(model.state_dict(), 'best_model.pth')

    print(best_f1)
    return (history_val_f1, best_f1)

In [None]:
# Instantiate the classifier with a hidden layer of 32 units.
model = Linear(32)

In [None]:
# Keep both results: the per-epoch F1 history, and the updated best score so
# that a later call only overwrites 'best_model.pth' when it beats this run.
history_val_f1, best_f1 = train_model(model, 0.001, best_f1)

0 val Set Evaluation - F1 Score: 0.573048408198866     loss : 0.3578175902366638
0 val Set Evaluation - F1 Score: 0.541958041958042     loss : 0.3578175902366638
0 val Set Evaluation - F1 Score: 0.5398262128892106     loss : 0.3578175902366638

1 val Set Evaluation - F1 Score: 0.5669367909238249     loss : 0.34129929542541504
1 val Set Evaluation - F1 Score: 0.5602140945584301     loss : 0.34129929542541504
1 val Set Evaluation - F1 Score: 0.5059288537549407     loss : 0.34129929542541504

2 val Set Evaluation - F1 Score: 0.5672402804333971     loss : 0.33774206042289734
2 val Set Evaluation - F1 Score: 0.554364471669219     loss : 0.33774206042289734
2 val Set Evaluation - F1 Score: 0.5095993322203672     loss : 0.33774206042289734

3 val Set Evaluation - F1 Score: 0.5664777327935222     loss : 0.3366454839706421
3 val Set Evaluation - F1 Score: 0.5660377358490566     loss : 0.3366454839706421
3 val Set Evaluation - F1 Score: 0.5123456790123457     loss : 0.3366454839706421

4 val Set

KeyboardInterrupt: 