In [1]:
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AdamW, AutoModelForQuestionAnswering, AutoTokenizer
import os
import re
import string
from tqdm import tqdm
from collections import Counter
import matplotlib.pyplot as plt
import import_ipynb
import extract_transform_evaluate as fn

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_model(self, dataloader, optimizer):
    """Run one training epoch over ``dataloader`` and report accuracy and loss.

    This is a free function that receives the model explicitly as ``self``.
    The model must be callable with ``input_ids`` and ``attention_mask``
    keyword arguments and return a ``(start_logits, end_logits)`` pair, and it
    must provide ``train()``, a ``device`` attribute and ``find_focal_loss``.

    Args:
        self: Model being trained.
        dataloader: Iterable of batches. Each batch is indexed with the keys
            ``'input_ids'``, ``'attention_mask'``, ``'start_positions'`` and
            ``'end_positions'``; every value is moved to ``self.device``.
        optimizer: Object exposing ``zero_grad()`` and ``step()``.

    Returns:
        tuple: ``(accuracy, loss)``. ``accuracy`` is the fraction of start and
        end positions predicted exactly right over every example seen in the
        epoch (count-weighted, so a short final batch is not over-weighted).
        ``loss`` is the mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    self.train()
    batch_losses = []
    correct, total = 0, 0
    for batch in tqdm(dataloader, desc='Training'):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(self.device)
        attention_mask = batch['attention_mask'].to(self.device)
        start_pos = batch['start_positions'].to(self.device)
        end_pos = batch['end_positions'].to(self.device)

        start_res, end_res = self(input_ids=input_ids, attention_mask=attention_mask)
        loss = self.find_focal_loss(start_res, end_res, start_pos, end_pos)
        batch_losses.append(loss.item())
        loss.backward()
        optimizer.step()

        # Highest-scoring index along dim 1 is the predicted position
        # (logits are presumably (batch, seq_len) -- TODO confirm in QAModel).
        start_pred = torch.argmax(start_res, dim=1)
        end_pred = torch.argmax(end_res, dim=1)

        # Accumulate raw counts instead of per-batch ratios so the epoch
        # accuracy is exact even when batches have different sizes.
        correct += (start_pred == start_pos).sum().item()
        correct += (end_pred == end_pos).sum().item()
        total += len(start_pred) + len(end_pred)

    return correct / total, sum(batch_losses) / len(batch_losses)

def plot_metrics(epochs, train_losses, train_accuracies, wer_scores, f1_scores):
    """Plot the four per-epoch metrics on a 2x2 grid and show the figure.

    Args:
        epochs: X-axis values shared by every panel (e.g. ``range(1, 7)``).
        train_losses: Training loss per epoch.
        train_accuracies: Training accuracy per epoch.
        wer_scores: Word error rate per epoch.
        f1_scores: F1 score per epoch.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # (title, y-axis label, values, line colour) for each panel in display
    # order. Labels are spelled out rather than derived from the title, which
    # previously labelled the WER panel "Error".
    panels = [
        ('Training Loss', 'Loss', train_losses, 'blue'),
        ('Training Accuracy', 'Accuracy', train_accuracies, 'green'),
        ('Word Error Rate (WER)', 'WER', wer_scores, 'red'),
        ('F1 Score', 'F1', f1_scores, 'brown'),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    for ax, (title, ylabel, values, color) in zip(axes.flat, panels):
        ax.plot(epochs, values, marker='o', linestyle='-', color=color)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)

    fig.tight_layout()
    plt.show()

In [2]:
# Noise V1: train (or reload) the DistilBERT QA model and evaluate it on the WER44 test set.
train_data = '../spoken_train-v1.1.json'
test_data = '../spoken_test-v1.1_WER44.json'
MODEL_PATH = "distilbert-base-uncased"
MODEL_SAVE_PATH = "qa_simple_model"
NUM_EPOCHS = 6  # drives both the training loop and the x-axis of the metric plots

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# 512 is forwarded to the helper as-is; presumably the maximum sequence
# length -- TODO confirm in extract_transform_evaluate.
train_encodings = fn.collect_and_find_positions(train_data, tokenizer, 512)
test_encodings = fn.collect_and_find_positions(test_data, tokenizer, 512)

train_set = fn.SpokenSquad(train_encodings)
test_set = fn.SpokenSquad(test_encodings)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1)

distilbert_base_uncased__model = AutoModelForQuestionAnswering.from_pretrained(MODEL_PATH)
qa_model = fn.QAModel(distilbert_base_uncased__model, device)
optimizer = AdamW(qa_model.parameters(), lr=2e-5, weight_decay=2e-2)

print("Current working directory:", os.getcwd())
print("MODEL_SAVE_PATH:", MODEL_SAVE_PATH)
print("Directory exists:", os.path.isdir(MODEL_SAVE_PATH))

if os.path.isdir(MODEL_SAVE_PATH):
    # A saved model already exists: just evaluate it.
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine;
    # weights_only=True restricts unpickling to tensors/state-dict containers.
    saved_state = torch.load(
        os.path.join(MODEL_SAVE_PATH, 'model_weights.pt'),
        map_location=device,
        weights_only=True,
    )
    qa_model.load_state_dict(saved_state)
    qa_model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
    print(f"{MODEL_SAVE_PATH} Model loaded!")
    avg_f1_score, wer_score = qa_model.evaluate_model(test_loader, tokenizer)
    print(f"WER Score: {wer_score}")
    print(f"F1 Score: {avg_f1_score}")
else:
    # No saved model: train, evaluating on the test set after every epoch.
    wer_scores, accuracies, losses, f1_scores = [], [], [], []
    for epoch in range(1, NUM_EPOCHS + 1):
        print(f'Epoch - {epoch}')
        accuracy, loss = train_model(qa_model, train_loader, optimizer)
        accuracies.append(accuracy)
        losses.append(loss)
        print(f"Train Accuracy: {accuracy} and Train Loss: {loss}")

        avg_f1_score, wer_score = qa_model.evaluate_model(test_loader, tokenizer)
        f1_scores.append(avg_f1_score)
        wer_scores.append(wer_score)
        print(f"F1 Score: {avg_f1_score} and WER Score: {wer_score}")

    # exist_ok avoids a crash if the directory appeared while training ran.
    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
    torch.save(qa_model.state_dict(), os.path.join(MODEL_SAVE_PATH, 'model_weights.pt'))
    tokenizer.save_pretrained(MODEL_SAVE_PATH)
    plot_metrics(range(1, NUM_EPOCHS + 1), losses, accuracies, wer_scores, f1_scores)

Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  qa_model.load_state_dict(torch.load(os.path.join(MODEL_SAVE_PATH, 'model_weights.pt')))


Current working directory: /home/lmadipa/HW3/Simple
MODEL_SAVE_PATH: qa_simple_model
Directory exists: True
qa_simple_model Model loaded!


Evaluating Model!: 100%|██████████| 17841/17841 [01:30<00:00, 197.48it/s]


WER Score: 2.9742701759786567
F1 Score: 0.37362068735484877


In [3]:
# Noise V2: train (or reload) the DistilBERT QA model and evaluate it on the WER54 test set.
train_data = '../spoken_train-v1.1.json'
test_data = '../spoken_test-v1.1_WER54.json'
MODEL_PATH = "distilbert-base-uncased"
MODEL_SAVE_PATH = "qa_simple_model"
NUM_EPOCHS = 6  # drives both the training loop and the x-axis of the metric plots

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# 512 is forwarded to the helper as-is; presumably the maximum sequence
# length -- TODO confirm in extract_transform_evaluate.
train_encodings = fn.collect_and_find_positions(train_data, tokenizer, 512)
test_encodings = fn.collect_and_find_positions(test_data, tokenizer, 512)

train_set = fn.SpokenSquad(train_encodings)
test_set = fn.SpokenSquad(test_encodings)
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1)

distilbert_base_uncased__model = AutoModelForQuestionAnswering.from_pretrained(MODEL_PATH)
qa_model = fn.QAModel(distilbert_base_uncased__model, device)
optimizer = AdamW(qa_model.parameters(), lr=2e-5, weight_decay=2e-2)

if not os.path.isdir(MODEL_SAVE_PATH):
    # No saved model: train, evaluating on the test set after every epoch.
    wer_scores, accuracies, losses, f1_scores = [], [], [], []
    for epoch in range(1, NUM_EPOCHS + 1):
        print(f'Epoch - {epoch}')

        # train_model / plot_metrics are the notebook-level helpers defined in
        # the first cell; the former `bertFn.` prefix named a module that is
        # never imported and raised NameError.
        accuracy, loss = train_model(qa_model, train_loader, optimizer)
        accuracies.append(accuracy)
        losses.append(loss)
        print(f"Train Accuracy: {accuracy} and Train Loss: {loss}")

        avg_f1_score, wer_score = qa_model.evaluate_model(test_loader, tokenizer)
        f1_scores.append(avg_f1_score)
        wer_scores.append(wer_score)
        print(f"F1 Score: {avg_f1_score} and WER Score: {wer_score}")

    os.makedirs(MODEL_SAVE_PATH, exist_ok=True)
    torch.save(qa_model.state_dict(), os.path.join(MODEL_SAVE_PATH, 'model_weights.pt'))
    tokenizer.save_pretrained(MODEL_SAVE_PATH)
    plot_metrics(range(1, NUM_EPOCHS + 1), losses, accuracies, wer_scores, f1_scores)
else:
    # A saved model already exists: just evaluate it.
    # map_location lets a checkpoint saved on GPU load on a CPU-only machine;
    # weights_only=True restricts unpickling to tensors/state-dict containers.
    saved_state = torch.load(
        os.path.join(MODEL_SAVE_PATH, 'model_weights.pt'),
        map_location=device,
        weights_only=True,
    )
    qa_model.load_state_dict(saved_state)
    qa_model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_SAVE_PATH)
    print(f"{MODEL_SAVE_PATH} Model loaded!")
    avg_f1_score, wer_score = qa_model.evaluate_model(test_loader, tokenizer)

    print(f"WER Score: {wer_score}")
    print(f"F1 Score: {avg_f1_score}")

Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  qa_model.load_state_dict(torch.load(os.path.join(MODEL_SAVE_PATH, 'model_weights.pt')))


qa_simple_model Model loaded!


Evaluating Model!: 100%|██████████| 17841/17841 [01:30<00:00, 197.86it/s]


WER Score: 4.083768656716418
F1 Score: 0.28788094505899353
