# Evaluating and Testing a Language Model

##  Import Necessary Libraries

In this step, we import the necessary libraries for evaluating and testing a language model.

In [3]:
import pandas as pd
import numpy as np
import nltk
from nltk.util import ngrams
from collections import defaultdict
from random import random
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
import nltk
from nltk.util import ngrams
from collections import defaultdict
from random import random
import torch
import torch.nn as nn
import torch.optim as optim

## Load and Preprocess Data

In this step, we load the training and test data that will be used to evaluate and test the language model. We then preprocess each text by lowercasing it, tokenizing it, and keeping only alphabetic tokens that are not English stop words.

In [4]:
# Locations of the train/test CSV files, relative to the notebook
train_data_path, test_data_path = './datasets/train.csv', './datasets/test.csv'

# Read each split into its own Pandas dataframe
train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)

# English stop words, held in a set for the membership checks done while preprocessing
stop_words = {*nltk.corpus.stopwords.words('english')}

def preprocess_text(text):
    """Lowercase and tokenize ``text``, keeping alphabetic non-stop-word tokens.

    Args:
        text: Raw text of one row. Any value that is not a ``str`` (for
            example ``None``, or the float ``NaN`` pandas produces for an
            empty CSV cell) is treated as having no content.

    Returns:
        list: Tokens produced by ``nltk.word_tokenize`` on the lowercased
        text, restricted to those for which ``str.isalpha()`` is true and
        that are not in the module-level ``stop_words`` set. An empty list
        is returned for non-string input.
    """
    # A missing cell has no .lower(); return no tokens instead of letting
    # a single empty row abort the whole .apply() over the column.
    if not isinstance(text, str):
        return []
    tokens = nltk.word_tokenize(text.lower())
    return [token for token in tokens if token.isalpha() and token not in stop_words]

# Add a 'tokens' column holding the preprocessed 'text' of every row, for both splits
for _split in (train_data, test_data):
    _split['tokens'] = _split['text'].apply(preprocess_text)

## Define a Custom Dataset and DataLoader

In this step, we define a custom dataset and data loader for the language model, using a sliding window approach to generate sequences of a fixed length.

In [5]:
class LanguageModelDataset(Dataset):
    """Sliding-window next-token dataset over one flattened token stream.

    Every token sequence in ``data`` is concatenated into a single list.
    Item ``idx`` pairs the ``seq_len`` consecutive tokens starting at
    position ``idx`` with the token that immediately follows that window.
    Tokens are stored and returned exactly as given; no vocabulary encoding
    is performed here.
    """

    def __init__(self, data, seq_len):
        """
        Args:
            data: Iterable of token sequences; they are flattened in order.
            seq_len: Number of tokens in each input window.
        """
        self.data = [word for tokens in data for word in tokens]
        self.seq_len = seq_len

    def __len__(self):
        # A stream with at most seq_len tokens has no complete
        # (window, next token) pair. Clamp to 0 because Python rejects a
        # negative __len__ with ValueError.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(window, next_token)`` for the window starting at ``idx``.

        Negative indices count from the end, as for built-in sequences.

        Raises:
            IndexError: If ``idx`` is outside ``range(-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            # Without this, a negative start would slice a truncated window
            # and pair it with an unrelated target token.
            idx += size
        if not 0 <= idx < size:
            raise IndexError('LanguageModelDataset index out of range')
        stop = idx + self.seq_len
        return self.data[idx:stop], self.data[stop]

# Windows of 50 tokens over the training split, each paired with the token that follows it
train_dataset = LanguageModelDataset(data=train_data['tokens'], seq_len=50)
# Batch size is left at DataLoader's default; sample order is shuffled
train_loader = DataLoader(dataset=train_dataset, shuffle=True)

##  Define a Function to Evaluate Language Models

In this step, we define a function to evaluate language models using a given dataset and criterion. This function averages the model's loss over the given dataset and returns the corresponding perplexity (the exponential of the average loss).

In [6]:
def evaluate(model, data, criterion, device):
    """Compute the perplexity of ``model`` over ``data``.

    Args:
        model: Module exposing ``model.lstm.num_layers`` and
            ``model.lstm.hidden_size`` and callable as ``model(tokens, h)``,
            returning ``(output, h)``.
        data: Iterable of ``(tokens, targets)`` batches. Each element is
            converted to a long tensor on ``device``.
        criterion: Loss callable invoked as ``criterion(output, targets)``
            with the targets flattened to 1-D. It is assumed to return the
            mean loss over the target elements (as ``nn.CrossEntropyLoss``
            does by default); confirm this for any other criterion.
        device: Device on which the batch tensors and the initial LSTM state
            are created.

    Returns:
        ``np.exp`` of the loss averaged over all target elements.

    Raises:
        ValueError: If ``data`` yields no target elements, in which case the
            average loss is undefined.
    """
    model.eval()
    total_loss = 0.0
    n_tokens = 0
    with torch.no_grad():
        for tokens, targets in data:
            tokens = torch.as_tensor(tokens, dtype=torch.long, device=device)
            targets = torch.as_tensor(targets, dtype=torch.long, device=device).reshape(-1)

            # Fresh all-zero (h0, c0) for every batch, sized by the first
            # dimension of the token tensor.
            state_shape = (model.lstm.num_layers, tokens.shape[0], model.lstm.hidden_size)
            h = (torch.zeros(state_shape, device=device),
                 torch.zeros(state_shape, device=device))

            output, h = model(tokens, h)
            loss = criterion(output, targets)

            # Undo the criterion's mean using the number of target elements
            # (not the batch size), so batches that contain different
            # numbers of targets are weighted correctly in the average.
            batch_targets = targets.numel()
            total_loss += loss.item() * batch_targets
            n_tokens += batch_targets
    if n_tokens == 0:
        raise ValueError('evaluate() received no target tokens; perplexity is undefined')
    avg_loss = total_loss / n_tokens
    return np.exp(avg_loss)

## Define a Function to Test Language Models

In this step, we define a function to test language models using a given dataset. This function computes the accuracy of the model on the given dataset.

In [7]:
def test(model, data, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for tokens, target in data:
            tokens = torch.LongTensor(tokens).to(device)
            target = torch.LongTensor(target).to(device)

            h = (torch.zeros(model.lstm.num_layers, tokens.shape[0], model.lstm.hidden_size).to(device),
                 torch.zeros(model.lstm.num_layers, tokens.shape[0], model.lstm.hidden_size).to(device))

            output, h = model(tokens, h)
            predictions = torch.argmax(output, dim=1)
            correct += (predictions == target).sum().item()
            total += len(target)
    accuracy = correct / total
    return accuracy


## Load the Trained Model

In [8]:
class LanguageModel(nn.Module):
    """Embedding layer, batch-first LSTM, and a linear projection to the vocabulary."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=1):
        """
        Args:
            vocab_size: Number of embedding rows and of output scores per position.
            embedding_dim: Width of each embedding vector (the LSTM input size).
            hidden_dim: LSTM hidden size (the input width of the linear layer).
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x, h):
        """Run one forward pass.

        Args:
            x: Tensor of token indices fed to the embedding layer.
            h: Initial LSTM state passed straight to ``self.lstm``.

        Returns:
            tuple: ``(scores, h)`` where ``scores`` is the linear layer applied
            to the LSTM output after collapsing every leading dimension into
            one, and ``h`` is the state returned by the LSTM.
        """
        embedded = self.embedding(x)
        lstm_out, h = self.lstm(embedded, h)
        # Collapse the leading dimensions so each position becomes one row
        flattened = lstm_out.contiguous().view(-1, lstm_out.size(2))
        return self.fc(flattened), h
    
# Model architecture
vocab_size, embedding_dim = 257, 100
hidden_dim, num_layers = 256, 2

# Remaining hyperparameters
learning_rate = 1e-3
epochs = 10
batch_size = 128
sequence_length = 50

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [9]:
model_path = 'model.pt'
model = LanguageModel(vocab_size, embedding_dim, hidden_dim, num_layers).to(device)
# map_location remaps the saved tensors onto `device`, so a checkpoint written
# on a GPU can still be opened when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load(model_path, map_location=device))

<All keys matched successfully>

## Try Out LLM Inference with Hugging Face Models

In [17]:
# https://huggingface.co/docs/transformers/en/llm_tutorial