# Initial setup

## Create your Hugging Face read token
You can generate one by creating an account on hugging face, go to settings, access tokens and create a read token. Afterwards, add it below.

## Download the glove-100 embedding
Download the glove-100 embedding (glove.6B.100d.txt) and paste in in the same folder with this noteobok. You can download it form the following link: [Glove](https://nlp.stanford.edu/projects/glove/)

## Login to Hugging Face using your token by running the code below:

In [1]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Import required packages

In [2]:
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
import numpy as np

# Self Attention Model

In [3]:
class SelfAttentionModel(nn.Module):
    def __init__(self, num_classes, embedding_matrix):
        super(SelfAttentionModel, self).__init__()

        self.embedding_dim = 100
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=True)
        self.self_attention = nn.MultiheadAttention(embed_dim=self.embedding_dim, num_heads=5)
        self.fc = nn.Linear(self.embedding_dim, num_classes)

    def forward(self, input_ids):
        embedded_input = self.embedding(input_ids)
        attention_output, _ = self.self_attention(embedded_input, embedded_input, embedded_input)
        attention_output = attention_output.permute(1, 0, 2)
        summed_output = torch.sum(attention_output, dim=1)
        logits = self.fc(summed_output)
        return logits

# Dataset for SST-2

In [4]:
class SST2Dataset(Dataset):
    def __init__(self, max_length):
        self.max_length = max_length
        self.dataset = self.load_dataset()
        self.embedding_matrix = self.load_embedding_matrix()

    def load_dataset(self):
        dataset = load_dataset("sst2")
        return dataset

    def load_embedding_matrix(self):
        word2idx = {}
        embedding_matrix = []

        with open('glove.6B.100d.txt', 'r', encoding='utf-8') as f:
            for line in f:
                values = line.split()
                word = values[0]
                vector = np.array(values[1:], dtype='float32')

                word2idx[word] = len(word2idx)
                embedding_matrix.append(vector)

        self.word2idx = word2idx  # Store word2idx in self.word2idx

        return torch.tensor(embedding_matrix, dtype=torch.float32)

    def __len__(self):
        return len(self.dataset['train'])

    def __getitem__(self, idx):
        example = self.dataset['train'][idx]
        input_text = example['sentence']
        label = example['label']

        input_ids = [self.embedding_matrix.size(0) - 1] * self.max_length
        for i, word in enumerate(input_text.split()[:self.max_length]):
            if word in self.word2idx:
                input_ids[i] = self.word2idx[word]

        return torch.tensor(input_ids), label



# Set-up device (cpu or gpu)

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define hyperparameters

In [13]:
num_classes = 2  # Assume binary classification (positive or negative sentiment)
max_length = 128  # Maximum sequence length for input
batch_size_local = 128
num_epochs = 5
learning_rate = 0.001

train_size_proportion = 0.8

# Load the model

In [7]:
# Load the model
dataset = SST2Dataset(max_length)
model = SelfAttentionModel(num_classes, dataset.embedding_matrix).to(device)

Found cached dataset sst2 (C:/Users/andre/.cache/huggingface/datasets/sst2/default/2.0.0/9896208a8d85db057ac50c72282bcb8fe755accc671a57dd8059d4e130961ed5)


  0%|          | 0/3 [00:00<?, ?it/s]

# Split dataset into train and test sets

In [8]:
# Split the dataset into train and test sets
train_size = int(train_size_proportion * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Define the loss function and optimizer

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Data Loaders

In [10]:
train_loader = DataLoader(train_dataset, batch_size=batch_size_local, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size_local, shuffle=False, drop_last=True)

# Train Loop

In [None]:
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation on the test set
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)

            # Update total and correct predictions
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Epoch {epoch + 1}/{num_epochs}, Test Accuracy: {accuracy:.2f}%")

Epoch 1/5, Test Accuracy: 55.20%


# Save model

In [1]:
# torch.save(model.state_dict(), 'self_attention.pth')

# Use the hugginface API to export model
trainer.save_model("self_attention")

NameError: name 'trainer' is not defined