### Sentiment Analysis with Self-Attention

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

class SelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The last dimension of each input (``embed_size``) is split evenly into
    ``heads`` chunks of ``head_dim`` features. One ``head_dim -> head_dim``
    linear projection per role (values / keys / queries) is applied to every
    head, attention is computed independently per head, and the concatenated
    head outputs are mapped back to ``embed_size`` by ``fc_out``.

    Args:
        embed_size: Size of the last dimension of the inputs and the output.
        heads: Number of attention heads; must divide ``embed_size`` evenly.

    Raises:
        AssertionError: If ``embed_size`` is not divisible by ``heads``.
    """

    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            self.head_dim * heads == embed_size
        ), "Embed size needs to be divisible by heads"

        # Per-head projections: they operate on the head_dim-sized slices
        # produced in forward(), not on the full embedding.
        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, value, key, query):
        """Attend from ``query`` positions over ``key``/``value`` positions.

        Args:
            value: Tensor of shape ``(N, value_len, embed_size)``.
            key: Tensor of shape ``(N, key_len, embed_size)``; ``key_len``
                must equal ``value_len``.
            query: Tensor of shape ``(N, query_len, embed_size)``.

        Returns:
            Tensor of shape ``(N, query_len, embed_size)``.
        """
        N = query.shape[0]
        value_len, key_len, query_len = value.shape[1], key.shape[1], query.shape[1]

        # Split the embedding into heads BEFORE projecting: the linear layers
        # expect head_dim input features, so applying them to the unsplit
        # embed_size-wide tensor fails whenever heads > 1.
        # reshape (rather than view) also accepts non-contiguous inputs.
        values = self.values(value.reshape(N, value_len, self.heads, self.head_dim))
        keys = self.keys(key.reshape(N, key_len, self.heads, self.head_dim))
        queries = self.queries(query.reshape(N, query_len, self.heads, self.head_dim))

        # energy[n, h, q, k] = <queries[n, q, h, :], keys[n, k, h, :]>
        energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)

        # Scale by sqrt of the per-head key dimension (the size of the vectors
        # actually dotted together), then normalize over the key axis.
        attention = torch.softmax(energy / (self.head_dim ** 0.5), dim=3)

        # Weighted sum of values per head, then concatenate the heads.
        out = torch.einsum("nhql,nlhd->nqhd", attention, values).reshape(
            N, query_len, self.heads * self.head_dim
        )

        return self.fc_out(out)

class SentimentClassifier(nn.Module):
    """Token-id classifier: embedding -> self-attention -> mean pool -> linear.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_size: Embedding width; also the attention and pooling width.
        num_heads: Number of heads passed to ``SelfAttention``.
        num_classes: Number of output logits per sample.
    """

    def __init__(self, vocab_size, embed_size, num_heads, num_classes):
        super().__init__()
        # Submodules are created in the same order as before so parameter
        # ordering and random initialization are unaffected.
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.self_attention = SelfAttention(embed_size, num_heads)
        self.linear = nn.Linear(embed_size, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits for a batch of token ids.

        ``x`` is used as indices into the embedding table; dim 0 is treated
        as the batch and dim 1 as the sequence position.
        """
        token_vectors = self.embed(x)

        # Self-attention: the same tensor serves as value, key and query.
        attended = self.self_attention(token_vectors, token_vectors, token_vectors)

        # Average over all sequence positions: move the sequence axis last,
        # pool with a window spanning the whole sequence, drop the size-1 axis.
        seq_len = attended.size(1)
        channels_first = attended.transpose(1, 2)
        pooled = F.avg_pool1d(channels_first, kernel_size=seq_len).squeeze(2)

        regularized = self.dropout(pooled)
        return self.linear(regularized)

# Dummy dataset
class DummyDataset(Dataset):
    """Two hard-coded (token_ids, label) samples for smoke-testing the model."""

    def __init__(self):
        token_rows = ([1, 2, 3, 4], [4, 3, 2, 1])
        labels = (0, 1)
        # Each sample is a (1-D token tensor, 0-D label tensor) pair.
        self.items = [
            (torch.tensor(row), torch.tensor(label))
            for row, label in zip(token_rows, labels)
        ]

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.items)

    def __getitem__(self, idx):
        """Return the ``(token_ids, label)`` pair stored at ``idx``."""
        sample = self.items[idx]
        return sample

# Hyperparameters for the smoke-test run on DummyDataset
vocab_size = 10000  # Just as an example; DummyDataset only uses token ids 1-4
embed_size = 256
num_heads = 8  # SelfAttention asserts that embed_size is divisible by this
num_classes = 2
learning_rate = 0.001
batch_size = 2  # DummyDataset holds 2 samples, so each epoch is a single batch
num_epochs = 10

# Model, loss, and optimizer
model = SentimentClassifier(vocab_size, embed_size, num_heads, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Data loader
dataset = DummyDataset()
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Training loop
for epoch in range(num_epochs):
    for data, targets in loader:
        # Forward pass: scores are the per-class logits returned by the model
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward and optimize: clear old gradients, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Printed once per epoch; `loss` is the loss of the last batch processed
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

print("Training complete.")


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torch.utils.data import DataLoader
from torchtext.data.functional import to_map_style_dataset
from torch.nn.utils.rnn import pad_sequence

In [2]:
# Tokenization and Vocabulary
# The same tokenizer is used to build the vocabulary (yield_tokens) and to
# encode reviews at batch time (collate_batch).
tokenizer = get_tokenizer('basic_english')
def yield_tokens(data_iter):
    """Lazily yield the token list for each sample's text.

    ``data_iter`` must yield 2-item records; the first item is ignored and
    the second is passed to the module-level ``tokenizer``.
    """
    yield from (tokenizer(sample_text) for _, sample_text in data_iter)

# Build the vocabulary from the tokenized IMDB training split, reserving
# "<unk>" as a special token.
train_iter = IMDB(split='train')
vocab = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>"])
# Use the "<unk>" index as the default index, i.e. for tokens not in the vocab.
vocab.set_default_index(vocab["<unk>"])

In [4]:
# Encode and Pad
def collate_batch(batch):
    """Convert a list of ``(label, text)`` samples into padded tensors.

    Args:
        batch: Iterable of ``(label, text)`` pairs. ``label`` may be the
            string ``'pos'``/``'neg'`` or the integer ``2``/``1`` (the two
            encodings used by different torchtext releases of IMDB).

    Returns:
        A ``(labels, texts)`` tuple: ``labels`` is an int64 tensor of shape
        ``(batch,)`` with 1 for positive and 0 otherwise; ``texts`` is an
        int64 tensor of shape ``(batch, max_len)`` of vocabulary indices,
        right-padded to the longest sample in the batch.
    """
    label_list, text_list = [], []
    for _label, _text in batch:
        # Accept both label encodings; comparing only against 'pos' would
        # silently map every sample to class 0 when labels are integers.
        label_list.append(int(_label in ('pos', 2)))
        token_ids = vocab(tokenizer(_text))
        text_list.append(torch.tensor(token_ids, dtype=torch.int64))

    labels = torch.tensor(label_list, dtype=torch.int64)
    # batch_first=True yields (batch, max_len). The model treats dim 0 as the
    # batch, so the default (max_len, batch) layout would produce logits whose
    # first dimension does not match `labels`.
    # NOTE(review): padding value 3 is an ordinary vocabulary index, not a
    # dedicated pad token - consider adding "<pad>" to the vocab specials.
    texts = pad_sequence(text_list, batch_first=True, padding_value=3.0)
    return labels, texts

# Convert the IMDB training split to a map-style dataset, then batch it in
# shuffled groups of 8; collate_batch turns each group of raw samples into
# a (labels, token-id tensor) pair.
train_dataset = to_map_style_dataset(IMDB(split='train'))
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_batch)


In [None]:
# Adjust the vocab_size to match the IMDb dataset vocab
vocab_size = len(vocab)
embed_size = 256
num_heads = 8
num_classes = 2

# Resolve the target device once instead of querying CUDA on every batch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SentimentClassifier(vocab_size, embed_size, num_heads, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

# Training
num_epochs = 5

for epoch in range(num_epochs):
    total_loss = 0
    for labels, texts in train_dataloader:
        # Move the batch to the same device as the model.
        labels, texts = labels.to(device), texts.to(device)

        optimizer.zero_grad()
        output = model(texts)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the graph is not kept alive.
        total_loss += loss.item()
    # Mean per-batch loss over the epoch.
    print(f'Epoch: {epoch+1}, Loss: {total_loss / len(train_dataloader)}')

print("Training complete.")
