In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("/Users/sandundesilva/Documents/4th year/Research Project/UI/findMyFilm/flask-server/Models/final/GenreDataset/Final_movie_train.csv")

In [3]:
class Tokenizer:
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.vocab_size = 0
    
    def build_vocab(self, texts):
        for text in texts:
            for word in text.split():
                if word not in self.word2idx:
                    self.word2idx[word] = self.vocab_size
                    self.idx2word[self.vocab_size] = word
                    self.vocab_size += 1

    def text_to_sequence(self, text, max_length):
        sequence = [self.word2idx[word] for word in text.split() if word in self.word2idx]
        sequence = sequence[:max_length] + [0] * (max_length - len(sequence))
        return sequence


In [4]:
class Transformer(nn.Module):
    def __init__(self, vocab_size, d_model, num_heads, num_classes, max_length):
        super(Transformer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_length)
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_encoder_layer, num_layers=1)
        self.fc = nn.Linear(d_model, num_classes)

    def forward(self, x):
        embedded = self.embedding(x)
        embedded = self.positional_encoding(embedded)
        output = self.transformer_encoder(embedded)
        output = output.mean(dim=1)
        output = self.fc(output)
        return output

In [5]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len):
        super(PositionalEncoding, self).__init__()
        self.encoding = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        self.encoding[:, 0::2] = torch.sin(pos * div_term)
        self.encoding[:, 1::2] = torch.cos(pos * div_term)
        self.encoding = self.encoding.unsqueeze(0)
    
    def forward(self, x):
        return x + self.encoding[:, :x.size(1)].detach()

In [6]:

print(data.columns)

print(data.isnull().sum())

print(data.dtypes)



Index(['id', 'name', 'genre', 'subtitles'], dtype='object')
id           0
name         0
genre        0
subtitles    0
dtype: int64
id            int64
name         object
genre        object
subtitles    object
dtype: object


In [7]:
from sklearn.preprocessing import LabelEncoder


label_encoder = LabelEncoder()
data['genre'] = label_encoder.fit_transform(data['genre'])


tokenizer = Tokenizer()
tokenizer.build_vocab(data['subtitles'])
max_length = 100 
X = [tokenizer.text_to_sequence(text, max_length) for text in data['subtitles']]
X = torch.tensor(X)
y = torch.tensor(data['genre'])

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

vocab_size = len(tokenizer.word2idx)
d_model = 128
num_heads = 4
num_classes = len(data['genre'].unique())
model = Transformer(vocab_size, d_model, num_heads, num_classes, max_length)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



In [10]:
print(model)


Transformer(
  (embedding): Embedding(99779, 128)
  (positional_encoding): PositionalEncoding()
  (transformer_encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
    )
    (linear1): Linear(in_features=128, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=128, bias=True)
    (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=

In [17]:

for epoch in range(10): 
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    print(f'Epoch [{epoch+1}/10], Loss: {loss.item()}')


: 

In [None]:

with torch.no_grad():
    model.eval()
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    accuracy = (predicted == y_test).sum().item() / len(y_test)
    print(f'Test Accuracy: {accuracy}')