In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch_optimizer as optim2

In [3]:
dataset = joblib.load('encodings.joblib')
y = joblib.load('genres.joblib')

In [4]:
x = []
for tensor in dataset:
    x.append(tensor)
    
X = torch.stack(x)
X

tensor([[ 0.0349,  0.0235, -0.0252,  ...,  0.0743, -0.0681,  0.0263],
        [-0.0059, -0.0665, -0.0198,  ...,  0.0336, -0.0681, -0.0098],
        [ 0.0332, -0.0196, -0.0088,  ...,  0.0797, -0.0398, -0.0545],
        ...,
        [-0.0506, -0.0171,  0.0425,  ...,  0.0814, -0.0385, -0.0431],
        [-0.0218,  0.0245, -0.0527,  ...,  0.0152,  0.0071,  0.0260],
        [ 0.0174,  0.0035, -0.0033,  ...,  0.0411, -0.0065, -0.0919]])

In [5]:
Y = y.values
Y = torch.tensor(Y, dtype=torch.float32) 
Y

tensor([[0., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 1., 0., 0.]])

In [6]:
print(f'size of input data = {X.shape}')
print(f'size of output data = {Y.shape}')

size of input data = torch.Size([7728, 384])
size of output data = torch.Size([7728, 25])


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [8]:
class GenreDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]


train_dataset = GenreDataset(X_train, y_train)
test_dataset = GenreDataset(X_test, y_test)
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
class GenreClassifier(nn.Module):
    def __init__(self, input_size, output_size):
        super(GenreClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, output_size)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

In [10]:
input_size = X.shape[1]
output_size = Y.shape[1]
model = GenreClassifier(input_size, output_size)
criterion = nn.BCELoss()
optimizer = optim2.Ranger(model.parameters(), lr=0.001)

In [11]:
num_epochs = 250
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss}")

	addcmul_(Number value, Tensor tensor1, Tensor tensor2)
Consider using one of the following signatures instead:
	addcmul_(Tensor tensor1, Tensor tensor2, *, Number value) (Triggered internally at ..\torch\csrc\utils\python_arg_parser.cpp:1578.)
  exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)


Epoch [1/250], Loss: 109.38300186395645
Epoch [2/250], Loss: 42.194820553064346
Epoch [3/250], Loss: 41.14140249788761
Epoch [4/250], Loss: 39.15633903443813
Epoch [5/250], Loss: 36.85499057173729
Epoch [6/250], Loss: 34.694492012262344
Epoch [7/250], Loss: 32.79312641173601
Epoch [8/250], Loss: 31.565377414226532
Epoch [9/250], Loss: 30.687852554023266
Epoch [10/250], Loss: 29.917442746460438
Epoch [11/250], Loss: 29.350926779210567
Epoch [12/250], Loss: 28.871544167399406
Epoch [13/250], Loss: 28.516992829740047
Epoch [14/250], Loss: 28.095064237713814
Epoch [15/250], Loss: 27.768005162477493
Epoch [16/250], Loss: 27.467013090848923
Epoch [17/250], Loss: 27.05176118016243
Epoch [18/250], Loss: 26.66102433949709
Epoch [19/250], Loss: 26.38297414779663
Epoch [20/250], Loss: 25.973081350326538
Epoch [21/250], Loss: 25.617038920521736
Epoch [22/250], Loss: 25.17306437343359
Epoch [23/250], Loss: 24.773290403187275
Epoch [24/250], Loss: 24.307392455637455
Epoch [25/250], Loss: 23.88861823

In [12]:
model.eval()
genre_scores = []

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        probabilities = torch.sigmoid(outputs)
        genre_score = probabilities * 10
        genre_scores.append(genre_score.numpy())

# Concatenate all batch results
genre_score_array = np.concatenate(genre_scores, axis=0)

In [13]:
genre_labels = y.columns.tolist()
score_df = pd.DataFrame(genre_score_array, columns=genre_labels)

In [14]:
score_df

Unnamed: 0,Action,Adult,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,...,Musical,Mystery,News,None,Sci-Fi,Short,Sport,Thriller,War,Western
0,5.000000,5.000000,5.194819,7.306678,5.000000,5.023396,5.000000,5.000000,5.000000,7.159925,...,5.000029,5.000000,5.000000,5.000000,5.000000,7.219317,5.0,5.000000,5.000000,5.000002
1,5.000000,5.000017,5.000000,5.000000,5.000000,7.309942,5.000007,5.000000,5.000214,5.000000,...,5.000000,5.000296,5.000000,5.000000,5.000002,5.000000,5.0,6.974951,5.000000,5.000000
2,5.000000,5.000000,5.000000,5.000000,5.000000,7.212441,5.000628,5.000000,6.922373,5.000671,...,5.000000,5.000012,5.000000,5.000000,5.000000,5.000000,5.0,5.000699,5.000000,5.000000
3,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.002545,5.000000,...,5.000000,5.000351,5.000000,5.000000,7.228055,5.003295,5.0,6.233229,5.000000,5.000000
4,5.000000,5.000000,5.000000,5.000000,5.000012,5.000031,5.663621,5.001967,5.279696,5.000000,...,5.000000,5.052836,5.000000,5.000002,5.000000,5.000004,5.0,6.952491,5.000000,5.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1541,5.052267,5.000000,5.002139,5.000000,5.000000,5.000000,5.000033,5.000000,7.287879,5.000000,...,5.000000,7.142103,5.000000,5.000000,5.000001,5.000000,5.0,5.007449,5.000000,5.000000
1542,5.000000,5.000000,5.000000,5.000000,5.000000,5.000029,5.000000,5.000000,5.882365,7.302532,...,5.000000,5.000000,5.000000,5.000000,5.000000,5.000021,5.0,5.000000,5.000000,5.000000
1543,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.588245,5.221994,5.000000,...,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,5.0,5.000000,5.000217,5.000000
1544,5.000000,5.000000,5.000048,5.000000,5.000000,5.000000,5.020325,7.310586,5.000000,5.000000,...,5.000000,5.000003,5.000100,5.000000,5.000000,5.000000,5.0,5.000000,5.000000,5.000000


In [15]:
ytest = np.array(y_test)

In [17]:
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    }, 'scoring_model_1.pth')

print("Model saved successfully.")

Model saved successfully.
