In [17]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, Subset, TensorDataset
import torch
import torchvision.transforms as transforms
import ast
from tqdm import tqdm
import sklearn
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm
from sklearn.metrics import classification_report, hamming_loss,confusion_matrix
import os
import matplotlib.pyplot as plt
from torchinfo import summary
import seaborn as sns

In [2]:
class VGG16MultiLabel(nn.Module):
    """VGG16-based multi-label classifier that emits one raw logit per class.

    The convolutional ``features`` stack is taken from torchvision's VGG16
    (loaded with ``VGG16_Weights.DEFAULT``) and frozen; only the fully
    connected head defined here keeps trainable parameters.

    Args:
        num_classes: Number of output logits (one per label).
    """

    def __init__(self, num_classes=28):
        super().__init__()
        backbone = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
        # Freeze every parameter of the convolutional feature extractor.
        backbone.features.requires_grad_(False)
        self.features = backbone.features

        # The first Linear layer expects a flattened 512 x 6 x 4 feature map,
        # which is what `features` produces for a 3 x 200 x 150 input.
        # The layer order must not change: it determines the state_dict keys.
        head_layers = [
            nn.Flatten(),
            nn.Linear(512 * 6 * 4, 2056),
            nn.ReLU(),
            nn.Linear(2056, 1024),
            nn.ReLU(),
            nn.Dropout(0.6),
            nn.Linear(1024, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the raw logits for a batch of images (no sigmoid applied)."""
        return self.classifier(self.features(x))

In [4]:
class MoviePosterDataset(Dataset):
    """Dataset of movie posters paired with multi-hot genre labels.

    Args:
        csv_file: Path to a CSV with a ``movie_id`` column and a ``genres``
            column holding stringified Python lists of genre names.
        image_dir: Directory containing one ``<movie_id>.jpg`` per row.
        transform: Optional callable applied to the loaded RGB image.
        genre_to_idx: Optional genre -> index mapping. When omitted (or
            empty), one is built from the genres found in the CSV, in sorted
            order.
    """

    def __init__(self, csv_file, image_dir, transform=None, genre_to_idx=None):
        self.image_dir = image_dir
        self.transform = transform
        self.data = pd.read_csv(csv_file)

        # The CSV stores each genre list as text; parse it back into a list.
        self.data['genres'] = self.data['genres'].map(ast.literal_eval)

        # Collect the distinct genres seen in the file, in sorted order.
        seen = set()
        for genre_list in self.data['genres']:
            seen.update(genre_list)
        self.all_genres = sorted(seen)

        # Fall back to a mapping derived from the data when none is supplied.
        default_mapping = {name: pos for pos, name in enumerate(self.all_genres)}
        self.genre_to_idx = genre_to_idx or default_mapping

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.data)

    def get_genre_idx(self):
        """Return the genre -> index mapping used to build the labels."""
        return self.genre_to_idx

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        record = self.data.iloc[idx]
        movie_id = record['movie_id']

        # Multi-hot target: 1.0 at the slot of every known genre of this movie;
        # genres missing from the mapping are ignored.
        label = torch.zeros(len(self.genre_to_idx))
        for name in record['genres']:
            slot = self.genre_to_idx.get(name)
            if slot is not None:
                label[slot] = 1.0

        # Posters are stored as <movie_id>.jpg inside image_dir.
        img_path = os.path.join(self.image_dir, f"{movie_id}.jpg")
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing applied to every poster before it reaches the network:
# resize to 200 x 150 (height x width), convert to a tensor, then normalise
# each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((200,150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

csv_file = "movies_with_posters.csv"
# The poster folder used to be a hard-coded, machine-specific absolute path.
# It can now be overridden through the POSTER_DIR environment variable; the
# original location remains the default so existing setups keep working.
image_dir = os.environ.get("POSTER_DIR", "C:/Users/satis/Downloads/datasets/posters")
if not os.path.isdir(image_dir):
    # Fail here with a clear message instead of part-way through preloading.
    raise FileNotFoundError(
        f"Poster directory not found: {image_dir!r}. "
        "Set the POSTER_DIR environment variable to the folder holding the <movie_id>.jpg files."
    )

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("using ",device)
dataset = MoviePosterDataset(csv_file, image_dir, transform)
genre_to_idx = dataset.get_genre_idx()

# Deterministic 70/30 split over row positions; the 30% part is the test set.
# NOTE(review): this only yields a true held-out set if training used the same
# split parameters (test_size=0.3, random_state=42) - confirm against the
# training notebook.
total_size = len(dataset)
indices = list(range(total_size))
train_idx, temp_idx = train_test_split(indices, test_size=0.3, random_state=42)

test_set = Subset(dataset, temp_idx)

def preload_to_gpu(subset, device):
    """Materialise every ``(image, label)`` pair of ``subset`` on ``device``.

    Iterates over ``subset`` once (with a progress bar), stacks the images and
    the labels into two batched tensors, moves both to ``device``, prints the
    device each one ended up on, and wraps them in a ``TensorDataset``.

    Args:
        subset: Iterable dataset yielding ``(image_tensor, label_tensor)``.
        device: Target device for the stacked tensors.

    Returns:
        A ``TensorDataset`` over the stacked images and labels.
    """
    image_list = []
    label_list = []
    for image, target in tqdm(subset, desc="Preloading to GPU"):
        image_list.append(image)
        label_list.append(target)

    # torch.stack adds the leading sample dimension for us.
    image_batch = torch.stack(image_list).to(device)
    label_batch = torch.stack(label_list).to(device)
    print(image_batch.device)
    print(label_batch.device)
    return TensorDataset(image_batch, label_batch)

# Load the whole test split onto the selected device once, up front.
test_tensor_dataset = preload_to_gpu(subset=test_set, device=device)

using  cuda


Preloading to GPU: 100%|██████████| 7252/7252 [01:46<00:00, 68.02it/s] 


cuda:0
cuda:0


In [5]:
# Build the 9-genre network and show its layer summary for a single
# 3 x 200 x 150 poster.
model = VGG16MultiLabel(num_classes=9)
print(summary(model, input_size=(1, 3, 200, 150)))

# Read the trained weights onto the CPU (weights_only=True restricts
# unpickling to tensor data), then load them into the model. The load call is
# the cell's last expression so the key-matching result is displayed.
checkpoint_state = torch.load('vgg16_genre_model.pth', map_location=torch.device('cpu'), weights_only=True)
model.load_state_dict(checkpoint_state)

Layer (type:depth-idx)                   Output Shape              Param #
VGG16MultiLabel                          [1, 9]                    --
├─Sequential: 1-1                        [1, 512, 6, 4]            --
│    └─Conv2d: 2-1                       [1, 64, 200, 150]         (1,792)
│    └─ReLU: 2-2                         [1, 64, 200, 150]         --
│    └─Conv2d: 2-3                       [1, 64, 200, 150]         (36,928)
│    └─ReLU: 2-4                         [1, 64, 200, 150]         --
│    └─MaxPool2d: 2-5                    [1, 64, 100, 75]          --
│    └─Conv2d: 2-6                       [1, 128, 100, 75]         (73,856)
│    └─ReLU: 2-7                         [1, 128, 100, 75]         --
│    └─Conv2d: 2-8                       [1, 128, 100, 75]         (147,584)
│    └─ReLU: 2-9                         [1, 128, 100, 75]         --
│    └─MaxPool2d: 2-10                   [1, 128, 50, 37]          --
│    └─Conv2d: 2-11                      [1, 256, 50, 37]    

<All keys matched successfully>

In [6]:
# Evaluate the model on the preloaded test set and collect per-label
# predictions and ground-truth targets as NumPy arrays.
test_loader = DataLoader(test_tensor_dataset, batch_size=16, shuffle=False)

# The model is constructed on the CPU and its weights are loaded with
# map_location='cpu', while the test tensors were preloaded onto `device`.
# Move the model to the same device, otherwise the forward pass fails with a
# device mismatch whenever `device` is CUDA.
model.to(device)
model.eval()

all_preds = []
all_targets = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        # Independent per-genre probabilities; a genre counts as predicted
        # when its probability exceeds 0.4.
        probs = torch.sigmoid(outputs)
        preds = (probs > 0.4).int()

        all_preds.append(preds.cpu())
        all_targets.append(labels.cpu())

y_true = torch.cat(all_targets).numpy()
y_pred = torch.cat(all_preds).numpy()

In [14]:
# Column j of y_true / y_pred corresponds to the genre whose index is j, so
# order the names by their stored index instead of relying on the dict's
# iteration order (the two only coincide when the mapping was built in index
# order).
label_names = sorted(genre_to_idx, key=genre_to_idx.get)

print(classification_report(y_true, y_pred, target_names=label_names, zero_division=0))

# Fraction of individual label slots that were predicted incorrectly.
loss = hamming_loss(y_true, y_pred)
print(f"Hamming Loss: {loss:.4f}")

              precision    recall  f1-score   support

      Action       0.37      0.35      0.36      1156
   Adventure       0.36      0.30      0.33      1112
      Comedy       0.48      0.53      0.50      1218
       Crime       0.28      0.33      0.30      1220
       Drama       0.48      0.65      0.55      1214
      Horror       0.51      0.40      0.45      1235
     Romance       0.37      0.37      0.37      1230
      Sci-Fi       0.42      0.41      0.42      1170
    Thriller       0.32      0.26      0.29      1197

   micro avg       0.40      0.40      0.40     10752
   macro avg       0.40      0.40      0.40     10752
weighted avg       0.40      0.40      0.40     10752
 samples avg       0.41      0.43      0.39     10752

Hamming Loss: 0.1973


### Give a random poster as input and get the genres it belongs to.


In [18]:
# Put the model in inference mode and define the preprocessing used for a
# single, user-chosen poster.
model.eval()

# NOTE(review): these are the ImageNet channel statistics, whereas the
# evaluation pipeline earlier in this notebook normalises with mean=0.5 and
# std=0.5. The two should match whatever was used during training - confirm
# which one is correct and use it in both places.
single_image_steps = [
    transforms.Resize((200,150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(single_image_steps)

In [19]:
# Pick the poster to classify: set `imdb_id` to any id that has a matching
# file in the posters folder.
# The single-image tensor stays on the CPU, so the model is moved there too.
model.to('cpu')
imdb_id = "tt0002797"
img_path = f"posters/{imdb_id}.jpg"
print(img_path)

poster = Image.open(img_path)
image = poster.convert('RGB')

# The network expects a batch, so add a leading batch dimension of size 1.
preprocessed = transform(image)
input_tensor = preprocessed.unsqueeze(0)

posters/tt0002797.jpg


In [20]:
# The model is multi-label: each logit is an independent yes/no score for one
# genre. Besides the single highest-scoring index, report every genre whose
# sigmoid probability exceeds 0.4, the same rule the test-set evaluation uses.
with torch.no_grad():
    outputs = model(input_tensor)
    probs = torch.sigmoid(outputs)[0]
    predicted_class = outputs.argmax(dim=1).item()

idx_to_genre = {idx: genre for genre, idx in genre_to_idx.items()}
predicted_indices = [i for i, p in enumerate(probs.tolist()) if p > 0.4]
if not predicted_indices:
    # Nothing cleared the threshold: fall back to the single best guess.
    predicted_indices = [predicted_class]
predicted_genres = [idx_to_genre[i] for i in predicted_indices]

print(f'Predicted class index: {predicted_class}')
print(f'Predicted genres: {predicted_genres}')

Predicted class index: 2


{'Action': 0,
 'Adventure': 1,
 'Comedy': 2,
 'Crime': 3,
 'Drama': 4,
 'Horror': 5,
 'Romance': 6,
 'Sci-Fi': 7,
 'Thriller': 8}

The mapping above shows the class index assigned to each genre.

In [21]:
# Look up the ground-truth genres of the selected poster for comparison with
# the prediction. The CSV stores the genre list as text, so it prints as-is.
df = pd.read_csv("movies_with_posters.csv")
is_selected = df['movie_id'].eq(imdb_id)
genre_row = df.loc[is_selected, 'genres']
print(genre_row.iloc[0])

['Comedy']
