In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import time

# Load and process data
df_anime = pd.read_csv('C:/Users/vigne/OneDrive/Documents/WPI/Sem_2/IR/project/archive/animes.csv')
df_rating = pd.read_csv('C:/Users/vigne/OneDrive/Documents/WPI/Sem_2/IR/project/archive/rating.csv')
df_anime = df_anime[['uid', 'title']]
df_anime.rename(columns={'uid': 'anime_id'}, inplace=True)
df_anime = df_anime.drop_duplicates()

df_rating_cp = df_rating[df_rating['rating'] != -1]
df_rating_cp = df_rating_cp.sample(frac=1)[:1000000].sort_values('user_id')

# Map user and anime ids to integer indices
user_mapping = {user_id: idx for idx, user_id in enumerate(df_rating_cp.user_id.unique())}
anime_mapping = {anime_id: idx for idx, anime_id in enumerate(df_rating_cp.anime_id.unique())}

df_rating_cp['user_id'] = df_rating_cp['user_id'].apply(lambda x: user_mapping[x])
df_rating_cp['anime_id'] = df_rating_cp['anime_id'].apply(lambda x: anime_mapping[x])

# Define neural network-based matrix factorization model
class MatrixFactorization(nn.Module):
    def __init__(self, n_users, n_anime, n_factors):
        super(MatrixFactorization, self).__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.anime_factors = nn.Embedding(n_anime, n_factors)

    def forward(self, user, anime):
        return (self.user_factors(user) * self.anime_factors(anime)).sum(1)

# Initialize model, loss function, and optimizer
n_users = len(user_mapping)
n_anime = len(anime_mapping)
n_factors = 100

model = MatrixFactorization(n_users, n_anime, n_factors)
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 20
batch_size = 1024

for epoch in range(epochs):
    shuffled_indices = torch.randperm(len(df_rating_cp))
    for batch_start in range(0, len(df_rating_cp), batch_size):
        batch_indices = shuffled_indices[batch_start:batch_start + batch_size]
        user_batch = torch.tensor(df_rating_cp.iloc[batch_indices]['user_id'].values, dtype=torch.long)
        anime_batch = torch.tensor(df_rating_cp.iloc[batch_indices]['anime_id'].values, dtype=torch.long)
        rating_batch = torch.tensor(df_rating_cp.iloc[batch_indices]['rating'].values, dtype=torch.float32)

        optimizer.zero_grad()
        predictions = model(user_batch, anime_batch)
        loss = loss_func(predictions, rating_batch)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss.item()}")



Epoch 1/20 - Loss: 158.8330535888672
Epoch 2/20 - Loss: 122.42342376708984
Epoch 3/20 - Loss: 109.28271484375
Epoch 4/20 - Loss: 94.76754760742188
Epoch 5/20 - Loss: 81.03024291992188
Epoch 6/20 - Loss: 64.81282043457031
Epoch 7/20 - Loss: 58.09478759765625
Epoch 8/20 - Loss: 50.39680480957031
Epoch 9/20 - Loss: 44.56081771850586
Epoch 10/20 - Loss: 30.89578628540039
Epoch 11/20 - Loss: 27.647438049316406
Epoch 12/20 - Loss: 20.94245719909668
Epoch 13/20 - Loss: 15.411995887756348
Epoch 14/20 - Loss: 11.883370399475098
Epoch 15/20 - Loss: 9.148985862731934
Epoch 16/20 - Loss: 5.834691047668457
Epoch 17/20 - Loss: 5.22075891494751
Epoch 18/20 - Loss: 3.715306282043457
Epoch 19/20 - Loss: 3.1637372970581055
Epoch 20/20 - Loss: 2.312487840652466


In [2]:
user_id = 43
user_index = user_mapping[user_id]

not_watched = [anime_idx for anime_idx in range(n_anime) if anime_idx not in df_rating_cp[df_rating_cp['user_id'] == user_index]['anime_id'].values]

# Get scores for not watched anime
user_tensor = torch.tensor([user_index] * len(not_watched), dtype=torch.long)
anime_tensor = torch.tensor(not_watched, dtype=torch.long)
anime_scores = model(user_tensor, anime_tensor)

# Get top 10 anime indices
top_10_indices = torch.topk(anime_scores, 10).indices

# Get top 10 anime ids
top_10_anime_ids = [list(anime_mapping.keys())[list(anime_mapping.values()).index(idx)] for idx in top_10_indices.tolist()]

# Get top 10 anime titles
recommended_anime = df_anime[df_anime['anime_id'].isin(top_10_anime_ids)].reset_index(drop=True)

print("Top 10 Recommended Anime:")
print(recommended_anime)

Top 10 Recommended Anime:
   anime_id                                              title
0      5460         Detective Conan Movie 13: The Raven Chaser
1      3536                                            Youshou
2     16648                    Youchien Senshi: Hanamaru Girls
3     10604     Hidan no Aria: Butei ga Kitarite Onsen Kenshuu
4     15893                                             Crash!
5      2814                               Dondon Domeru to Ron
6      4934                                                A.F
7     21575  Koukaku Kidoutai Arise: Ghost in the Shell - L...
8      7568                   Umineko no Naku Koro ni Specials
9     18449  Mobile Suit Gundam Battlefield Record: Avant-T...


In [3]:
# Suppose we have a new user with ID 100, and we want to recommend 10 animes to them

# Step 1: Map the new user ID to an integer index
new_user_index = user_mapping[100]

# Step 2: Create a list of not-yet-watched anime for the new user
not_watched = [anime_idx for anime_idx in range(n_anime) if anime_idx not in df_rating_cp[df_rating_cp['user_id'] == new_user_index]['anime_id'].values]

# Step 3: Get scores for not-yet-watched anime
user_tensor = torch.tensor([new_user_index] * len(not_watched), dtype=torch.long)
anime_tensor = torch.tensor(not_watched, dtype=torch.long)
anime_scores = model(user_tensor, anime_tensor)

# Step 4: Get top K anime indices
top_k_indices = torch.topk(anime_scores, 10).indices

# Step 5: Map the top K anime indices back to their respective anime IDs and titles
top_k_anime_ids = [list(anime_mapping.keys())[list(anime_mapping.values()).index(idx)] for idx in top_k_indices.tolist()]
recommended_anime = df_anime[df_anime['anime_id'].isin(top_k_anime_ids)].reset_index(drop=True)

# Print the top 10 recommended anime titles
print("Top 10 Recommended Anime:")
print(recommended_anime)


Top 10 Recommended Anime:
   anime_id                                              title
0      9735                   Gintama: Shinyaku Benizakura-hen
1      7266                                     Binkan Athlete
2      5402                                     Maid Meshimase
3      5612                                        Terra Story
4     17635                          Koitabi: True Tours Nanto
5     25259  Persona 4 the Animation: A Brief Lesson on Iza...
6     14093                      Pokemon Best Wishes! Season 2
7      2408  Keroro Gunsou Movie 2: Shinkai no Princess de ...
8      4312                            Hokuto no Ken: Toki-den
9      9014                                      Kuttsukiboshi


In [4]:
# Save the model to a file called 'model.pth'
torch.save(model.state_dict(), 'model.pth')

In [5]:
mapping_dict = {'user_mapping': user_mapping, 'anime_mapping': anime_mapping}
torch.save(mapping_dict, 'mapping.pth')