In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

In [3]:
anime_df = pd.read_csv('anime.csv')
num_users = 100 
num_animes = anime_df.shape[0]

In [4]:
np.random.seed(42)  
user_item_matrix = np.random.randint(0, 2, (num_users, num_animes))
anime_df['genre'] = anime_df['genre'].fillna('').apply(lambda x: x.split(', '))

In [5]:
mlb = MultiLabelBinarizer()
genres_encoded = mlb.fit_transform(anime_df['genre'])
user_item_matrix.shape, genres_encoded.shape

((100, 12294), (12294, 44))

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AnimeRecommendationModel(nn.Module):
    def __init__(self, num_users, num_animes, num_genres, embedding_size):
        super(AnimeRecommendationModel, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.anime_embedding = nn.Embedding(num_animes, embedding_size)
        self.genre_dense = nn.Linear(num_genres, embedding_size)

        self.attention_fc = nn.Linear(embedding_size, embedding_size)

        self.fc1 = nn.Linear(embedding_size * 3, 128)  
        self.fc2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 1)

        self.dropout = nn.Dropout(0.5)

    def forward(self, user_ids, anime_ids, genres):
        user_embedded = self.user_embedding(user_ids)
        anime_embedded = self.anime_embedding(anime_ids)
        genre_embedded = F.relu(self.genre_dense(genres))

        attention_weights = F.softmax(self.attention_fc(anime_embedded), dim = 1)
        attended_genres = attention_weights * genre_embedded

        combined = torch.cat([user_embedded, anime_embedded, attended_genres], dim = 1)
        combined = F.relu(self.fc1(combined))
        combined = self.dropout(combined)
        combined = F.relu(self.fc2(combined))

        prediction = torch.sigmoid(self.output(combined))
        return prediction

embedding_size = 50
model = AnimeRecommendationModel(num_users = 100, num_animes = 12294, num_genres=44, embedding_size=embedding_size)
print(model)

AnimeRecommendationModel(
  (user_embedding): Embedding(100, 50)
  (anime_embedding): Embedding(12294, 50)
  (genre_dense): Linear(in_features=44, out_features=50, bias=True)
  (attention_fc): Linear(in_features=50, out_features=50, bias=True)
  (fc1): Linear(in_features=150, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (output): Linear(in_features=64, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [7]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

class AnimeDataset(Dataset):
    def __init__(self, user_item_matrix, genres_encoded):
        self.user_item_matrix = user_item_matrix
        self.genres_encoded = genres_encoded
        self.num_users, self.num_animes = user_item_matrix.shape

    def __len__(self):
        return self.num_users * self.num_animes

    def __getitem__(self, idx):
        user_id = idx // self.num_animes
        anime_id = idx % self.num_animes
        user_preference = self.user_item_matrix[user_id, anime_id]
        genres = self.genres_encoded[anime_id]

        return torch.tensor(user_id, dtype = torch.long), torch.tensor(anime_id, dtype = torch.long), torch.tensor(genres, dtype = torch.float), torch.tensor(user_preference, dtype = torch.float)

user_interactions_df = pd.read_csv("user_interactions.csv")
anime_dataset = AnimeDataset(user_item_matrix = user_interactions_df.pivot(index = 'user_id', columns = 'anime_id', values = 'watched').fillna(0).to_numpy(), genres_encoded = genres_encoded)

In [8]:
from torch.utils.data import DataLoader, SubsetRandomSampler

train_indices, val_indices = train_test_split(range(len(anime_dataset)), test_size = 0.2, random_state = 42)
train_loader = DataLoader(anime_dataset, sampler = SubsetRandomSampler(train_indices), batch_size = 64)
val_loader = DataLoader(anime_dataset, sampler = SubsetRandomSampler(val_indices), batch_size = 64)

In [9]:
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.BCELoss()  
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for user_ids, anime_ids, genres, labels in train_loader:
        user_ids, anime_ids, genres, labels = user_ids.to(device), anime_ids.to(device), genres.to(device), labels.to(device)

        outputs = model(user_ids, anime_ids, genres).squeeze()
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)

    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for user_ids, anime_ids, genres, labels in val_loader:
            user_ids, anime_ids, genres, labels = user_ids.to(device), anime_ids.to(device), genres.to(device), labels.to(device)
            outputs = model(user_ids, anime_ids, genres).squeeze()
            loss = criterion(outputs, labels)
            total_val_loss += loss.item()

    avg_val_loss = total_val_loss / len(val_loader)

    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

Epoch [1/10], Training Loss: 0.6934, Validation Loss: 0.6932
Epoch [2/10], Training Loss: 0.6933, Validation Loss: 0.6931
Epoch [3/10], Training Loss: 0.6932, Validation Loss: 0.6932
Epoch [4/10], Training Loss: 0.6932, Validation Loss: 0.6931
Epoch [5/10], Training Loss: 0.6932, Validation Loss: 0.6932
Epoch [6/10], Training Loss: 0.6932, Validation Loss: 0.6932
Epoch [7/10], Training Loss: 0.6932, Validation Loss: 0.6931
Epoch [8/10], Training Loss: 0.6932, Validation Loss: 0.6932
Epoch [9/10], Training Loss: 0.6932, Validation Loss: 0.6931
Epoch [10/10], Training Loss: 0.6932, Validation Loss: 0.6931


In [19]:
def initialize_model(embedding_size):
    num_users = user_item_matrix.shape[0]
    num_animes = anime_df.shape[0]
    num_genres = len(genres_encoded[0])
    
    model = AnimeRecommendationModel(num_users, num_animes, num_genres, embedding_size)
    return model

In [20]:
embedding_size = 50
model = initialize_model(embedding_size)

In [43]:
import pandas as pd
import numpy as np
import torch

def get_recommendations(watched_animes, selected_genres, num_recommendations=3):
    anime_title_to_id = {title: anime_id for anime_id, title in enumerate(anime_df['name'])}
    
    watched_anime_ids = [anime_title_to_id.get(anime, -1) for anime in watched_animes]    
    watched_anime_ids = [anime_id for anime_id in watched_anime_ids if anime_id != -1]
    
    selected_genre_vector = np.zeros(len(genres_encoded[0]))
    for genre in selected_genres:
        genre_idx = anime_df['genre'].apply(lambda x: genre in x)
        selected_genre_vector += genres_encoded[genre_idx].sum(axis=0)
    
    user_ids = torch.tensor([0] * len(watched_anime_ids), dtype=torch.long).to(device)
    anime_ids = torch.tensor(watched_anime_ids, dtype=torch.long).to(device)
    selected_genre_tensor = torch.tensor(selected_genre_vector, dtype=torch.float).to(device)
    
    with torch.no_grad():
        recommendations = model(user_ids, anime_ids, selected_genre_tensor).squeeze()
    
    sorted_indices = torch.argsort(recommendations, descending=True)
    
    # Exclude watched anime IDs from the recommendations
    recommended_anime_ids = [anime_id for anime_id in sorted_indices if anime_id not in watched_anime_ids][:num_recommendations]
    
    # Convert recommended_anime_ids to a list of integers
    recommended_anime_ids = [int(idx) for idx in recommended_anime_ids]
    
    recommended_anime_names = [anime_df.iloc[anime_id]['name'] for anime_id in recommended_anime_ids]
    return recommended_anime_names

watched_animes = ["Fullmetal Alchemist: Brotherhood", "Steins;Gate", "Toushindai My Lover: Minami tai Mecha-Minami", "Violence Gekiga David no Hoshi"]
selected_genres = ["Action", "Adventure", "Drama"]
num_recommendations = 3  # You can change this to the number of recommendations you want
recommendations = get_recommendations(watched_animes, selected_genres, num_recommendations)
print("Recommended Animes:")
for idx, anime in enumerate(recommendations, start=1):
    print(f"{idx}. {anime}")

Recommended Animes:
1. Kimi no Na wa.
2. Gintama°
