# NEURAL NETWORK HYBRID COLLABORATIVE FILTERING

The model is learning latent factors for both users and items using a neural network. The similarity function computes the similarity between two items based on their latent factors, which is related to item-based collaborative filtering. The rating function predicts a user's rating for an item based on the user's latent factors and the item's latent factors, which combines both user-based and item-based collaborative filtering.

Thus, the code below implements a hybrid collaborative filtering approach.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and process data
df_anime = pd.read_csv('animes.csv')
df_reviews = pd.read_csv('reviews.csv')
# Keep only the identifying columns. rename() returns a new frame, so nothing
# is mutated in place on a column slice.
df_anime = (
    df_anime[['uid', 'title']]
    .rename(columns={'uid': 'anime_id'})
    .drop_duplicates()
)

# Drop reviews whose score equals -1 (presumably a "no score" marker - confirm).
# The explicit copy() gives an independent frame, so the column assignments
# below do not write into a slice of df_reviews (SettingWithCopyWarning).
df_reviews_cp = df_reviews.loc[
    df_reviews['score'] != -1, ['profile', 'anime_uid', 'score']
].copy()
# Divide every score by 10. Assumes raw scores are on a 0-10 scale, which
# would give 0-1 here - TODO confirm.
df_reviews_cp['score'] = df_reviews_cp['score'] / 10
# The index mappings below follow the row order of this shuffle, so it is
# seeded: the same CSV then always yields the same id -> index assignment.
df_reviews_cp = df_reviews_cp.sample(frac=1, random_state=42).reset_index(drop=True)

# Map user and anime ids to integer indices (in order of first appearance)
user_mapping = {user_id: idx for idx, user_id in enumerate(df_reviews_cp['profile'].unique())}
anime_mapping = {anime_id: idx for idx, anime_id in enumerate(df_reviews_cp['anime_uid'].unique())}

# Series.map with a dict performs the same lookup as a per-row lambda.
df_reviews_cp['user_id'] = df_reviews_cp['profile'].map(user_mapping)
df_reviews_cp['anime_id'] = df_reviews_cp['anime_uid'].map(anime_mapping)

# Define neural network-based item similarity and rating prediction model
class ItemSimilarityAndRating(nn.Module):
    """Latent-factor model with one embedding table for users and one for anime.

    Args:
        n_users: Number of rows in the user embedding table.
        n_anime: Number of rows in the anime embedding table.
        n_factors: Length of every embedding vector.
    """

    def __init__(self, n_users, n_anime, n_factors):
        super().__init__()
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.anime_factors = nn.Embedding(n_anime, n_factors)

    def similarity(self, anime1, anime2):
        """Return the row-wise dot product of the embeddings of two anime index tensors."""
        first = self.anime_factors(anime1)
        second = self.anime_factors(anime2)
        return torch.sum(first * second, dim=1)

    def rating(self, user, anime):
        """Return sigmoid(user . anime) * 10 for each (user, anime) index pair.

        The sigmoid bounds the dot product to 0..1, so the returned values lie
        between 0 and 10.
        """
        user_vectors = self.user_factors(user)
        anime_vectors = self.anime_factors(anime)
        logits = torch.sum(user_vectors * anime_vectors, dim=1)
        return 10 * torch.sigmoid(logits)

# Initialize model, loss function, and optimizer
n_users = len(user_mapping)
n_anime = len(anime_mapping)
n_factors = 200

model = ItemSimilarityAndRating(n_users, n_anime, n_factors)
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 25
batch_size = 1024

# The split is seeded (random_state=42), so it is the same on every call:
# compute it once instead of once per epoch.
train_data, val_data = train_test_split(df_reviews_cp, test_size=0.2, random_state=42)

# Convert the training columns to tensors once; each batch is then a cheap
# tensor index instead of three DataFrame.iloc lookups plus conversions.
train_users = torch.tensor(train_data['user_id'].values, dtype=torch.long)
train_anime = torch.tensor(train_data['anime_id'].values, dtype=torch.long)
train_scores = torch.tensor(train_data['score'].values, dtype=torch.float32)
n_train = len(train_data)

# NOTE(review): model.rating() returns sigmoid(...) * 10, while 'score' was
# divided by 10 during preprocessing - confirm the prediction and target
# scales are meant to match.
model.train()
for epoch in range(epochs):
    # Fresh visiting order every epoch.
    shuffled_indices = torch.randperm(n_train)
    for batch_start in range(0, n_train, batch_size):
        batch_indices = shuffled_indices[batch_start:batch_start + batch_size]
        user_batch = train_users[batch_indices]
        anime_batch = train_anime[batch_indices]
        score_batch = train_scores[batch_indices]

        optimizer.zero_grad()
        predictions = model.rating(user_batch, anime_batch)
        loss = loss_func(predictions, score_batch)
        loss.backward()
        optimizer.step()

    # Reports the loss of the last mini-batch of the epoch, not an epoch average.
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss.item()}")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Read both CSV files; the anime table is reduced to id/title/genre, its id
# column is renamed to 'anime_id', and exact duplicate rows are removed.
df_anime = (
    pd.read_csv('animes.csv')[['uid', 'title', 'genre']]
    .rename(columns={'uid': 'anime_id'})
    .drop_duplicates()
)
df_reviews = pd.read_csv('reviews.csv')

In [None]:
def append_data(username, anime_uid=0, score=None, df_reviews=None):
    """Return ``df_reviews`` with one extra review row appended.

    Args:
        username: Value stored in the 'profile' column.
        anime_uid: Value stored in the 'anime_uid' column (defaults to 0).
        score: Value stored in the 'score' column. Required; it only carries a
            default because it follows ``anime_uid`` in the signature.
        df_reviews: Existing reviews DataFrame to extend. When omitted, the
            result contains just the new row.

    Returns:
        A new DataFrame. The frame passed in is not modified, so callers must
        use the return value.

    Raises:
        TypeError: If ``score`` is not supplied.
    """
    if score is None:
        raise TypeError("append_data() missing required argument: 'score'")

    # Create a new DataFrame with the data to append
    new_data = pd.DataFrame({'profile': [username], 'anime_uid': [anime_uid], 'score': [score]})

    if df_reviews is None:
        return new_data

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([df_reviews, new_data], ignore_index=True)


In [None]:
# Show the column labels of the reviews table.
df_reviews.columns

In [None]:
# Genre names treated as adult-only content.
genres_18_above = ['Hentai', 'Ecchi', 'Harem', 'Yuri', 'Yaoi']

def is_18_above(genre_str):
    """Return 1 if ``genre_str`` mentions any genre in ``genres_18_above``, else 0.

    Matching is a plain substring test against each listed genre name.
    Non-string values (for example a missing genre) return 0 explicitly, so
    the result is always an int and never an implicit ``None``.
    """
    if not isinstance(genre_str, str):
        return 0
    return int(any(genre in genre_str for genre in genres_18_above))

# Add an '18_above' column by applying is_18_above to each row's 'genre' value.
df_anime['18_above'] = df_anime['genre'].apply(is_18_above)

In [None]:
# Show the column labels of the anime table.
df_anime.columns

In [None]:
# Count how many anime carry each '18_above' flag value.
df_anime['18_above'].value_counts()

In [None]:
# Keep only the rows whose '18_above' flag equals 0, then renumber the index from 0.
df_anime = df_anime[df_anime['18_above'] == 0].reset_index(drop=True)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load and process data
df_anime = pd.read_csv('animes.csv')
df_reviews = pd.read_csv('reviews.csv')
# Keep id, title and genre. rename() returns a new frame, so nothing is
# mutated in place on a column slice.
df_anime = (
    df_anime[['uid', 'title', 'genre']]
    .rename(columns={'uid': 'anime_id'})
    .drop_duplicates()
)

# Drop reviews whose score equals -1 (presumably a "no score" marker - confirm).
# The explicit copy() gives an independent frame, so the column assignments
# below do not write into a slice of df_reviews (SettingWithCopyWarning).
df_reviews_cp = df_reviews.loc[
    df_reviews['score'] != -1, ['profile', 'anime_uid', 'score']
].copy()
# Divide every score by 10. Assumes raw scores are on a 0-10 scale, which
# would give 0-1 here - TODO confirm.
df_reviews_cp['score'] = df_reviews_cp['score'] / 10
# The index mappings below follow the row order of this shuffle, so it is
# seeded: the same CSV then always yields the same id -> index assignment,
# which is what a saved set of embedding weights needs on reload.
df_reviews_cp = df_reviews_cp.sample(frac=1, random_state=42).reset_index(drop=True)

# Map user and anime ids to integer indices (in order of first appearance)
user_mapping = {user_id: idx for idx, user_id in enumerate(df_reviews_cp['profile'].unique())}
anime_mapping = {anime_id: idx for idx, anime_id in enumerate(df_reviews_cp['anime_uid'].unique())}

# Series.map with a dict performs the same lookup as a per-row lambda.
df_reviews_cp['user_id'] = df_reviews_cp['profile'].map(user_mapping)
df_reviews_cp['anime_id'] = df_reviews_cp['anime_uid'].map(anime_mapping)

# Genre names treated as adult-only content.
genres_18_above = ['Hentai', 'Ecchi', 'Harem', 'Yuri', 'Yaoi']

def is_18_above(genre_str):
    """Return 1 if ``genre_str`` mentions any genre in ``genres_18_above``, else 0.

    Matching is a plain substring test against each listed genre name.
    Non-string values (for example a missing genre) return 0 explicitly, so
    the result is always an int and never an implicit ``None``.
    """
    if not isinstance(genre_str, str):
        return 0
    for genre in genres_18_above:
        if genre in genre_str:
            return 1
    return 0

# Add an '18_above' column by applying is_18_above to each row's 'genre' value.
df_anime['18_above'] = df_anime['genre'].apply(is_18_above)

# Define neural network-based item similarity and rating prediction model
class ItemSimilarityAndRating(nn.Module):
    """Latent-factor model with per-user and per-anime embeddings and scalar biases.

    Args:
        n_users: Number of rows in the user tables.
        n_anime: Number of rows in the anime tables.
        n_factors: Length of every latent-factor vector.
    """

    def __init__(self, n_users, n_anime, n_factors):
        super().__init__()
        # Latent-factor tables.
        self.user_factors = nn.Embedding(n_users, n_factors)
        self.anime_factors = nn.Embedding(n_anime, n_factors)
        # One learnable scalar offset per user and per anime.
        self.user_bias = nn.Embedding(n_users, 1)
        self.anime_bias = nn.Embedding(n_anime, 1)

    def rating(self, user, anime):
        """Return sigmoid(user . anime + user bias + anime bias) * 10 per index pair.

        The sigmoid bounds the sum to 0..1, so the returned values lie between
        0 and 10.
        """
        interaction = torch.sum(self.user_factors(user) * self.anime_factors(anime), dim=1)
        user_offset = self.user_bias(user).squeeze()
        anime_offset = self.anime_bias(anime).squeeze()
        logits = interaction + user_offset + anime_offset
        return 10 * torch.sigmoid(logits)

    def similarity(self, anime1, anime2):
        """Return the row-wise dot product of the embeddings of two anime index tensors."""
        first = self.anime_factors(anime1)
        second = self.anime_factors(anime2)
        return torch.sum(first * second, dim=1)


# Initialize model, loss function, and optimizer
n_users = len(user_mapping)
n_anime = len(anime_mapping)
n_factors = 200

model = ItemSimilarityAndRating(n_users, n_anime, n_factors)
loss_func = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
epochs = 50
batch_size = 1024

# The split is seeded (random_state=42), so it is the same on every call:
# compute it once instead of once per epoch. val_data is used by later cells.
train_data, val_data = train_test_split(df_reviews_cp, test_size=0.2, random_state=42)

# Convert the training columns to tensors once; each batch is then a cheap
# tensor index instead of three DataFrame.iloc lookups plus conversions.
train_users = torch.tensor(train_data['user_id'].values, dtype=torch.long)
train_anime = torch.tensor(train_data['anime_id'].values, dtype=torch.long)
train_scores = torch.tensor(train_data['score'].values, dtype=torch.float32)
n_train = len(train_data)

# NOTE(review): model.rating() returns sigmoid(...) * 10, while 'score' was
# divided by 10 during preprocessing - confirm the prediction and target
# scales are meant to match.
model.train()
for epoch in range(epochs):
    # Fresh visiting order every epoch.
    shuffled_indices = torch.randperm(n_train)
    for batch_start in range(0, n_train, batch_size):
        batch_indices = shuffled_indices[batch_start:batch_start + batch_size]
        user_batch = train_users[batch_indices]
        anime_batch = train_anime[batch_indices]
        score_batch = train_scores[batch_indices]

        optimizer.zero_grad()
        predictions = model.rating(user_batch, anime_batch)
        loss = loss_func(predictions, score_batch)
        loss.backward()
        optimizer.step()

    # Reports the loss of the last mini-batch of the epoch, not an epoch average.
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss.item()}")



In [None]:
# Put the model in evaluation mode before scoring the held-out split.
model.eval()

# The two index columns become int64 tensors; the target column becomes float32.
val_user_batch, val_anime_batch = (
    torch.tensor(val_data[column].values, dtype=torch.long)
    for column in ('user_id', 'anime_id')
)
val_score_batch = torch.tensor(val_data['score'].values, dtype=torch.float32)

# Gradients are not needed for evaluation.
with torch.no_grad():
    val_predictions = model.rating(val_user_batch, val_anime_batch)
    val_loss = loss_func(val_predictions, val_score_batch)

print(f"Validation Loss: {val_loss.item()}")

In [None]:
# Side-by-side table of true and predicted validation scores.
val_predictions_np = val_predictions.numpy()
result_df = pd.DataFrame({
    'user_id': val_data['user_id'].values,
    'anime_id': val_data['anime_id'].values,
    # val_data['score'] holds score / 10, so it is multiplied back by 10 here.
    'original_score': val_data['score'].values*10,
    # The model was fitted against those same score / 10 targets, so its
    # predictions need the same factor before rounding; without it the two
    # columns are on different scales.
    # NOTE(review): if the training targets are ever moved to the raw scale,
    # drop both factors of 10.
    'predicted_score': np.round(val_predictions_np * 10, 0)})


In [None]:
# Display the comparison table of original and predicted validation scores.
result_df

In [None]:
# Mean absolute difference between validation predictions and targets.
mae = (val_predictions - val_score_batch).abs().mean().item()
print(f"Mean Absolute Error: {mae}")

In [None]:
# Save only the learned parameters (the state_dict), not the model object.
# NOTE(review): the id -> index mappings are not stored alongside the weights -
# confirm they can be rebuilt identically when this file is loaded in a new session.
torch.save(model.state_dict(), 'item_similarity_and_rating_model.pth')

In [None]:
# Rebuild the architecture with the same sizes, then restore the saved weights.
loaded_model = ItemSimilarityAndRating(n_users, n_anime, n_factors)
loaded_model.load_state_dict(torch.load('item_similarity_and_rating_model.pth'))
# Switch the restored model to evaluation mode.
loaded_model.eval()

In [None]:
# Display the raw anime id -> embedding index mapping.
anime_mapping

In [None]:
# Get top 10 similar animes
anime_id = 1  # Change this to the ID of the anime you want to find similar animes for
anime_index = anime_mapping[anime_id]
anime_tensor = torch.full((n_anime,), anime_index, dtype=torch.long)
other_anime_tensor = torch.arange(n_anime, dtype=torch.long)

# Score the query against every anime; no gradients are needed for inference.
with torch.no_grad():
    anime_similarities = loaded_model.similarity(anime_tensor, other_anime_tensor)

# Raw anime ids ordered by embedding index, so position i matches
# anime_similarities[i]. One sort replaces a linear reverse lookup per result.
anime_ids_by_index = sorted(anime_mapping, key=anime_mapping.get)
similarity_df = pd.DataFrame({
    'anime_id': anime_ids_by_index,
    'similarity': anime_similarities.tolist(),
})

# Remove the query by id rather than assuming it is the top hit: with an
# unnormalised dot product another anime can outscore the query itself.
similarity_df = similarity_df[similarity_df['anime_id'] != anime_id]

# Join on anime_id so every title keeps its own score. An isin() lookup
# returns rows in df_anime order and pairs titles with the wrong scores.
recommended_anime = similarity_df.merge(
    df_anime[['anime_id', 'title', '18_above']], on='anime_id', how='inner'
)
# Filter out animes with 18_above == 1 before taking the top 10, so the
# filter does not shrink the list below 10.
recommended_anime = recommended_anime[recommended_anime['18_above'] == 0]
recommended_anime = (
    recommended_anime
    .sort_values(by='similarity', ascending=False)
    .drop_duplicates(subset='anime_id')
    .head(10)
    .reset_index(drop=True)
)

top_10_anime_ids = recommended_anime['anime_id'].tolist()
top_10_similarities = recommended_anime['similarity'].tolist()
print("Top 10 Similar Animes and Similarity Scores:")
for title, score in zip(recommended_anime['title'], top_10_similarities):
    print(f"{title} (Similarity Score: {score:.2f})")


In [None]:
# Get top 10 similar animes
anime_id = 1  # Change this to the ID of the anime you want to find similar animes for
anime_index = anime_mapping[anime_id]
anime_tensor = torch.full((n_anime,), anime_index, dtype=torch.long)
other_anime_tensor = torch.arange(n_anime, dtype=torch.long)

# Score the query against every anime; no gradients are needed for inference.
with torch.no_grad():
    anime_similarities = loaded_model.similarity(anime_tensor, other_anime_tensor)

# Raw anime ids ordered by embedding index, so position i matches
# anime_similarities[i]. One sort replaces a linear reverse lookup per result.
anime_ids_by_index = sorted(anime_mapping, key=anime_mapping.get)
similarity_df = pd.DataFrame({
    'anime_id': anime_ids_by_index,
    'similarity': anime_similarities.tolist(),
})

# Remove the query by id rather than assuming it is the top hit: with an
# unnormalised dot product another anime can outscore the query itself.
similarity_df = similarity_df[similarity_df['anime_id'] != anime_id]

# Join on anime_id so every title keeps its own score. An isin() lookup
# returns rows in df_anime order and pairs titles with the wrong scores.
recommended_anime = similarity_df.merge(
    df_anime[['anime_id', 'title', '18_above']], on='anime_id', how='inner'
)
# Filter out animes with 18_above == 1 before taking the top 10, so the
# filter does not shrink the list below 10.
recommended_anime = recommended_anime[recommended_anime['18_above'] == 0]
recommended_anime = (
    recommended_anime
    .sort_values(by='similarity', ascending=False)
    .drop_duplicates(subset='anime_id')
    .head(10)
    .reset_index(drop=True)
)

top_10_anime_ids = recommended_anime['anime_id'].tolist()
top_10_similarities = recommended_anime['similarity'].tolist()
print("Top 10 Similar Animes and Similarity Scores:")
for title, score in zip(recommended_anime['title'], top_10_similarities):
    print(f"{title} (Similarity Score: {score:.2f})")


In [None]:
# Display the reviews table as loaded from reviews.csv.
df_reviews

In [None]:
# Predict ratings for a user
user_id = 'baekbeans'  # Change this to the user ID for which you want to predict ratings
user_index = user_mapping[user_id]
user_tensor = torch.tensor([user_index] * n_anime, dtype=torch.long)
# Built here so the cell does not depend on a tensor left behind by another cell.
other_anime_tensor = torch.arange(n_anime, dtype=torch.long)

# One prediction per embedding index; no gradients are needed for inference.
with torch.no_grad():
    predicted_ratings = loaded_model.rating(user_tensor, other_anime_tensor)

# Raw anime ids ordered by embedding index, so position i matches predicted_ratings[i].
unique_anime_ids = sorted(anime_mapping, key=anime_mapping.get)
unique_animes_df = df_anime[df_anime['anime_id'].isin(unique_anime_ids)].reset_index(drop=True)

# Attach titles by anime_id. Pairing by row position would mislabel the
# predictions, because df_anime is not in embedding-index order and may not
# contain every reviewed anime. Ids missing from df_anime get a NaN title.
predicted_ratings_df = pd.DataFrame({
    'anime_id': unique_anime_ids,
    'predicted_rating': predicted_ratings.tolist(),
}).merge(
    unique_animes_df[['anime_id', 'title']].drop_duplicates(subset='anime_id'),
    on='anime_id',
    how='left',
)[['title', 'predicted_rating']]

predicted_ratings_df = predicted_ratings_df.sort_values(by='predicted_rating', ascending=False).head(10)
print("\nPredicted Ratings for User:")
# Titles are printed next to the ratings so the rows are identifiable.
print(predicted_ratings_df[['title', 'predicted_rating']])

In [None]:
# Raw anime ids ordered by embedding index, so position i matches prediction i.
unique_anime_ids = sorted(anime_mapping, key=anime_mapping.get)
unique_animes_df = df_anime[df_anime['anime_id'].isin(unique_anime_ids)].reset_index(drop=True)
n_unique_anime = len(unique_anime_ids)

# Create user_tensor and other_anime_tensor for unique animes
# (user_index comes from the cell that selected the user).
user_tensor_unique = torch.tensor([user_index] * n_unique_anime, dtype=torch.long)
other_anime_tensor_unique = torch.arange(n_unique_anime, dtype=torch.long)

# Predict ratings for unique animes; no gradients are needed for inference.
with torch.no_grad():
    predicted_ratings_unique = loaded_model.rating(user_tensor_unique, other_anime_tensor_unique)

# Attach titles by anime_id. Pairing by row position would mislabel the
# predictions, because df_anime is not in embedding-index order and may not
# contain every reviewed anime. Ids missing from df_anime get a NaN title.
predicted_ratings_df = pd.DataFrame({
    'anime_id': unique_anime_ids,
    'predicted_rating': predicted_ratings_unique.tolist(),
}).merge(
    unique_animes_df[['anime_id', 'title']].drop_duplicates(subset='anime_id'),
    on='anime_id',
    how='left',
)[['title', 'predicted_rating']]
# NOTE(review): values are rounded on the model's output scale; the training
# targets were score / 10, so confirm this is the scale you expect to see.
predicted_ratings_df['predicted_rating'] = predicted_ratings_df['predicted_rating'].apply(round)

print("\nPredicted Ratings for:",user_id)
# Titles are printed next to the ratings so the rows are identifiable.
print(predicted_ratings_df[['title', 'predicted_rating']])

In [None]:
# Count how many anime received each rounded predicted rating.
predicted_ratings_df['predicted_rating'].value_counts()

In [None]:
# Raw anime ids ordered by embedding index, so position i matches prediction i.
unique_anime_ids = sorted(anime_mapping, key=anime_mapping.get)
unique_animes_df = df_anime[df_anime['anime_id'].isin(unique_anime_ids)].reset_index(drop=True)
n_unique_anime = len(unique_anime_ids)

# Create user_tensor and other_anime_tensor for unique animes
# (user_index comes from the cell that selected the user).
user_tensor_unique = torch.tensor([user_index] * n_unique_anime, dtype=torch.long)
other_anime_tensor_unique = torch.arange(n_unique_anime, dtype=torch.long)

# Predict ratings for unique animes with the in-memory model (not the
# reloaded copy); no gradients are needed for inference.
with torch.no_grad():
    predicted_ratings_unique = model.rating(user_tensor_unique, other_anime_tensor_unique)

# Attach titles by anime_id. Pairing by row position would mislabel the
# predictions, because df_anime is not in embedding-index order and may not
# contain every reviewed anime. Ids missing from df_anime get a NaN title.
predicted_ratings_df = pd.DataFrame({
    'anime_id': unique_anime_ids,
    'predicted_rating': predicted_ratings_unique.tolist(),
}).merge(
    unique_animes_df[['anime_id', 'title']].drop_duplicates(subset='anime_id'),
    on='anime_id',
    how='left',
)[['title', 'predicted_rating']]
# NOTE(review): values are rounded on the model's output scale; the training
# targets were score / 10, so confirm this is the scale you expect to see.
predicted_ratings_df['predicted_rating'] = predicted_ratings_df['predicted_rating'].apply(round)

print("\nPredicted Ratings for User:")
# Titles are printed next to the ratings so the rows are identifiable.
print(predicted_ratings_df[['title', 'predicted_rating']])

In [None]:
# Count how many anime received each rounded predicted rating.
predicted_ratings_df['predicted_rating'].value_counts()