In [5]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
import warnings
warnings.filterwarnings('ignore')

# ----------------------------
# Step 1: Load user_filtered.csv
# ----------------------------

# Define data types for user_filtered.csv
user_filtered_dtypes = {
    'user_id': 'int32',
    'anime_id': 'int32',
    'rating': 'float32'
}

try:
    # Load user_filtered.csv with specified dtypes
    user_ratings = pd.read_csv(
        'user_filtered.csv',
        dtype=user_filtered_dtypes
    )
    print("Successfully loaded user_filtered.csv.")
except MemoryError:
    print("MemoryError: Unable to load user_filtered.csv.")
except Exception as e:
    print(f"An error occurred while loading user_filtered.csv: {e}")

# ----------------------------
# Step 2: Load anime_filtered.csv
# ----------------------------

# Define data types for anime_filtered.csv
anime_filtered_dtypes = {
    'anime_id': 'int32',
    'Name': 'object',
    'sypnopsis': 'object'
}

try:
    # Load only necessary columns to save memory
    anime_details = pd.read_csv(
        'anime_filtered.csv',
        usecols=['anime_id', 'Name', 'sypnopsis'],
        dtype=anime_filtered_dtypes
    )
    print("Successfully loaded anime_filtered.csv with selected columns.")
except MemoryError:
    print("MemoryError: Unable to load anime_filtered.csv even after optimization.")
except Exception as e:
    print(f"An error occurred while loading anime_filtered.csv: {e}")

# ----------------------------
# Step 3: Verify DataFrames
# ----------------------------

# Display the first few rows of user_ratings
print("User Ratings Data:")
display(user_ratings.head())

# Display the first few rows of anime_details
print("Anime Details Data:")
display(anime_details.head())

# ----------------------------
# Step 4: Handle Duplicates in anime_details
# ----------------------------

# Check for duplicate anime_id in anime_details
duplicate_anime_ids = anime_details[anime_details.duplicated('anime_id', keep=False)]
if not duplicate_anime_ids.empty:
    print(f"Found {duplicate_anime_ids.shape[0]} duplicate anime_id entries in anime_details.")
    # Remove duplicates by keeping the first occurrence
    anime_details = anime_details.drop_duplicates(subset='anime_id', keep='first')
    print("Duplicates removed from anime_details.")
else:
    print("No duplicate anime_id entries found in anime_details.")

# ----------------------------
# Step 5: Filter Anime with Minimum Ratings
# ----------------------------

# Define the minimum number of ratings required
min_ratings = 50

# Calculate the number of ratings per anime
anime_rating_counts = user_ratings['anime_id'].value_counts()

# Identify anime_ids that meet the minimum rating threshold
popular_anime = anime_rating_counts[anime_rating_counts >= min_ratings].index.tolist()

# Filter user_ratings to include only popular anime
user_ratings_filtered = user_ratings[user_ratings['anime_id'].isin(popular_anime)]

print(f"Number of anime after filtering: {len(popular_anime)}")
print(f"Number of user ratings after filtering: {user_ratings_filtered.shape[0]}")

# ----------------------------
# Step 6: Merge Filtered DataFrames
# ----------------------------

# Merge the filtered user ratings with anime details
ratings_with_anime_filtered = pd.merge(
    user_ratings_filtered,
    anime_details,
    on='anime_id',
    how='left'
)

print("Successfully merged filtered user_ratings with anime_details.")
display(ratings_with_anime_filtered.head())

# ----------------------------
# Step 7: Create Mappings
# ----------------------------

# Create unique lists of user_ids and anime_ids
user_ids = ratings_with_anime_filtered['user_id'].unique()
anime_ids = ratings_with_anime_filtered['anime_id'].unique()

# Create mappings from ids to indices
user_id_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
anime_id_to_index = {anime_id: index for index, anime_id in enumerate(anime_ids)}
index_to_anime_id = {index: anime_id for anime_id, index in anime_id_to_index.items()}

# Map user_ids and anime_ids to indices
user_indices = ratings_with_anime_filtered['user_id'].map(user_id_to_index)
anime_indices = ratings_with_anime_filtered['anime_id'].map(anime_id_to_index)
ratings = ratings_with_anime_filtered['rating'].values

# ----------------------------
# Step 8: Construct Sparse User-Item Matrix
# ----------------------------

# Create the sparse matrix
user_item_matrix_sparse = csr_matrix((ratings, (user_indices, anime_indices)), 
                                     shape=(len(user_ids), len(anime_ids)))

print(f"Sparse User-Item Matrix Shape: {user_item_matrix_sparse.shape}")

# ----------------------------
# Step 9: Build KNN Model
# ----------------------------

# Initialize the KNN model
knn = NearestNeighbors(metric='cosine', algorithm='brute')

# Fit the model using the sparse matrix
knn.fit(user_item_matrix_sparse)

print("KNN model successfully fitted on the sparse User-Item Matrix.")

# ----------------------------
# Step 10: Define Recommendation Function
# ----------------------------

def recommend_anime(user_id, user_item_matrix, anime_details, knn_model, 
                   user_id_to_index, anime_id_to_index, 
                   index_to_anime_id, n_neighbors=5, n_recommendations=5):
    """
    Recommend anime to a user based on KNN.
    
    Parameters:
    - user_id (int): The ID of the user to make recommendations for.
    - user_item_matrix (csr_matrix): The user-item rating matrix.
    - anime_details (DataFrame): The anime details dataset.
    - knn_model (NearestNeighbors): The trained KNN model.
    - user_id_to_index (dict): Mapping from user_id to matrix index.
    - anime_id_to_index (dict): Mapping from anime_id to matrix index.
    - index_to_anime_id (dict): Mapping from matrix index to anime_id.
    - n_neighbors (int): Number of similar users to consider.
    - n_recommendations (int): Number of anime to recommend.
    
    Returns:
    - recommendations (DataFrame): Recommended anime with synopsis.
    """
    if user_id not in user_id_to_index:
        print("User ID not found in the dataset.")
        return
    
    # Get the index of the user
    user_idx = user_id_to_index[user_id]
    
    # Get the user's ratings
    user_ratings = user_item_matrix[user_idx]
    
    # Find similar users
    distances, indices = knn_model.kneighbors(user_ratings, n_neighbors=n_neighbors+1)  # +1 to exclude the user itself
    
    # Get similar users' indices (exclude the user itself)
    similar_users_indices = indices.flatten()[1:]
    
    # Aggregate ratings from similar users
    similar_users_ratings = user_item_matrix[similar_users_indices]
    
    # Compute the mean ratings for each anime
    mean_ratings = similar_users_ratings.mean(axis=0).A1  # Convert to 1D array
    
    # Convert to a Series for easier manipulation
    mean_ratings_series = pd.Series(mean_ratings, index=index_to_anime_id.keys())
    
    # Get anime_ids already rated by the user
    user_rated_anime = user_ratings.nonzero()[1]
    user_rated_anime_ids = [index_to_anime_id[idx] for idx in user_rated_anime]
    
    # Exclude anime already rated by the user
    recommendations = mean_ratings_series.drop(user_rated_anime_ids, errors='ignore')
    
    # Get top N recommendations
    top_recommendations = recommendations.sort_values(ascending=False).head(n_recommendations).index.tolist()
    
    # Fetch anime details
    recommended_anime = anime_details[anime_details['anime_id'].isin(top_recommendations)][['Name', 'sypnopsis']]
    
    return recommended_anime.reset_index(drop=True)

# ----------------------------
# Step 11: Example Recommendation
# ----------------------------

# Example Usage
user_id_input = 1  # Replace with the desired user_id

recommended_anime = recommend_anime(
    user_id=user_id_input,
    user_item_matrix=user_item_matrix_sparse,
    anime_details=anime_details,
    knn_model=knn,
    user_id_to_index=user_id_to_index,
    anime_id_to_index=anime_id_to_index,
    index_to_anime_id=index_to_anime_id,
    n_neighbors=5,
    n_recommendations=5
)

print(f"Top 5 Recommendations for User ID {user_id_input}:\n")
display(recommended_anime)


An error occurred while loading user_filtered.csv: [Errno 2] No such file or directory: 'user_filtered.csv'
An error occurred while loading anime_filtered.csv: [Errno 2] No such file or directory: 'anime_filtered.csv'
User Ratings Data:


Unnamed: 0,user_id,anime_id,rating
0,0,67,9.0
1,0,6702,7.0
2,0,242,10.0
3,0,4898,0.0
4,0,21,10.0


Anime Details Data:


Unnamed: 0,anime_id,Name,sypnopsis
0,1,Cowboy Bebop,"In the year 2071, humanity has colonized sever..."
1,5,Cowboy Bebop: Tengoku no Tobira,"other day, another bounty—such is the life of ..."
2,6,Trigun,"Vash the Stampede is the man with a $$60,000,0..."
3,7,Witch Hunter Robin,ches are individuals with special powers like ...
4,8,Bouken Ou Beet,It is the dark century and the people are suff...


No duplicate anime_id entries found in anime_details.
Number of anime after filtering: 16272
Number of user ratings after filtering: 109179072
Successfully merged filtered user_ratings with anime_details.


Unnamed: 0,user_id,anime_id,rating,Name,sypnopsis
0,0,67,9.0,Basilisk: Kouga Ninpou Chou,"For centuries, the Iga and Kouga ninja clans h..."
1,0,6702,7.0,Fairy Tail,"In the mystical land of Fiore, magic exists as..."
2,0,242,10.0,Gokusen,"Kumiko Yamaguchi is smart, enthusiastic, and r..."
3,0,4898,0.0,Kuroshitsuji,"Young Ciel Phantomhive is known as ""the Queen'..."
4,0,21,10.0,One Piece,"Gol D. Roger was known as the ""Pirate King,"" t..."


Sparse User-Item Matrix Shape: (325770, 16272)
KNN model successfully fitted on the sparse User-Item Matrix.
Top 5 Recommendations for User ID 1:



Unnamed: 0,Name,sypnopsis
0,Blue Gender,Blue Gender takes place in the not too distant...
1,Fruits Basket,fter the accident in which she lost her mother...
2,Gate Keepers,"Technology, science, and industry—this is 1969..."
3,Gensoumaden Saiyuuki,"any years ago, humans and demons lived in harm..."
4,Sakura Taisen,Sakura travels to the capital with aspirations...
