In [1]:
# STEP 1: Import Libraries
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings("ignore")


In [2]:
# STEP 2: Load Dataset
df = pd.read_csv("anime.csv")
df.head()


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama¬∞,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
# STEP 3: Basic Exploration
df.info()
df.isnull().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [4]:
# STEP 4: Data Cleaning 
# Keep only required columns
df = df[['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members']]

# Fill missing values
df['genre'] = df['genre'].fillna('')
df['type'] = df['type'].fillna('Unknown')
df['rating'] = df['rating'].fillna(df['rating'].median())
df['episodes'] = df['episodes'].replace('Unknown', np.nan)
df['episodes'] = df['episodes'].astype(float)
df['episodes'] = df['episodes'].fillna(df['episodes'].median())
df['members'] = df['members'].fillna(df['members'].median())

df.head()


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1.0,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64.0,9.26,793665
2,28977,Gintama¬∞,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51.0,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24.0,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51.0,9.16,151266


In [5]:
# TF-IDF for genres 
tfidf = TfidfVectorizer(stop_words='english')
genre_matrix = tfidf.fit_transform(df['genre'])


In [6]:
# STEP 6: Scale Numerical Features
scaler = MinMaxScaler()

numeric_features = df[['rating', 'episodes', 'members']]
numeric_scaled = scaler.fit_transform(numeric_features)


In [7]:
# STEP 7: Combine Features
from scipy.sparse import hstack

final_features = hstack([genre_matrix, numeric_scaled])


In [8]:
# STEP 8: Cosine Similarity Matrix
cosine_sim = cosine_similarity(final_features, final_features)


In [9]:
# STEP 9: Recommendation Function
def recommend_anime(anime_name, top_n=10, similarity_threshold=0.3):
    if anime_name not in df['name'].values:
        return "Anime not found in dataset."

    idx = df[df['name'] == anime_name].index[0]

    similarity_scores = list(enumerate(cosine_sim[idx]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    filtered_scores = [
        (i, score) for i, score in similarity_scores
        if score >= similarity_threshold and i != idx
    ]

    top_animes = filtered_scores[:top_n]

    recommendations = df.iloc[[i[0] for i in top_animes]][
        ['name', 'genre', 'rating']
    ]

    return recommendations


In [10]:
# STEP 10: Test the Recommendation System
recommend_anime("Naruto", top_n=5, similarity_threshold=0.4)


Unnamed: 0,name,genre,rating
615,Naruto: Shippuuden,"Action, Comedy, Martial Arts, Shounen, Super P...",7.94
206,Dragon Ball Z,"Action, Adventure, Comedy, Fantasy, Martial Ar...",8.32
346,Dragon Ball,"Adventure, Comedy, Fantasy, Martial Arts, Shou...",8.16
1472,Naruto: Shippuuden Movie 4 - The Lost Tower,"Action, Comedy, Martial Arts, Shounen, Super P...",7.53
1573,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...,"Action, Comedy, Martial Arts, Shounen, Super P...",7.5


In [11]:
# STEP 11: Try Different Thresholds 
recommend_anime("Death Note", top_n=10, similarity_threshold=0.2)


Unnamed: 0,name,genre,rating
445,Mirai Nikki (TV),"Action, Mystery, Psychological, Shounen, Super...",8.07
981,Mousou Dairinin,"Drama, Mystery, Police, Psychological, Superna...",7.74
778,Death Note Rewrite,"Mystery, Police, Psychological, Supernatural, ...",7.84
334,Higurashi no Naku Koro ni,"Horror, Mystery, Psychological, Supernatural, ...",8.17
144,Higurashi no Naku Koro ni Kai,"Mystery, Psychological, Supernatural, Thriller",8.41
38,Monster,"Drama, Horror, Mystery, Police, Psychological,...",8.72
702,Another,"Horror, Mystery, School, Supernatural, Thriller",7.88
250,Zankyou no Terror,"Psychological, Thriller",8.26
199,Death Parade,"Drama, Game, Mystery, Psychological, Thriller",8.33
96,Mahou Shoujo Madoka‚òÖMagica,"Drama, Magic, Psychological, Thriller",8.51


In [12]:
recommend_anime("Death Note", top_n=5, similarity_threshold=0.5)

Unnamed: 0,name,genre,rating
445,Mirai Nikki (TV),"Action, Mystery, Psychological, Shounen, Super...",8.07
981,Mousou Dairinin,"Drama, Mystery, Police, Psychological, Superna...",7.74
778,Death Note Rewrite,"Mystery, Police, Psychological, Supernatural, ...",7.84
334,Higurashi no Naku Koro ni,"Horror, Mystery, Psychological, Supernatural, ...",8.17
144,Higurashi no Naku Koro ni Kai,"Mystery, Psychological, Supernatural, Thriller",8.41


# üìä PERFORMANCE & IMPROVEMENT
‚úî Strengths

Genre + numerical features combined

Scalable using cosine similarity

No user history required

‚ùå Limitations

Cold start problem

No personalized user preferences

üîß Improvements

Add user ratings matrix

Use collaborative filtering

Hybrid recommendation system

# INTERVIEW QUESTIONS 
1Ô∏è‚É£ Difference between User-Based & Item-Based Collaborative Filtering

User-Based:

Finds similar users

Recommends items liked by similar users

Item-Based:

Finds similar items

Recommends items similar to what user liked
‚úÖ More scalable and stable

2Ô∏è‚É£ What is Collaborative Filtering?

Collaborative filtering recommends items based on:

User behavior

Ratings

Preferences
It assumes users with similar tastes will like similar items.