In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw anime catalogue from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='anime.csv')

Data Preprocessing

In [None]:

# Handle missing values: drop every row that has a NaN in any column.
# The surviving rows keep their original index labels, so the index is no
# longer contiguous after this step.
df = df.dropna()

# Explore the dataset
print(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
df.info()
print(df.describe())

   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
<class 'pandas.core.frame.DataFrame'>
Index: 12017 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
--- 

Feature Extraction

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Map every distinct genre string to an integer code.
# NOTE(review): the whole comma-separated genre string is treated as a single
# category, so the resulting codes carry no notion of genre overlap.
le = LabelEncoder()
le.fit(df['genre'])
df['genre'] = le.transform(df['genre'])

# Standardise the rating column with a fitted StandardScaler
scaler = StandardScaler()
scaler.fit(df[['rating']])
df[['rating']] = scaler.transform(df[['rating']])

Recommendation System

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_anime(target_anime_id, num_recommendations):
    """Return the rows of the global ``df`` most similar to a target row.

    Similarity is the cosine similarity between the ``genre`` and ``rating``
    columns of the target row and those of every row in ``df``.

    Parameters
    ----------
    target_anime_id
        Index label of the target row in ``df``. Despite the name, it is
        looked up in ``df.index`` and is not matched against the
        ``anime_id`` column. Assumes the index labels are unique.
    num_recommendations : int
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``num_recommendations`` rows of ``df``, ordered from most to
        least similar, never including the target row itself. Empty when
        ``num_recommendations`` is zero or negative.

    Raises
    ------
    KeyError
        If ``target_anime_id`` is not a label in ``df.index``.
    """
    feature_columns = ['genre', 'rating']

    # A tail slice such as [-0:] selects every row, so "no recommendations"
    # has to be handled explicitly instead of falling through to the slice.
    if num_recommendations <= 0:
        return df.iloc[0:0]

    # Positional location of the target row; unknown labels raise KeyError.
    target_position = df.index.get_loc(target_anime_id)

    feature_matrix = df[feature_columns].to_numpy(dtype=float)
    target_features = feature_matrix[target_position].reshape(1, -1)

    # One similarity score per row of df, in positional order
    similarity_scores = cosine_similarity(target_features, feature_matrix)[0]

    # Rank most-similar first; a stable sort keeps ties in row order
    ranked_positions = np.argsort(-similarity_scores, kind='stable')

    # The target is always maximally similar to itself and is not a useful
    # recommendation, so drop it before taking the top N.
    ranked_positions = ranked_positions[ranked_positions != target_position]

    return df.iloc[ranked_positions[:num_recommendations]]

# Example usage: ask for five recommendations for the row labelled 0
recommended_anime = recommend_anime(target_anime_id=0, num_recommendations=5)
print(recommended_anime)

       anime_id                             name  genre   type episodes  \
10786     26313  Yakusoku: Africa Mizu to Midori   2556    OVA        1   
483        2618                       Takarajima   1440     TV       26   
156        1210                  NHK ni Youkoso!   1765     TV       24   
1019      32668              Noblesse: Awakening   1142    ONA        1   
0         32281                   Kimi no Na wa.   2660  Movie        1   

         rating  members  
10786  2.707265       53  
483    1.525410     5306  
156    1.877036   291228  
1019   1.212853    28864  
0      2.824474   200630  


Evaluation

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

# Evaluate the recommendation system
def evaluate_recommendation_system(recommended_anime, actual_anime):
    """Score a set of recommendations against a set of relevant anime.

    The two frames are compared as *sets* of ``anime_id`` values, so neither
    row order nor differing lengths affect the result. (Classification
    metrics such as ``sklearn.metrics.precision_score`` compare the two
    lists position by position and therefore do not measure overlap.)

    Parameters
    ----------
    recommended_anime : pandas.DataFrame
        Recommended rows; must contain an ``anime_id`` column.
    actual_anime : pandas.DataFrame
        Relevant (ground-truth) rows; must contain an ``anime_id`` column.

    Returns
    -------
    tuple of float
        ``(precision, recall, f1)`` where precision is the share of
        recommended IDs that are relevant, recall is the share of relevant
        IDs that were recommended, and F1 is their harmonic mean. Any metric
        whose denominator would be zero is reported as ``0.0``.
    """
    # Extract anime IDs from recommended and actual anime DataFrames
    recommended_ids = set(recommended_anime['anime_id'].tolist())
    actual_ids = set(actual_anime['anime_id'].tolist())

    # IDs that were both recommended and relevant
    num_hits = len(recommended_ids & actual_ids)

    # Guard every division so empty inputs yield 0.0 instead of raising
    precision = num_hits / len(recommended_ids) if recommended_ids else 0.0
    recall = num_hits / len(actual_ids) if actual_ids else 0.0
    if precision + recall > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0
    return precision, recall, f1

# Example usage: score five recommendations for row 0 against the first
# five rows of the held-out split.
# NOTE(review): those five test rows are not tied to the target anime in any
# way visible here — confirm they are the intended ground truth.
recommended_anime = recommend_anime(target_anime_id=0, num_recommendations=5)
actual_anime = test_df.head(5)
precision, recall, f1 = evaluate_recommendation_system(
    recommended_anime=recommended_anime,
    actual_anime=actual_anime,
)
print(f'Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}')

Precision: 0.0000, Recall: 0.0000, F1-score: 0.0000


interview questions:

1ST - What is collaborative filtering, and how does it work?

ANS = Collaborative Filtering and Its Types

Collaborative filtering is a technique used in recommendation systems to predict a user's preferences based on the behavior of similar users.

Collaborative Filtering: Collaborative filtering works by analyzing the behavior of users and items in a system. It identifies patterns and relationships between users and items to make recommendations.



2ND - Can you explain the difference between user-based and item-based collaborative filtering?

ANS = User-Based vs Item-Based Collaborative Filtering:

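- User-Based: Focuses on finding users whose past behavior is similar to the target user's and recommending items that those similar users liked.
- Item-Based: Focuses on finding items that are similar to the ones a user has already liked and recommending those similar items to that user.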