In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load dataset
anime = pd.read_csv('anime.csv')
anime.head()
anime.isnull().sum()
anime.info()
print('-'*125)
# Data Preprocessing
anime['genre'] = anime['genre'].fillna('none')
anime['type'] = anime['type'].fillna('none')
anime['episodes'] = pd.to_numeric(anime['episodes'], errors='coerce')
anime['episodes'] = anime['episodes'].fillna(anime['episodes'].mode()[0])
anime['rating'] = anime['rating'].fillna(anime['rating'].median())
 # Log transform to reduce skewnes
anime['members'] = np.log1p(anime['members']) 

# Feature Engineering
# TF-IDF for genres
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
genre_matrix = vectorizer.fit_transform(anime['genre'])

# One-hot encoding
type_matrix = pd.get_dummies(anime['type'])

# Numerical features
from sklearn.preprocessing import MinMaxScaler
num_features = anime[['episodes', 'rating', 'members']]
scaler = MinMaxScaler()
num_features_scaled = scaler.fit_transform(num_features)

# Combine all features
features = np.hstack((genre_matrix.toarray(), type_matrix.values, num_features_scaled))

# Compute Cosine Similarity
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(features)

# Create DataFrame for easy lookup
anime_sim_df = pd.DataFrame(cosine_sim, index=anime['name'], columns=anime['name'])

# Recommendation Function
def recommend_anime(title, top_n=10):
    if title not in anime_sim_df.index:
        return f"Anime '{title}' not found in the dataset."
    # Get similarity scores
    similar_animes = anime_sim_df[title].sort_values(ascending=False).iloc[:top_n+1]
    # Return recommendations excluding the input anime
    return similar_animes.iloc[1:].index.tolist()
    
# Example 
anime_name = "Naruto"
recommendations = recommend_anime(anime_name,top_n=8)
print(f"Top Recommendations for '{anime_name:}'  \n", recommendations)
print('-'*125)
# Threshold function for dynamic acessing
def threshold_eval(rec,threshold):
    pred_val=np.round(rec,3)
    grnd_truth=(pred_val > 0.25).astype(int)
    pred_val=(pred_val >= threshold).astype(int)
    # Converting into single list
    y_act=grnd_truth.flatten()
    y_pred=pred_val.flatten()
    # Evaluating metrics for model evaluation
    from sklearn.metrics import f1_score,precision_score,recall_score
    precision=precision_score(y_act,y_pred,zero_division=0)
    recall=recall_score(y_act,y_pred,zero_division=0)
    f1=f1_score(y_act,y_pred,zero_division=0)
    return recall,precision,f1 
# Example 
rec = anime_sim_df.iloc[1:15, 1:15].values.tolist()
# Change this treshold dynamically as needed
threshold = 0.5 
rec = np.array(rec)
recall, precision, f1 = threshold_eval(rec, threshold)
print(f'Precision _score = {precision :.3f} ')
print(f'Recall_score = {recall:.3f}')
print(f'F1_score = {f1:.3f}')



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
-----------------------------------------------------------------------------------------------------------------------------
Top Recommendations for 'Naruto'  
 ['Naruto: Shippuuden', 'Dragon Ball Z', 'Dragon Ball Kai', 'Dragon Ball Super', 'Rekka no Honoo', 'Kurokami The Animation', 'Dragon Ball Kai (2014)', 'Dragon Ball']
-----------------------------------------------------------------------------------------------------------------------------
Precision _score = 1.000

### Interview Questions
#### 1
    1.User-based collaborative filtering finds similar users based on their past interactions and recommends items liked by those similar users. It relies on user-user similarity and is useful when users have overlapping preferences. Item-based collaborative filtering, on the other hand, finds similar items based on user interaction history and recommends items similar to those a user has already liked. It computes item-item similarity, making it more stable as item preferences change less frequently than user behavior. Item-based filtering is generally preferred for large-scale systems due to its efficiency in precomputing similarities.

#### 2
    Collaborative filtering is a recommendation technique that suggests items based on user behavior and preferences rather than explicit features. It works by analyzing historical interactions, such as user ratings, clicks, or purchases, to identify patterns. There are two main types: user-based,hich finds similar users to suggest items they liked, and item-based, which finds similar items based on user interactions. It relies on the assumption that users with similar preferences in the past will continue to have similar tastes. The method can use similarity metrics like cosine similarity or Pearson correlation to determine relationships. Collaborative filtering is widely used in recommendation systems like Netflix, Amazon,Spotify and Other.







