In [17]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd

# Load the dataset
data = df
df = pd.read_csv(r"C:\Users\Acer\Desktop\Data Sci Assignments\Recommendation System\anime.csv")

# Handle missing values
df.fillna({'genre': 'Unknown', 'rating': 0, 'members': 0}, inplace=True)

# Explore the dataset
print(df.head())
print(df.info())


   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  

In [24]:
# Feature Extraction
# Convert categorical features into numerical representations
ohe = OneHotEncoder()
genre_encoded = ohe.fit_transform(df[['genre']]).toarray()

In [25]:
# Normalize numerical features
scaler = MinMaxScaler()
df['rating'] = scaler.fit_transform(df[['rating']])
df['members'] = scaler.fit_transform(df[['members']])

In [26]:
# Combine all features into a single matrix
features = pd.concat([pd.DataFrame(genre_encoded), df[['rating', 'members']]], axis=1)


In [37]:
# Recommendation System
def recommend_anime(anime_name, df, features, top_n=10):
    # Check if the anime name exists in the DataFrame
    if anime_name not in df['name'].values:
        print(f"Anime name '{anime_name}' not found in the dataset.")
        return []
    
    # Find the index of the anime
    idx = df[df['name'] == anime_name].index[0]
    print(f"Index of '{anime_name}': {idx}")
    
    # Compute cosine similarity
    cos_sim = cosine_similarity(features)
    
    # Get similarity scores for the target anime
    sim_scores = list(enumerate(cos_sim[idx]))
    
    # Sort the anime based on similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top N similar anime
    sim_scores = sim_scores[1:top_n+1]
    
    # Get the indices of the similar anime
    anime_indices = [i[0] for i in sim_scores]
    
    # Return the top N similar anime names
    return df['name'].iloc[anime_indices].tolist()


In [38]:
# Example recommendation
print("Recommended anime for 'Naruto':", recommend_anime('Naruto', df, features))


Index of 'Naruto': 841
Recommended anime for 'Naruto': ['Naruto: Shippuuden', 'Naruto: Shippuuden Movie 4 - The Lost Tower', 'Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono', 'Boruto: Naruto the Movie', 'Naruto Soyokazeden Movie: Naruto to Mashin to Mitsu no Onegai Dattebayo!!', 'Naruto x UT', 'Boruto: Naruto the Movie - Naruto ga Hokage ni Natta Hi', 'Naruto Shippuuden: Sunny Side Battle', 'Death Note', 'Shingeki no Kyojin']


In [None]:
# Evaluation
# Split the dataset into training and testing sets
train, test = train_test_split(df, test_size=0.2, random_state=42)

# Calculate precision, recall, and F1-score
y_true = test['name']
y_pred = [recommend_anime(name, df, features, top_n=1)[0] for name in test['name'] if recommend_anime(name, df, features, top_n=1)]

precision = precision_score(y_true, y_pred, average='micro', zero_division=0)
recall = recall_score(y_true, y_pred, average='micro', zero_division=0)
f1 = f1_score(y_true, y_pred, average='micro', zero_division=0)

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-score: {f1:.2f}')

Index of 'Suzy&#039;s Zoo: Daisuki! Witzy - Happy Birthday': 6329
Index of 'Suzy&#039;s Zoo: Daisuki! Witzy - Happy Birthday': 6329
Index of 'Tactics': 2167
Index of 'Tactics': 2167
Index of 'Kamen no Maid Guy': 2882
Index of 'Kamen no Maid Guy': 2882
Index of 'Take Your Way': 4700
Index of 'Take Your Way': 4700
Index of 'Rinkaku': 7258
Index of 'Rinkaku': 7258
Index of 'Suisei no Gargantia: Meguru Kouro, Haruka': 1730
Index of 'Suisei no Gargantia: Meguru Kouro, Haruka': 1730
Index of 'Digital Devil Story: Megami Tensei': 7443
Index of 'Digital Devil Story: Megami Tensei': 7443
Index of 'Chuunibyou demo Koi ga Shitai! Ren Lite': 2576
Index of 'Chuunibyou demo Koi ga Shitai! Ren Lite': 2576
Index of 'Kobo-chan': 9263
Index of 'Kobo-chan': 9263
Index of 'Sekaiichi Hatsukoi Movie: Yokozawa Takafumi no Baai': 311
Index of 'Sekaiichi Hatsukoi Movie: Yokozawa Takafumi no Baai': 311
Index of 'Mardock Scramble: The Second Combustion': 1570
Index of 'Mardock Scramble: The Second Combustion': 1

In [None]:
## 1. Difference between User-Based and Item-Based Collaborative Filtering:
#User-Based: Recommends items based on similar users' preferences.
#Item-Based: Recommends items based on similarity between items.

## 2. Collaborative Filtering:
#Predicts user preferences based on the preferences of similar users (user-based) or similar items (item-based). Uses historical interaction data for personalized recommendations.