# 1. Data Preprocessing:

In [1]:
import pandas as pd

In [2]:
df=pd.read_csv("/content/anime.csv")

In [3]:
df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


Handling Missing Values

In [4]:
df.isnull().sum()

Unnamed: 0,0
anime_id,0
name,0
genre,62
type,25
episodes,0
rating,230
members,0


In [6]:
df = df.dropna()

Explore the Dataset

In [7]:
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [8]:
df.describe()

Unnamed: 0,anime_id,rating,members
count,12017.0,12017.0,12017.0
mean,13638.001165,6.478264,18348.88
std,11231.076675,1.023857,55372.5
min,1.0,1.67,12.0
25%,3391.0,5.89,225.0
50%,9959.0,6.57,1552.0
75%,23729.0,7.18,9588.0
max,34519.0,10.0,1013917.0


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 12017 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12017 non-null  int64  
 1   name      12017 non-null  object 
 2   genre     12017 non-null  object 
 3   type      12017 non-null  object 
 4   episodes  12017 non-null  object 
 5   rating    12017 non-null  float64
 6   members   12017 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 751.1+ KB


In [10]:
df.shape

(12017, 7)

# 2. Feature Extraction:

Convert Categorical Features

In [12]:
print(df.columns)

Index(['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members'], dtype='object')


In [13]:
df['genre'] = df['genre'].apply(lambda x: x.split(','))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['genre'] = df['genre'].apply(lambda x: x.split(','))


In [14]:
genre_encoded = df['genre'].str.join('|').str.get_dummies()

Normalization

In [15]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [18]:
df[['rating', 'members']] = scaler.fit_transform(df[['rating', 'members']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['rating', 'members']] = scaler.fit_transform(df[['rating', 'members']])


# 3. Recommendation System:

Cosine Similarity Calculation

In [22]:
from sklearn.metrics.pairwise import cosine_similarity

In [23]:
features = pd.concat([df[['rating', 'members']], genre_encoded], axis=1)


In [24]:
cosine_sim = cosine_similarity(features)

Recommendation Function

In [88]:
def recommend_anime(anime_id, cosine_sim,df):
    idx = df[df['anime_id'] == anime_title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    anime_indices = [i[0] for i in sim_scores[1:11]]  # Get top 10 similar anime
    return df['anime_id'].iloc[anime_indices]

# 4. Evaluation:

Split the Dataset

In [105]:
from sklearn.model_selection import train_test_split

In [106]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

Evaluate the Recommendation System

In [107]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [116]:
def evaluate_recommendations(true_relevant, recommended):
    precision = precision_score(true_relevant, recommended)
    recall = recall_score(true_relevant, recommended)
    f1 = f1_score(true_relevant, recommended)

    return precision, recall, f1

In [114]:
import numpy as np
true_relevant = np.array([1, 0, 1, 1, 0, 0, 1, 0, 1, 0])
recommended = np.array([1, 1, 0, 1, 0, 0, 1, 0, 0, 0])


In [117]:
precision, recall, f1 = evaluate_recommendations(true_relevant, recommended)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")

Precision: 0.75
Recall: 0.60
F1-score: 0.67
