In [4]:
import pandas as pd

# Load the anime catalogue from a CSV file in the working directory,
# then preview the first rows to check the columns that were parsed.
anime = pd.read_csv('anime.csv')

anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# Summarise row count, column dtypes and non-null counts.
# NOTE(review): this cell has no execution count and its saved output is
# identical to the post-cleaning summary further down -- presumably it was
# last run out of order; confirm with Restart Kernel -> Run All.
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12230 entries, 0 to 12229
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12230 non-null  int64  
 1   name      12230 non-null  object 
 2   genre     12230 non-null  object 
 3   type      12208 non-null  object 
 4   episodes  12230 non-null  object 
 5   rating    12230 non-null  float64
 6   members   12230 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 669.0+ KB


In [42]:

import html

# Decode HTML entities left in the titles (e.g. "&#039;" becomes an apostrophe).
# Values that are not strings are passed through unchanged.
anime['name'] = anime['name'].map(
    lambda title: html.unescape(title) if isinstance(title, str) else title
)


In [43]:
# Spot-check the decoding: the title in row 4 should now show a plain
# apostrophe instead of the "&#039;" entity seen in the first preview.
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama',"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [45]:
# Clean-up pass, written as one chain:
#   1. discard rows that have no genre (the similarity model is built on it),
#   2. keep only the first row for each title,
#   3. renumber the index from 0 so labels equal row positions, which the
#      similarity lookup later relies on,
#   4. replace missing ratings with 0.
anime = (
    anime
    .dropna(subset=['genre'])
    .drop_duplicates(subset='name')
    .reset_index(drop=True)
    .assign(rating=lambda frame: frame['rating'].fillna(0))
)

In [47]:
# Re-check row count, dtypes and non-null counts after the cleaning step.
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12230 entries, 0 to 12229
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12230 non-null  int64  
 1   name      12230 non-null  object 
 2   genre     12230 non-null  object 
 3   type      12208 non-null  object 
 4   episodes  12230 non-null  object 
 5   rating    12230 non-null  float64
 6   members   12230 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 669.0+ KB


In [48]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def split_genres(genre_text):
    """Split a comma-separated genre string into one token per genre label.

    Surrounding whitespace is stripped and empty fragments are dropped.
    """
    return [label.strip() for label in genre_text.split(',') if label.strip()]


# Treat every comma-separated genre label as a single token. The default
# word-level tokenizer breaks labels apart ("Sci-Fi" -> "sci", "fi";
# "Super Power" -> "super", "power"), so different labels that merely share
# a word would be scored as overlapping. No stop-word list is used because
# genre labels are categories, not prose. The vectorizer still lower-cases
# the text before it reaches the tokenizer.
tfidf = TfidfVectorizer(tokenizer=split_genres, token_pattern=None)
tfidf_matrix = tfidf.fit_transform(anime['genre'])

# Dense square matrix of pairwise similarities between all rows of `anime`;
# its memory footprint grows quadratically with the number of titles.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Title -> index label of that title's row in `anime`. Later lookups use the
# label as a row position, so this assumes `anime` has a 0..n-1 index.
anime_indices = pd.Series(anime.index, index=anime['name']).to_dict()

In [49]:
def recommend_anime(fav_anime, genre_hint=None, top_n=10):
    """Return the titles most similar to ``fav_anime`` by genre similarity.

    Relies on the notebook-level ``anime`` frame, the ``cosine_sim`` matrix
    and the ``anime_indices`` title-to-row mapping.

    Args:
        fav_anime: Exact title to look up in ``anime_indices``.
        genre_hint: Optional pattern. When given, only rows whose ``genre``
            matches it (case-insensitively) are kept. The value is passed to
            ``Series.str.contains`` and is therefore interpreted as a
            regular expression.
        top_n: Maximum number of rows to return.

    Returns:
        An empty list when ``fav_anime`` is not a known title; otherwise a
        DataFrame with the ``anime_id``, ``name``, ``genre`` and ``rating``
        columns, ordered from most to least similar. The query title itself
        is never part of the result.
    """
    if fav_anime not in anime_indices:
        return []

    idx = anime_indices[fav_anime]
    scores = cosine_sim[idx]

    # Exclude the query title by position instead of dropping the first
    # sorted entry. Titles with an identical genre list tie with the query
    # at the top score, and because the sort is stable, slicing off the
    # first element could discard an earlier-indexed neighbour and leave
    # the query itself in the recommendations.
    candidates = [pos for pos in range(len(scores)) if pos != idx]
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    recommendations = anime.iloc[candidates]

    if genre_hint:
        matches_hint = recommendations['genre'].str.contains(
            genre_hint, case=False, na=False
        )
        recommendations = recommendations[matches_hint]

    return recommendations[['anime_id', 'name', 'genre', 'rating']].head(top_n)

In [50]:
# Example query: up to five titles closest to "Bleach" whose genre text
# contains "Action" (matched case-insensitively).
recommend_anime("Bleach", genre_hint="Action", top_n=5)

Unnamed: 0,anime_id,name,genre,rating
946,8247,Bleach Movie 4: Jigoku-hen,"Action, Comedy, Shounen, Super Power, Supernat...",7.75
1131,4835,Bleach Movie 3: Fade to Black - Kimi no Na wo ...,"Action, Comedy, Shounen, Super Power, Supernat...",7.66
3287,11703,Code:Breaker,"Action, Comedy, School, Shounen, Super Power, ...",7.03
1244,18499,Yozakura Quartet: Tsuki ni Naku,"Action, Comedy, Magic, Shounen, Super Power, S...",7.62
1271,18497,Yozakura Quartet: Hana no Uta,"Action, Comedy, Magic, Shounen, Super Power, S...",7.61


In [51]:

import pickle

# Bundle the cleaned catalogue, the similarity matrix and the title lookup
# into one dictionary and serialise it to disk with pickle.
model = dict(anime=anime, cosine_sim=cosine_sim, anime_indices=anime_indices)

with open('anime_recommender.pkl', 'wb') as pkl_file:
    pickle.dump(model, pkl_file)
     

In [52]:
# Display the catalogue stored in the bundle; the rendered table is
# truncated in the middle, so only the first and last rows are shown.
model['anime']


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama',"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12225,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12226,5543,Under World,Hentai,OVA,1,4.28,183
12227,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12228,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [53]:
# Show the title column of the stored catalogue.
model['anime']['name']

0                                           Kimi no Na wa.
1                         Fullmetal Alchemist: Brotherhood
2                                                 Gintama°
3                                              Steins;Gate
4                                                 Gintama'
                               ...                        
12225         Toushindai My Lover: Minami tai Mecha-Minami
12226                                          Under World
12227                       Violence Gekiga David no Hoshi
12228    Violence Gekiga Shin David no Hoshi: Inma Dens...
12229                     Yasuji no Pornorama: Yacchimae!!
Name: name, Length: 12230, dtype: object