# Anime Recommendation Model

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Read the anime catalogue from CSV and use the anime_id column as the row index.
df = pd.read_csv("anime.csv").set_index("anime_id")
df.head()

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [2]:
# Produce two NaN-free frames:
#   anime - the catalogue without any title whose genre string mentions "Hentai"
#   df    - the full catalogue (adult titles kept)
# Rows with a missing genre count as "not adult" here (na=False); the dropna()
# that follows removes them anyway.
adult_mask = df["genre"].str.contains("Hentai", na=False)
anime = df.loc[~adult_mask].dropna()
df = df.dropna()

In [3]:
# Show the last rows of df, the full catalogue after dropna() (adult titles are still included).
df.tail()

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
5543,Under World,Hentai,OVA,1,4.28,183
5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175
26081,Yasuji no Pornorama: Yacchimae!!,Hentai,Movie,1,5.46,142


In [4]:
# Column dtypes and non-null counts for the full catalogue.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12017 entries, 32281 to 26081
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      12017 non-null  object 
 1   genre     12017 non-null  object 
 2   type      12017 non-null  object 
 3   episodes  12017 non-null  object 
 4   rating    12017 non-null  float64
 5   members   12017 non-null  int64  
dtypes: float64(1), int64(1), object(4)
memory usage: 657.2+ KB


In [5]:
# Number of non-null genre entries in the full catalogue.
df.genre.count()

12017

In [6]:
# Same count for the filtered frame, i.e. with "Hentai" titles removed.
anime.genre.count()

10884

In [7]:
# Column dtypes and non-null counts for the filtered frame.
anime.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10884 entries, 32281 to 30663
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      10884 non-null  object 
 1   genre     10884 non-null  object 
 2   type      10884 non-null  object 
 3   episodes  10884 non-null  object 
 4   rating    10884 non-null  float64
 5   members   10884 non-null  int64  
dtypes: float64(1), int64(1), object(4)
memory usage: 595.2+ KB


In [8]:
# Per-column count of missing values in the filtered frame (taken after dropna()).
anime.isnull().sum()

name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [9]:
# Per-column count of missing values in the full catalogue (taken after dropna()).
df.isnull().sum()

name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [10]:
# Build the numeric feature matrix `df_clean` (genre indicators + type
# indicators) for the full catalogue `df`. The matrix is assembled from new
# frames rather than by assigning columns onto a filtered slice of `df`, which
# is chained assignment and was only silent because warnings are suppressed.

# Keep only the rows that have both a genre and a type.
rows_with_features = df[df.genre.notna() & df.type.notna()]

# Split the comma-separated genre string into one column per tag position.
genres = rows_with_features.genre.str.split(", ", expand=True)

# Flatten the position columns into one array and keep the distinct genre names.
unique_genres = pd.Series(genres.values.ravel('K')).dropna().unique()

# One-hot encode every position column; the resulting columns are named
# "<position>_<genre>", so each genre appears once per position it occurs in.
dummies = pd.get_dummies(genres)

# Recover the genre part of each dummy column name so columns can be matched
# by exact equality (a suffix test would also match any genre that happens to
# be the tail of another genre's name).
genre_of_column = dummies.columns.str.split("_", n=1).str[1]

# Collapse the per-position dummies into a single indicator column per genre.
genre_features = pd.DataFrame(
    {
        "Genre: " + genre: dummies.loc[:, genre_of_column == genre].sum(axis=1)
        for genre in unique_genres
    },
    index=dummies.index,
)

# Add the type dummies. dtype=int keeps them numeric 0/1 regardless of the
# pandas default dummy dtype, so the matrix stays homogeneous.
type_dummies = pd.get_dummies(
    rows_with_features.type, prefix="Type:", prefix_sep=" ", dtype=int
)

# Final feature matrix: genre indicators followed by type indicators, indexed
# by anime_id in the same row order as `df`.
df_clean = pd.concat([genre_features, type_dummies], axis=1)
df_clean.head()

Unnamed: 0_level_0,Genre: Drama,Genre: Action,Genre: Sci-Fi,Genre: Comedy,Genre: Adventure,Genre: Fantasy,Genre: Mystery,Genre: Psychological,Genre: Ecchi,Genre: Josei,...,Genre: Yaoi,Genre: Shoujo Ai,Genre: Shounen Ai,Genre: Yuri,Type: Movie,Type: Music,Type: ONA,Type: OVA,Type: Special,Type: TV
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5114,1,1,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
28977,0,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9253,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9969,0,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [11]:
# Build the feature matrix `anime_clean` for `anime`, the catalogue with the
# "Hentai" titles removed (i.e. the all-ages set, not the 18+ one).
# NOTE: the `adult_` prefix on the intermediate names is historical; they are
# kept unchanged so nothing else that refers to them breaks.

# Keep only the rows that have both a genre and a type.
anime_with_features = anime[anime.genre.notna() & anime.type.notna()]

# One column per genre tag position, then the distinct genre names.
adult_genres = anime_with_features.genre.str.split(", ", expand=True)
adult_unique_genres = pd.Series(adult_genres.values.ravel('K')).dropna().unique()

# One-hot encode every position column ("<position>_<genre>").
adult_dummies = pd.get_dummies(adult_genres)

# Sum the per-position dummies of *this* frame. The earlier version summed the
# `dummies` frame built from `df` in another cell, which only produced the
# right values through index alignment and hidden cross-cell state.
adult_genre_of_column = adult_dummies.columns.str.split("_", n=1).str[1]
adult_genre_features = pd.DataFrame(
    {
        "Genre: " + genre: adult_dummies.loc[:, adult_genre_of_column == genre].sum(axis=1)
        for genre in adult_unique_genres
    },
    index=adult_dummies.index,
)

# Type dummies; dtype=int keeps them numeric 0/1 on every pandas version.
adult_type_dummies = pd.get_dummies(
    anime_with_features.type, prefix="Type:", prefix_sep=" ", dtype=int
)

# Final feature matrix, indexed by anime_id in the same row order as `anime`.
anime_clean = pd.concat([adult_genre_features, adult_type_dummies], axis=1)
anime_clean.head()

Unnamed: 0_level_0,Genre: Drama,Genre: Action,Genre: Sci-Fi,Genre: Comedy,Genre: Adventure,Genre: Fantasy,Genre: Mystery,Genre: Psychological,Genre: Ecchi,Genre: Josei,...,Genre: Yaoi,Genre: Shoujo Ai,Genre: Shounen Ai,Genre: Yuri,Type: Movie,Type: Music,Type: ONA,Type: OVA,Type: Special,Type: TV
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5114,1,1,0,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
28977,0,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9253,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9969,0,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


## Building the recommendation model

In [14]:
from sklearn.neighbors import NearestNeighbors# Helper function to get the features of an anime given its name
def get_features(name):
    """Return the rows of ``df_clean`` for every title in ``df`` named exactly ``name``.

    The lookup is an exact, case-sensitive comparison against ``df.name``; the
    result is an empty frame when no title matches.
    """
    title_matches = df.name == name
    matching_ids = df.loc[title_matches].index
    return df_clean.loc[matching_ids]

def anime_recommend_adult(name=None, age=None, n_neighbors=15):
    """Recommend titles similar to ``name`` from the full catalogue (adult titles included).

    Parameters
    ----------
    name : str, optional
        Exact title to look up. When omitted the user is prompted for it.
    age : int, optional
        Age of the user. When omitted the user is prompted for it, after the
        title has been found.
    n_neighbors : int, default 15
        Number of nearest neighbours to return.

    Returns
    -------
    pandas.DataFrame or None
        The rows of ``df`` closest to the requested title in ``df_clean``
        feature space, nearest first. ``None`` (after printing a message) when
        the title is not found, the age is not a whole number, or the user is
        under 18.
    """
    if name is None:
        name = input('Enter your favourite anime: ')

    # Look the title up explicitly instead of relying on a catch-all except:
    # a bare except also swallowed unrelated failures and reported them as
    # "not found".
    compare = get_features(name)
    if compare.empty:
        print("Anime Not Found, Enter full name of the anime or Check the spelling")
        return None

    if age is None:
        try:
            age = int(input('Enter your age '))
        except ValueError:
            print("Age must be a whole number")
            return None

    if age < 18:
        print('This section contains anime for adults. Try using the anime_recommend function for kid friendly content :)')
        return None

    # n_neighbors is passed by keyword: current scikit-learn releases do not
    # accept it positionally.
    neigh = NearestNeighbors(n_neighbors=n_neighbors)
    neigh.fit(df_clean.values)

    # Query with a bare array, matching how the model was fitted.
    # kneighbors returns row *positions* in df_clean, one row per query.
    positions = neigh.kneighbors(compare.values, return_distance=False)

    # Translate positions -> anime_id labels via df_clean, then fetch the
    # descriptive rows from df. Only the first query row is used.
    return df.loc[df_clean.index[positions[0]]]

In [26]:
from sklearn.neighbors import NearestNeighbors# Helper function to get the features of an anime given its name
def get_features_2(name):
    """Return the rows of ``anime_clean`` for every title in ``anime`` named exactly ``name``.

    The lookup is an exact, case-sensitive comparison against ``anime.name``;
    the result is an empty frame when no title matches.
    """
    title_matches = anime.name == name
    matching_ids = anime.loc[title_matches].index
    return anime_clean.loc[matching_ids]

def anime_recommend(name=None, n_neighbors=20):
    """Recommend titles similar to ``name`` from the filtered (no "Hentai") catalogue.

    Parameters
    ----------
    name : str, optional
        Exact title to look up. When omitted the user is prompted for it.
    n_neighbors : int, default 20
        Number of nearest neighbours to return.

    Returns
    -------
    pandas.DataFrame or None
        The rows of ``anime`` closest to the requested title in
        ``anime_clean`` feature space, nearest first. ``None`` (after printing
        'Anime Not Found') when no title matches.
    """
    if name is None:
        name = input('Enter your favourite anime: ')

    # Check for a match explicitly rather than wrapping everything in a bare
    # except, which reported every failure as "not found".
    compare = get_features_2(name)
    if compare.empty:
        print('Anime Not Found')
        return None

    # n_neighbors is passed by keyword: current scikit-learn releases do not
    # accept it positionally.
    neigh = NearestNeighbors(n_neighbors=n_neighbors)
    neigh.fit(anime_clean.values)

    # Query with a bare array, matching how the model was fitted.
    # kneighbors returns row *positions* in anime_clean, one row per query.
    positions = neigh.kneighbors(compare.values, return_distance=False)

    # Translate positions -> anime_id labels via anime_clean, then fetch the
    # descriptive rows from anime. Only the first query row is used.
    return anime.loc[anime_clean.index[positions[0]]]

In [27]:
# Interactive demo: prompts for a title and returns its nearest neighbours from the filtered catalogue.
anime_recommend()

Enter your favourite anime: Naruto


Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1735,Naruto: Shippuuden,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,Unknown,7.94,533578
20,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
1604,Katekyo Hitman Reborn!,"Action, Comedy, Shounen, Super Power",TV,203,8.37,258103
968,Kakutou Bijin Wulong: Rebirth,"Action, Comedy, Martial Arts, School, Shounen",TV,25,7.12,1895
6033,Dragon Ball Kai,"Action, Adventure, Comedy, Fantasy, Martial Ar...",TV,97,7.95,116832
5079,Kurokami The Animation,"Action, Martial Arts, Super Power",TV,23,7.29,72750
30694,Dragon Ball Super,"Action, Adventure, Comedy, Fantasy, Martial Ar...",TV,Unknown,7.4,111443
32365,Boruto: Naruto the Movie - Naruto ga Hokage ni...,"Action, Comedy, Martial Arts, Shounen, Super P...",Special,1,7.68,16868
269,Bleach,"Action, Comedy, Shounen, Super Power, Supernat...",TV,366,7.95,624055
6325,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...,"Action, Comedy, Martial Arts, Shounen, Super P...",Movie,1,7.5,83515


In [17]:
# Interactive demo: prompts for a title and an age; neighbours come from the full catalogue.
anime_recommend_adult()

Enter your favourite anime: Under World
Enter your age 19


Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
33138,Nama Lo Re: Namakemono The Animation,Hentai,OVA,1,6.43,1529
2194,Mejoku,Hentai,OVA,2,6.43,2830
1779,Bakunyuu Shimai,Hentai,OVA,2,6.4,2381
8653,Mahou Shoujo Isuka,Hentai,OVA,3,6.41,2600
8111,Last Waltz: Hakudaku Mamire no Natsu Gasshuku,Hentai,OVA,2,6.41,2748
6235,Immoral,Hentai,OVA,2,6.41,2189
24273,Iinari! Saimin Kanojo,Hentai,OVA,2,6.41,2867
1339,Cool Devices,Hentai,OVA,11,6.41,3966
6402,Bakunyuu Maid Kari,Hentai,OVA,2,6.41,2588
11523,Shiofuki Mermaid,Hentai,OVA,1,6.42,1690


In [23]:
# Second interactive demo of the filtered-catalogue recommender, with a different title.
anime_recommend()

Enter your favourite anime: Nandaka Velonica


Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1960,Sore Ike! Anpanman,"Comedy, Fantasy, Kids",TV,Unknown,6.61,940
2708,Wankorobee,"Comedy, Fantasy, Kids",TV,26,5.71,95
23107,GO-GO Tamagotchi!,"Comedy, Fantasy, Kids",TV,50,7.17,198
3180,Manga Aesop Monogatari (TV),"Comedy, Fantasy, Kids",TV,52,6.5,175
11521,Hello Kitty: Ringo no Mori no Fantasy,"Comedy, Fantasy, Kids",TV,13,5.84,201
307,Kerokko Demetan,"Comedy, Fantasy, Kids",TV,39,6.04,533
30232,Tamagotchi! Tamatomo Daishuu GO,"Comedy, Fantasy, Kids",TV,26,6.75,187
19843,Tamagotchi! Miracle Friends,"Comedy, Fantasy, Kids",TV,29,7.0,207
22381,Nandaka Velonica,"Comedy, Fantasy, Kids",TV,10,6.14,737
4101,Wan Wan Celepoo Soreyuke! Tetsunoshin,"Comedy, Fantasy, Kids",TV,51,6.81,432
