In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import seaborn as sns
%matplotlib inline

In [2]:
anime = pd.read_csv("anime-recommendations-database/anime.csv")

In [3]:
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


# Clean up and Adding stuff manually

In [4]:
anime.loc[(anime["type"] == "Movie") & (anime["episodes"] == "Unknown")] = "1" # Movies are typically 1 'episode'
anime["members"] = anime["members"].astype(float)

In [5]:
preknown = {"Naruto Shippuuden":500, "One Piece":784,"Detective Conan":854, "Dragon Ball Super":86,
                "Crayon Shin chan":942, "Yu Gi Oh Arc V":148,"Shingeki no Kyojin Season 2":25,
                "Boku no Hero Academia 2nd Season":25,"Little Witch Academia TV":25} # Got this from the internet 

In [6]:
for i,j in preknown.items():    
    anime.loc[anime["name"]==i,"episodes"] = j

In [7]:
anime["episodes"] = anime["episodes"].map(lambda x:np.nan if x=="Unknown" else x)
anime["episodes"].fillna(anime["episodes"].median(),inplace = True)

## Filling up Unknown Values in Rating

In [8]:
anime["rating"] = anime["rating"].astype(float)
anime["rating"].fillna(anime["rating"].median(),inplace = True) # Filled Unknown Values with median of all values 

In [9]:
pd.get_dummies(anime[["type"]]).head() # Convert type to categorical values for ease

Unnamed: 0,type_1,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [12]:
## Using MaxAbsScaler as it preserves the sparsity while scaing the values from 0-1.

anime_features = pd.concat([anime["genre"].str.get_dummies(sep=","),pd.get_dummies(anime[["type"]]),
                            anime[["rating"]],anime[["members"]],anime["episodes"]],axis=1)

anime["name"] = anime["name"].map(lambda name:re.sub('[^A-Za-z0-9]+', " ", name))
anime_features.head() # TO DELETE 

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,type_1,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV,rating,members,episodes
0,0,0,0,0,0,0,0,0,0,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,9.37,200630.0,1
1,1,0,0,0,0,1,0,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.26,793665.0,64
2,0,0,1,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.25,114262.0,51
3,0,0,0,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.17,673572.0,24
4,0,0,1,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,9.16,151266.0,51


In [14]:
from sklearn.preprocessing import MaxAbsScaler

max_scaler = MaxAbsScaler()
anime_features = max_scaler.fit_transform(anime_features)

## Using KNN to find similar Animes

In [19]:
from sklearn.neighbors import NearestNeighbors

neigh = NearestNeighbors(n_neighbors = 5, algorithm = 'ball_tree').fit(anime_features)
distances, indices = neigh.kneighbors(anime_features)

In [20]:
# Need this function to call our main function if we get a name
def nametoindex(name):
    return anime[anime["name"] == name].index.tolist()[0]

In [26]:
# This function takes in either name or ID and can spit out some 'similar'
# animes using the KNN algorithm

def actualsimilar(idd):
    for i in indices[idd][1:]: # Removing the first one because it is itself
        print anime.ix[i]["name"]

def similaranimes(name = None, idd = None):
    if idd:
        return actualsimilar(idd)
    elif name:
        idd = nametoindex(name)
        return actualsimilar(idd)

In [27]:
similaranimes("Naruto")

Naruto Shippuuden
Katekyo Hitman Reborn 
Bleach
Dragon Ball Z


In [30]:
similaranimes(idd = 719)

Naruto Shippuuden Movie 6 Road to Ninja
Boruto Naruto the Movie
Naruto Shippuuden Movie 4 The Lost Tower
Naruto Shippuuden Movie 3 Hi no Ishi wo Tsugu Mono
