In [None]:
#import packages that needed

import pandas as pd
import numpy as np
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')
% matplotlib inline

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#import the dataset used in this project

anime = pd.read_csv('/content/drive/My Drive/My Mini Projects/Recommender System/anime.csv')
rate = pd.read_csv('/content/drive/My Drive/My Mini Projects/Recommender System/rating.csv', sep = ';', error_bad_lines = False, encoding = 'latin-1')



  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
print(anime.shape)
print(rate.shape)

(12294, 7)
(1048575, 3)


# **Exploring Anime Dataset**

In [None]:
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# some attributes dont seem to be required for this analysis, so can be dropped off

anime.drop(['type', 'episodes', 'rating', 'members'], axis = 1, inplace = True)

In [None]:
anime.dtypes

anime_id     int64
name        object
genre       object
dtype: object

In [None]:
#Every genre is separated by a ',' so we simply have to call the split function on ','

anime['genre'] = anime.genre.str.split(',')
anime.head()

Unnamed: 0,anime_id,name,genre
0,32281,Kimi no Na wa.,"[Drama, Romance, School, Supernatural]"
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic,..."
2,28977,Gintama°,"[Action, Comedy, Historical, Parody, Samur..."
3,9253,Steins;Gate,"[Sci-Fi, Thriller]"
4,9969,Gintama&#039;,"[Action, Comedy, Historical, Parody, Samur..."


In [None]:
print(type(anime.genre))
anime.genre.isnull().sum()

<class 'pandas.core.series.Series'>


62

In [None]:
#drop null rows because the data is still enough

anime = anime.dropna(axis=0)

In [None]:
#use one-hot-encoding to store every different genre in columns that contain either 1 or 0
#1 shows that the movie has that genre, 0 otherwise

#copying original dataset to new dataset
animeWithGenres = anime.copy()

#For every row in the dataframe, iterate through the list of genres and place a 1 into the corresponding column
for index,row in anime.iterrows():
  for genre in row['genre']:
    animeWithGenres.at[index,genre] = 1
  
#Filling in the NaN values with 0 to show that a movie doesn't have that column's genre
animeWithGenres = animeWithGenres.fillna(0)

animeWithGenres.head()

Unnamed: 0,anime_id,name,genre,Drama,Romance,School,Supernatural,Action,Adventure,Drama.1,...,Supernatural.1,Samurai,Super Power,Vampire,Space,Hentai,Yaoi,Hentai.1,Yuri,Yaoi.1
0,32281,Kimi no Na wa.,"[Drama, Romance, School, Supernatural]",1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic,...",0.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,28977,Gintama°,"[Action, Comedy, Historical, Parody, Samur...",0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,9253,Steins;Gate,"[Sci-Fi, Thriller]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,9969,Gintama&#039;,"[Action, Comedy, Historical, Parody, Samur...",0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# **Exploring Rate Dataset**

In [None]:
rate.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


# **Content Based Recommender System**

In [None]:
#user input about anime that he has watched and how he rates the anime

userInput = [
            {'name':'Fullmetal Alchemist: Brotherhood', 'rating':5},
            {'name':'Gintama', 'rating':3.5},
            {'name':'Koe no Katachi', 'rating':2},
            {'name':'Bakemonogatari', 'rating':5},
            {'name':'Nodame Cantabile Finale', 'rating':4.5}
         ] 
inputAnime = pd.DataFrame(userInput)
inputAnime

Unnamed: 0,name,rating
0,Fullmetal Alchemist: Brotherhood,5.0
1,Gintama,3.5
2,Koe no Katachi,2.0
3,Bakemonogatari,5.0
4,Nodame Cantabile Finale,4.5


In [None]:
#add anime_id to userInput

#filtering out the movies by title
inputID = anime[anime['name'].isin(inputAnime.name.tolist())]

#merge anime in the form inputID & inputAnime by the title
inputAnime = pd.merge(inputID,inputAnime)

#drop the attribute that doesnt needed
inputAnime = inputAnime.drop('genre',axis=1)

inputAnime

Unnamed: 0,anime_id,name,rating
0,5114,Fullmetal Alchemist: Brotherhood,5.0
1,28851,Koe no Katachi,2.0
2,918,Gintama,3.5
3,5081,Bakemonogatari,5.0
4,5690,Nodame Cantabile Finale,4.5


In [None]:
#Filtering out the anime from the input

userAnime = animeWithGenres[animeWithGenres['anime_id'].isin(inputAnime['anime_id'].tolist())]
userAnime

Unnamed: 0,anime_id,name,genre,Drama,Romance,School,Supernatural,Action,Adventure,Drama.1,...,Supernatural.1,Samurai,Super Power,Vampire,Space,Hentai,Yaoi,Hentai.1,Yuri,Yaoi.1
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic,...",0.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,28851,Koe no Katachi,"[Drama, School, Shounen]",1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,918,Gintama,"[Action, Comedy, Historical, Parody, Samur...",0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
160,5081,Bakemonogatari,"[Mystery, Romance, Supernatural, Vampire]",0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
167,5690,Nodame Cantabile Finale,"[Comedy, Josei, Music, Romance]",0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#Resetting the index to avoid future issues
userAnime = userAnime.reset_index(drop=True)

In [None]:
#Dropping unnecessary issues due to save memory and to avoid issues
userGenreTable = userAnime.drop(['anime_id','name','genre'], 1)

userGenreTable

  


Unnamed: 0,Drama,Romance,School,Supernatural,Action,Adventure,Drama.1,Fantasy,Magic,Military,...,Supernatural.1,Samurai,Super Power,Vampire,Space,Hentai,Yaoi,Hentai.1,Yuri,Yaoi.1
0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#dot produt to get weights

userProfile = userGenreTable.transpose().dot(inputAnime['rating'])
userProfile

Drama            2.0
 Romance         9.5
 School          2.0
 Supernatural    5.0
Action           8.5
                ... 
Hentai           0.0
 Yaoi            0.0
 Hentai          0.0
 Yuri            0.0
Yaoi             0.0
Length: 82, dtype: float64

In [None]:
#Now let's get the genres of every movie in our original dataframe
genreTable = animeWithGenres.set_index(animeWithGenres['anime_id'])

#And drop the unnecessary information
genreTable = genreTable.drop(['anime_id','name','genre'], 1)
genreTable.head()

  """


Unnamed: 0_level_0,Drama,Romance,School,Supernatural,Action,Adventure,Drama,Fantasy,Magic,Military,...,Supernatural,Samurai,Super Power,Vampire,Space,Hentai,Yaoi,Hentai,Yuri,Yaoi
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5114,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28977,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9253,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9969,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#Multiply the genres by the weights and then take the weighted average
recommendationTable = ((genreTable*userProfile).sum(axis=1))/(userProfile.sum())
recommendationTable.head()

anime_id
32281    0.178744
5114     0.425121
28977    0.352657
9253     0.000000
9969     0.352657
dtype: float64

In [None]:
#Sort our recommendations in descending order
recommendationTable = recommendationTable.sort_values(ascending=False)

#Just a peek at the values
recommendationTable.head()

anime_id
1397    0.531401
4938    0.516908
249     0.502415
6811    0.502415
1224    0.497585
dtype: float64

In [None]:
#The final recommendation table
anime.loc[anime['anime_id'].isin(recommendationTable.head(20).keys())]

Unnamed: 0,anime_id,name,genre
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic,..."
173,6811,InuYasha: Kanketsu-hen,"[Action, Adventure, Comedy, Demons, Fantas..."
200,121,Fullmetal Alchemist,"[Action, Adventure, Comedy, Drama, Fantasy..."
286,4938,Tsubasa: Shunraiki,"[Action, Adventure, Drama, Fantasy, Magic,..."
690,249,InuYasha,"[Action, Adventure, Comedy, Demons, Fantas..."
1071,969,Tsubasa Chronicle 2nd Season,"[Action, Adventure, Drama, Fantasy, Myster..."
1129,177,Tsubasa Chronicle,"[Action, Adventure, Fantasy, Magic, Romanc..."
1286,218,Kidou Senkan Nadesico,"[Action, Comedy, Mecha, Military, Parody, ..."
1347,1459,Uchuu no Kishi Tekkaman Blade,"[Action, Adventure, Drama, Mecha, Military..."
1558,9135,Fullmetal Alchemist: The Sacred Star of Milos,"[Action, Adventure, Comedy, Drama, Fantasy..."
