In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings('ignore')

In [2]:
df_full = pd.read_csv('anime.csv')
df_half = df_full.iloc[:850]

#### Using Half Dataset

In [3]:
df_half.head(10)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
6,11061,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
7,820,Ginga Eiyuu Densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,9.11,80679
8,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534
9,15417,Gintama&#039;: Enchousen,"Action, Comedy, Historical, Parody, Samurai, S...",TV,13,9.11,81109


In [4]:
len(df_half['name'])

850

In [5]:
df_half.isna().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [6]:
df_half['genre'].unique()

array(['Drama, Romance, School, Supernatural',
       'Action, Adventure, Drama, Fantasy, Magic, Military, Shounen',
       'Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen',
       'Sci-Fi, Thriller', 'Comedy, Drama, School, Shounen, Sports',
       'Action, Adventure, Shounen, Super Power',
       'Drama, Military, Sci-Fi, Space',
       'Drama, Fantasy, Romance, Slice of Life, Supernatural',
       'Drama, School, Shounen',
       'Action, Drama, Mecha, Military, Sci-Fi, Super Power',
       'Adventure, Drama, Supernatural',
       'Drama, Music, Romance, School, Shounen',
       'Adventure, Fantasy, Historical, Mystery, Seinen, Slice of Life, Supernatural',
       'Fantasy, Slice of Life',
       'Action, Mecha, Military, School, Sci-Fi, Super Power',
       'Comedy, Drama, Shounen, Sports',
       'Action, Drama, Historical, Martial Arts, Romance, Samurai',
       'Action, Adventure, Comedy, Drama, Sci-Fi, Space',
       'Action, Comedy, Parody, Sci-Fi, Seinen, Super 

In [7]:
# splitting genre by (', ')
ext = CountVectorizer(tokenizer = lambda x : x.split(', ')) 
mgenre = ext.fit_transform(df_half['genre'])

In [8]:
print(len(ext.get_feature_names()))
print(ext.get_feature_names())

40
['action', 'adventure', 'cars', 'comedy', 'dementia', 'demons', 'drama', 'ecchi', 'fantasy', 'game', 'harem', 'historical', 'horror', 'josei', 'kids', 'magic', 'martial arts', 'mecha', 'military', 'music', 'mystery', 'parody', 'police', 'psychological', 'romance', 'samurai', 'school', 'sci-fi', 'seinen', 'shoujo', 'shoujo ai', 'shounen', 'shounen ai', 'slice of life', 'space', 'sports', 'super power', 'supernatural', 'thriller', 'vampire']


In [9]:
mgenre.toarray()

array([[0, 0, 0, ..., 1, 0, 0],
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [10]:
cosScore = cosine_similarity(mgenre)

### Trying Recommender

In [11]:
# recommender
ani_rec = 'One Piece'

idx_rec = df_half[df_half['name'] == ani_rec].index[0]
idx_rec

74

In [12]:
# countvectorizer
ecv = CountVectorizer()

# recommend by :

# name
rec_name = ecv.fit_transform(df_half['name'])

# type
rec_type = ecv.fit_transform(df_half['type'])

# genre
rec_genre = ecv.fit_transform(df_half['genre'])

# genre and type
rec_genty = ecv.fit_transform((df_half['genre'])+(df_half['type']))

In [13]:
df_half['rating'] = df_half['rating'].astype('str')

In [14]:
# rating
rec_rating = ecv.fit_transform(df_half['rating'])

# rating and type
rec_raty = ecv.fit_transform((df_half['rating'])+(df_half['type']))

In [15]:
# applying recommendation
# name
CS_name = cosine_similarity(rec_name)

# type
CS_type = cosine_similarity(rec_type)

# genre
CS_genre = cosine_similarity(rec_genre)

# # rating
CS_rating = cosine_similarity(rec_rating)

# genre & type
CS_genty = cosine_similarity(rec_genty)

# # genre & rating
CS_raty = cosine_similarity(rec_raty)

In [16]:
# similar anime recommendation
sim_name = list(enumerate(CS_name[idx_rec]))
sim_type = list(enumerate(CS_type[idx_rec]))
sim_genre = list(enumerate(CS_genre[idx_rec]))
sim_rating = list(enumerate(CS_rating[idx_rec]))
sim_genty = list(enumerate(CS_genty[idx_rec]))
sim_raty = list(enumerate(CS_raty[idx_rec]))

In [17]:
# filter the cosine similarity > 70% 
sim_name_sort = list(filter(lambda x : x[1] > 0.7, sim_name))
sim_type_sort = list(filter(lambda x : x[1] > 0.7, sim_type))
sim_genre_sort = list(filter(lambda x : x[1] > 0.7, sim_genre))
sim_rating_sort = list(filter(lambda x : x[1] > 0.7, sim_rating))
sim_genty_sort = list(filter(lambda x : x[1] > 0.7, sim_genty))
sim_raty_sort = list(filter(lambda x : x[1] > 0.7, sim_raty))

In [18]:
# print recommendation
for i in sim_name_sort[1:10]:
    print(df_half.iloc[i[0]]['name'])

One Piece Film: Z
One Piece Film: Gold


In [19]:
for i in sim_type_sort[1:10]:
    print(df_half.iloc[i[0]]['name'])

Gintama°
Steins;Gate
Gintama&#039;
Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou
Hunter x Hunter (2011)
Gintama&#039;: Enchousen
Clannad: After Story
Gintama
Code Geass: Hangyaku no Lelouch R2


In [20]:
for i in sim_genre_sort[1:10]:
    print(df_half.iloc[i[0]]['name'])

One Piece
JoJo no Kimyou na Bouken: Diamond wa Kudakenai
Shingeki no Kyojin
Hunter x Hunter
Tsubasa: Tokyo Revelations
One Piece Film: Strong World
Hunter x Hunter OVA
Hunter x Hunter: Greed Island Final
One Piece Film: Z


In [21]:
for i in sim_rating_sort[1:10]:
    print(df_half.iloc[i[0]]['name'])

Hotaru no Haka
Kuroko no Basket 2nd Season
Major S5
One Piece


In [22]:
for i in sim_genty_sort[1:10]:
    print(df_half.iloc[i[0]]['name'])

One Piece
JoJo no Kimyou na Bouken: Diamond wa Kudakenai
Shingeki no Kyojin
Hunter x Hunter
One Piece Film: Strong World
One Piece Film: Z
Katekyo Hitman Reborn!
Boku no Hero Academia
Dragon Ball Z


In [23]:
for i in sim_raty_sort[1:10]:
    print(df_half.iloc[i[0]]['name'])

Major S5
One Piece


#### TESTING THE RECOMMENDER

In [40]:
original = df_half[df_half['name']=='One Piece']
by_name = df_half[df_half['name']=='One Piece Film: Z']
by_type = df_half[df_half['name']=='Gintama°']
by_genre = df_half[df_half['name']=='JoJo no Kimyou na Bouken: Diamond wa Kudakenai']
by_rating = df_half[df_half['name']=='Hotaru no Haka']
by_genretype = df_half[df_half['name']=='Shingeki no Kyojin']
by_ratingtype = df_half[df_half['name']=='Major S5']

In [48]:
pd.concat([original, by_name, by_type, by_genre, by_rating], keys = ['Original', 'By Name', 'By Type', 'By Genre', 'By Rating'])

Unnamed: 0,Unnamed: 1,anime_id,name,genre,type,episodes,rating,members
Original,74,21,One Piece,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",TV,Unknown,8.58,504862
By Name,163,12859,One Piece Film: Z,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",Movie,1,8.39,76051
By Type,2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
By Genre,76,31933,JoJo no Kimyou na Bouken: Diamond wa Kudakenai,"Action, Adventure, Comedy, Drama, Shounen, Sup...",TV,39,8.57,74074
By Rating,71,578,Hotaru no Haka,"Drama, Historical",Movie,1,8.58,174878


In [49]:
pd.concat([original, by_genretype, by_ratingtype], keys = ['Original', 'By Genre & Type', 'By Rating & Type'])

Unnamed: 0,Unnamed: 1,anime_id,name,genre,type,episodes,rating,members
Original,74,21,One Piece,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",TV,Unknown,8.58,504862
By Genre & Type,86,16498,Shingeki no Kyojin,"Action, Drama, Fantasy, Shounen, Super Power",TV,25,8.54,896229
By Rating & Type,73,5028,Major S5,"Comedy, Drama, Romance, Sports",TV,25,8.58,28653
