# Finding Similar Anime By Genre

An anime finder by genre using simple feature and jaccard similarity score. 

In [1]:
import numpy as np
import pandas as pd
import itertools
import collections
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_similarity_score

# Preprocessing

We split the genre in preprocessing, so later we can turn them to features.

In [2]:
anime = pd.read_csv("anime.csv") 

anime['genre'] = anime['genre'].fillna('None')
anime['genre'] = anime['genre'].apply(lambda x: x.split(', '))

genre_data = itertools.chain(*animes['genre'].values.tolist())
genre_counter = collections.Counter(genre_data)
genres = pd.DataFrame.from_dict(genre_counter, orient='index').reset_index().rename(columns={'index':'genre', 0:'count'})
genres.sort_values('count', ascending=False, inplace=True)

print(genres)

            genre  count
10         Comedy   4645
4          Action   2845
5       Adventure   2348
6         Fantasy   2309
14         Sci-Fi   2070
0           Drama   2016
9         Shounen   1712
38           Kids   1609
1         Romance   1464
2          School   1220
19  Slice of Life   1220
41         Hentai   1141
3    Supernatural   1037
20          Mecha    944
21          Music    860
11     Historical    806
7           Magic    778
31          Ecchi    637
26         Shoujo    603
23         Seinen    547
16         Sports    543
22        Mystery    495
17    Super Power    465
8        Military    426
12         Parody    408
18          Space    381
27         Horror    369
36          Harem    317
30         Demons    294
24   Martial Arts    265
35       Dementia    240
29  Psychological    229
28         Police    197
34           Game    181
13        Samurai    148
25        Vampire    102
15       Thriller     87
37           Cars     72
33     Shounen Ai     65


# Feature Extraction
The feature extraction is simple, a binary encoded vector of genre.

this shows which feature/genre each show has in binary (by id)

In [3]:
genre_map = {genre: idx for idx, genre in enumerate(genre_counter.keys())}
def extract_feature(genre):
    feature = np.zeros(len(genre_map.keys()), dtype=int)
    feature[[genre_map[idx] for idx in genre]] += 1
    return feature
    
anime_feature = pd.concat([animes['name'], animes['genre']], axis=1)
anime_feature['genre'] = anime_feature['genre'].apply(lambda x: extract_feature(x))
print(anime_feature.head(30))

                                                 name  \
0                                      Kimi no Na wa.   
1                    Fullmetal Alchemist: Brotherhood   
2                                            Gintama°   
3                                         Steins;Gate   
4                                       Gintama&#039;   
5   Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...   
6                              Hunter x Hunter (2011)   
7                                Ginga Eiyuu Densetsu   
8   Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...   
9                            Gintama&#039;: Enchousen   
10                               Clannad: After Story   
11                                     Koe no Katachi   
12                                            Gintama   
13                 Code Geass: Hangyaku no Lelouch R2   
14                            Haikyuu!! Second Season   
15                      Sen to Chihiro no Kamikakushi   
16                            S

# Testing

In [8]:
test_data = anime_feature.take([841, 11, 0, 23])
for row in test_data.iterrows():
    print('Similar anime like {}:'.format(row[1]['name']))
    search = anime_feature.drop([row[0]]) # drop current anime
    search['result'] = search['genre'].apply(lambda x: jaccard_similarity_score(row[1]['genre'], x))
    search_result = search.sort_values('result', ascending=False)['name'].head(10)
    for res in search_result.values:
        print('\t{}'.format(res))
    print()

Similar anime like Naruto:
	Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsugu Mono
	Naruto Soyokazeden Movie: Naruto to Mashin to Mitsu no Onegai Dattebayo!!
	Naruto: Shippuuden
	Naruto Shippuuden: Sunny Side Battle
	Boruto: Naruto the Movie - Naruto ga Hokage ni Natta Hi
	Boruto: Naruto the Movie
	Naruto x UT
	Naruto: Shippuuden Movie 4 - The Lost Tower
	Battle Spirits: Ryuuko no Ken
	Kyutai Panic Adventure!

Similar anime like Koe no Katachi:
	Shinken Seminar Koukou Kouza
	Maji
	Koha Ginjiro
	SKET Dance: Demystifying Special
	Shiranpuri (Movie)
	Sotsugyou M: Ore-tachi no Carnival
	Dodani
	Sotsugyou: Graduation
	Suntory Minami Alps no Tennen Mizu
	Kimi no Iru Machi: Tasogare Kousaten

Similar anime like Kimi no Na wa.:
	Wind: A Breath of Heart (TV)
	Wind: A Breath of Heart OVA
	To Heart 2 Special
	Koi to Senkyo to Chocolate Special
	Koi to Senkyo to Chocolate
	Touka Gettan
	Mizuiro (2003)
	Myself; Yourself
	Air Movie
	Kimikiss Pure Rouge

Similar anime like One Punch Man:
	One Punch Ma

In [5]:
# for row in anime_feature.iterrows():
#     print('Similar anime like {}:'.format(row[1]['name']))
#     search = anime_feature.drop([row[0]])
#     search['result'] = search['genre'].apply(lambda x: jaccard_similarity_score(row[1]['genre'], x))
#     search_result = search.sort_values('result', ascending=False)['name'].head(5)
#     for res in search_result.values:
#         print('\t{}'.format(res))