***
## CONTENT BASED RECOMMENDER SYSTEM
## HOMEWORK
### by Prayuda Satya Graha
### Shift Academy Data Science Bootcamp Batch 9
***

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Anime Recommendation - Type Based Recommender

In [2]:
# Read the anime catalogue from the CSV file in the working directory
anime = pd.read_csv('anime.csv')

# Preview the top five records
anime.head(5)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [3]:
# Summarise the frame: per-column non-null counts and dtypes, plus memory usage
anime.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [4]:
# This recommender only relies on the first four columns:
# 'anime_id', 'name', 'genre' and 'type'
anime = anime[anime.columns[:4]]
anime.head()

Unnamed: 0,anime_id,name,genre,type
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV


In [5]:
# Count the missing entries in each column
anime.isnull().sum()

anime_id     0
name         0
genre       62
type        25
dtype: int64

In [6]:
# Discard every row that has at least one missing field
anime = anime.dropna(axis=0, how='any')
anime

Unnamed: 0,anime_id,name,genre,type
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV
...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA
12290,5543,Under World,Hentai,OVA
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA


In [7]:
# Feature engineering: turn the 'type' column into a token-count matrix.
# Each value is tokenised by splitting it on ', '.
cv = CountVectorizer(tokenizer=lambda text: text.split(', '))
by_type = cv.fit_transform(raw_documents=anime['type'])

In [8]:
# Inspect the learned vocabulary: its size and its entries.
# `get_feature_names` was removed in scikit-learn 1.2, so use
# `get_feature_names_out` when the installed version provides it and fall
# back to the legacy method otherwise.
if hasattr(cv, 'get_feature_names_out'):
    # .tolist() converts the returned array to a plain list of str so the
    # printed form matches what the legacy method produced.
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()

print(len(feature_names))
print(feature_names)

6
['movie', 'music', 'ona', 'ova', 'special', 'tv']


In [9]:
# Dense, tabular view of the type count matrix
type_counts = by_type.toarray()
pd.DataFrame(type_counts)

Unnamed: 0,0,1,2,3,4,5
0,1,0,0,0,0,0
1,0,0,0,0,0,1
2,0,0,0,0,0,1
3,0,0,0,0,0,1
4,0,0,0,0,0,1
...,...,...,...,...,...,...
12205,0,0,0,1,0,0
12206,0,0,0,1,0,0
12207,0,0,0,1,0,0
12208,0,0,0,1,0,0


In [10]:
# Pairwise cosine similarity between the rows of the type matrix;
# the recommender ranks candidates by these scores
coScore_anime = cosine_similarity(X=by_type)

In [11]:
# Show the type-based similarity scores as a table
pd.DataFrame(data=coScore_anime)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12200,12201,12202,12203,12204,12205,12206,12207,12208,12209
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
12206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
12207,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0
12208,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


In [12]:
# Renumber the rows from 0; the previous row labels are kept in a new
# 'index' column
anime = anime.reset_index(drop=False)
anime

Unnamed: 0,index,anime_id,name,genre,type
0,0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie
1,1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV
2,2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV
3,3,9253,Steins;Gate,"Sci-Fi, Thriller",TV
4,4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV
...,...,...,...,...,...
12205,12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA
12206,12290,5543,Under World,Hentai,OVA
12207,12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA
12208,12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA


In [13]:
anime_liked = input('Apa anime yang anda sukai? ')

# Row positions of the liked title. Positions (not index labels) are used
# throughout, because row i of coScore_anime describes the i-th row of `anime`.
index_suka = np.flatnonzero((anime['name'] == anime_liked).to_numpy())

if len(index_suka) == 0:
    # Unknown title: report it instead of failing with an IndexError.
    print('Anime "{}" was not found in the dataset.'.format(anime_liked))
else:
    liked_pos = index_suka[0]

    # Pair every *other* anime with its similarity to the liked one. The liked
    # anime is excluded explicitly by position: it is not guaranteed to sort
    # first, because many titles tie at the maximum score.
    anime_recom = [
        (pos, score)
        for pos, score in enumerate(coScore_anime[liked_pos])
        if pos != liked_pos
    ]

    # Highest similarity first; sorted() is stable, so ties keep dataset order.
    your_recom = sorted(anime_recom, key=lambda x: x[1], reverse=True)

    # Row positions of the 10 most similar anime
    recom = [pos for pos, _ in your_recom[:10]]

    print('Special Pick For You!')
    for rank, pos in enumerate(recom, start=1):
        # Look columns up by name so the output does not depend on column order.
        print('{}. {}, (Genre : {})'.format(
            rank, anime['name'].iloc[pos], anime['genre'].iloc[pos]))

Apa anime yang anda sukai?  Fullmetal Alchemist: Brotherhood


Special Pick For You!
1. Gintama°, (Genre : Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen)
2. Steins;Gate, (Genre : Sci-Fi, Thriller)
3. Gintama&#039;, (Genre : Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen)
4. Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou, (Genre : Comedy, Drama, School, Shounen, Sports)
5. Hunter x Hunter (2011), (Genre : Action, Adventure, Shounen, Super Power)
6. Gintama&#039;: Enchousen, (Genre : Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen)
7. Clannad: After Story, (Genre : Drama, Fantasy, Romance, Slice of Life, Supernatural)
8. Gintama, (Genre : Action, Comedy, Historical, Parody, Samurai, Sci-Fi, Shounen)
9. Code Geass: Hangyaku no Lelouch R2, (Genre : Action, Drama, Mecha, Military, Sci-Fi, Super Power)
10. Haikyuu!! Second Season, (Genre : Comedy, Drama, School, Shounen, Sports)


***
## Anime Recommendation - Genre and Type Mixed Based
***

In [14]:
# Combine genre and type into one ', '-separated string per anime
anime['genre_type'] = anime['genre'].str.cat(anime['type'], sep=', ')
anime

Unnamed: 0,index,anime_id,name,genre,type,genre_type
0,0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,"Drama, Romance, School, Supernatural, Movie"
1,1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,"Action, Adventure, Drama, Fantasy, Magic, Mili..."
2,2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,"Action, Comedy, Historical, Parody, Samurai, S..."
3,3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,"Sci-Fi, Thriller, TV"
4,4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,"Action, Comedy, Historical, Parody, Samurai, S..."
...,...,...,...,...,...,...
12205,12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,"Hentai, OVA"
12206,12290,5543,Under World,Hentai,OVA,"Hentai, OVA"
12207,12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,"Hentai, OVA"
12208,12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,"Hentai, OVA"


In [15]:
# Remove the 'index' column
anime = anime.drop(columns='index')

In [16]:
# Feature engineering: token counts over the combined 'genre_type' column,
# tokenised by splitting each value on ', '.
# The original cell created `cv1` but then re-fit the earlier `cv`; fit the
# vectorizer that is actually created here.
cv1 = CountVectorizer(tokenizer=lambda x: x.split(', '))
by_genre_type = cv1.fit_transform(anime['genre_type'])

# Later cells read the vocabulary through `cv`, so keep that name pointing at
# the fitted genre+type vectorizer.
cv = cv1

In [17]:
# Inspect the learned vocabulary: its size and its entries.
# `get_feature_names` was removed in scikit-learn 1.2, so use
# `get_feature_names_out` when the installed version provides it and fall
# back to the legacy method otherwise.
if hasattr(cv, 'get_feature_names_out'):
    # .tolist() converts the returned array to a plain list of str so the
    # printed form matches what the legacy method produced.
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()

print(len(feature_names))
print(feature_names)

48
['action', 'adventure', 'cars', 'comedy', 'dementia', 'demons', 'drama', 'ecchi', 'fantasy', 'game', 'harem', 'hentai', 'historical', 'horror', 'josei', 'kids', 'magic', 'martial arts', 'mecha', 'military', 'movie', 'music', 'mystery', 'ona', 'ova', 'parody', 'police', 'psychological', 'romance', 'samurai', 'school', 'sci-fi', 'seinen', 'shoujo', 'shoujo ai', 'shounen', 'shounen ai', 'slice of life', 'space', 'special', 'sports', 'super power', 'supernatural', 'thriller', 'tv', 'vampire', 'yaoi', 'yuri']


In [18]:
# Dense, tabular view of the genre+type count matrix
genre_type_counts = by_genre_type.toarray()
pd.DataFrame(genre_type_counts)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,38,39,40,41,42,43,44,45,46,47
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,1,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,1,0,0,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0
4,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12205,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12206,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12207,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12208,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Pairwise cosine similarity between the rows of the genre+type matrix;
# the recommender ranks candidates by these scores
coScore_anime1 = cosine_similarity(X=by_genre_type)

In [20]:
# Show the genre+type similarity scores as a table
pd.DataFrame(data=coScore_anime1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12200,12201,12202,12203,12204,12205,12206,12207,12208,12209
0,1.000000,0.158114,0.000000,0.000000,0.000000,0.365148,0.000000,0.200000,0.158114,0.000000,...,0.0,0.223607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.316228
1,0.158114,1.000000,0.375000,0.204124,0.375000,0.433013,0.632456,0.316228,0.250000,0.375000,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,0.000000,0.375000,1.000000,0.408248,1.000000,0.433013,0.474342,0.158114,0.875000,1.000000,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,0.000000,0.204124,0.408248,1.000000,0.408248,0.235702,0.258199,0.258199,0.204124,0.408248,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,0.000000,0.375000,1.000000,0.408248,1.000000,0.433013,0.474342,0.158114,0.875000,1.000000,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12205,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,...,1.0,0.707107,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.500000
12206,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,...,1.0,0.707107,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.500000
12207,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,...,1.0,0.707107,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.500000
12208,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.316228,0.000000,0.000000,...,1.0,0.707107,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.500000


In [21]:
# Move the current row labels into an 'index' column and renumber rows from 0
anime = anime.reset_index(drop=False)
anime

Unnamed: 0,index,anime_id,name,genre,type,genre_type
0,0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,"Drama, Romance, School, Supernatural, Movie"
1,1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,"Action, Adventure, Drama, Fantasy, Magic, Mili..."
2,2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,"Action, Comedy, Historical, Parody, Samurai, S..."
3,3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,"Sci-Fi, Thriller, TV"
4,4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,"Action, Comedy, Historical, Parody, Samurai, S..."
...,...,...,...,...,...,...
12205,12205,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,"Hentai, OVA"
12206,12206,5543,Under World,Hentai,OVA,"Hentai, OVA"
12207,12207,5621,Violence Gekiga David no Hoshi,Hentai,OVA,"Hentai, OVA"
12208,12208,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,"Hentai, OVA"


In [22]:
anime_liked = input('Apa anime yang anda sukai? ')

# Row positions of the liked title. Positions (not index labels) are used
# throughout, because row i of coScore_anime1 describes the i-th row of `anime`.
index_suka = np.flatnonzero((anime['name'] == anime_liked).to_numpy())

if len(index_suka) == 0:
    # Unknown title: report it instead of failing with an IndexError.
    print('Anime "{}" was not found in the dataset.'.format(anime_liked))
else:
    liked_pos = index_suka[0]

    # Pair every *other* anime with its similarity to the liked one. The liked
    # anime is excluded explicitly by position: a title with an identical
    # genre/type combination also scores 1.0 and may sort ahead of it.
    anime_recom = [
        (pos, score)
        for pos, score in enumerate(coScore_anime1[liked_pos])
        if pos != liked_pos
    ]

    # Highest similarity first; sorted() is stable, so ties keep dataset order.
    your_recom = sorted(anime_recom, key=lambda x: x[1], reverse=True)

    # Row positions of the 10 most similar anime
    recom = [pos for pos, _ in your_recom[:10]]

    print('Special Pick For You!')
    for rank, pos in enumerate(recom, start=1):
        # Look columns up by name so the output does not depend on column order.
        print('{}. {}, (Genre : {})'.format(
            rank, anime['name'].iloc[pos], anime['genre'].iloc[pos]))

Apa anime yang anda sukai?  Fullmetal Alchemist: Brotherhood


Special Pick For You!
1. Fullmetal Alchemist, (Genre : Action, Adventure, Comedy, Drama, Fantasy, Magic, Military, Shounen)
2. Magi: The Kingdom of Magic, (Genre : Action, Adventure, Fantasy, Magic, Shounen)
3. Magi: The Labyrinth of Magic, (Genre : Action, Adventure, Fantasy, Magic, Shounen)
4. Magi: Sinbad no Bouken (TV), (Genre : Action, Adventure, Fantasy, Magic, Shounen)
5. Densetsu no Yuusha no Densetsu, (Genre : Action, Adventure, Fantasy, Magic, Shounen)
6. Tide-Line Blue, (Genre : Action, Adventure, Drama, Military, Shounen)
7. Jikuu Tenshou Nazca, (Genre : Action, Adventure, Drama, Fantasy, Magic)
8. Chain Chronicle: Haecceitas no Hikari, (Genre : Action, Adventure, Fantasy, Magic, Shounen)
9. Fullmetal Alchemist: The Sacred Star of Milos, (Genre : Action, Adventure, Comedy, Drama, Fantasy, Magic, Military, Shounen)
10. Fairy Tail (2014), (Genre : Action, Adventure, Comedy, Fantasy, Magic, Shounen)
