In [1]:
import pandas as pd
import numpy as np

In [3]:
# Load the two input tables from CSV files under the relative "app/" folder:
# `anime` holds one row per title, `ratings` one row per (user, title) rating.
# NOTE(review): the ratings preview in this notebook shows rating values of -1;
# presumably a "not rated" sentinel - confirm against the dataset description.
anime = pd.read_csv("app/anime.csv")
ratings = pd.read_csv("app/rating.csv")

In [4]:
anime

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [5]:
ratings

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
7813732,73515,16512,7
7813733,73515,17187,9
7813734,73515,22145,10
7813735,73516,790,9


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Turn every anime title into a TF-IDF vector so free-text queries can be
# compared against the catalogue numerically.
# NOTE(review): some titles in the preview above contain HTML entities
# (e.g. "Gintama&#039;"); consider unescaping names before fitting - confirm.

# ngram_range=(1, 2): index single words as well as pairs of adjacent words.
vectorizer = TfidfVectorizer(ngram_range=(1,2))

# Learn the vocabulary from the "name" column and build the title matrix
# that `search` compares queries against.
tfidf = vectorizer.fit_transform(anime["name"])

In [7]:
from sklearn.metrics.pairwise import cosine_similarity

def search(title, top_n=5):
    """Return the catalogue rows whose titles best match ``title``.

    The query is vectorised with the module-level ``vectorizer`` and compared
    against the module-level ``tfidf`` title matrix by cosine similarity.

    Parameters
    ----------
    title : str
        Free-text title (or fragment) to look up.
    top_n : int, default 5
        Maximum number of matches to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``anime``, ordered from most to least similar,
        so ``result.iloc[0]`` is always the best match.
    """
    query_vec = vectorizer.transform([title])
    # Compare the query against every title in the dataset.
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Never request more rows than exist: argpartition raises if k > size.
    k = min(int(top_n), similarity.size)
    if k <= 0:
        return anime.iloc[0:0]

    # argpartition only guarantees *which* k entries are the largest, not the
    # order among them, so rank the selected candidates explicitly.
    top = np.argpartition(similarity, -k)[-k:]
    top = top[np.argsort(-similarity[top], kind="stable")]
    return anime.iloc[top]

In [8]:
import ipywidgets as widgets
from IPython.display import display

# Text box for the query and an output area that holds the matches.
anime_input = widgets.Text(value="Kimi no Na wa.", description="Anime Title: ", disabled=False)
anime_list = widgets.Output()


def on_type(data):
    """Show the closest title matches for the text currently in the box.

    ``data`` is the change notification handed over by ``observe``; only its
    ``"new"`` entry (the updated text) is read.
    """
    with anime_list:
        anime_list.clear_output()
        typed = data["new"]
        # Inputs of three characters or fewer are ignored; the panel stays cleared.
        if len(typed) <= 3:
            return
        display(search(typed))


# Run the callback on every change of the widget's value, then render both widgets.
anime_input.observe(on_type, names="value")
display(anime_input, anime_list)

Text(value='Kimi no Na wa.', description='Anime Title: ')

Output()

In [42]:
# Seed title for the step-by-step walkthrough in the following cells
# (anime_id 20 is "Naruto" in the merged result at the end of the walkthrough).
anime_id = 20

In [43]:
# Users who rated the seed title above 4: their other favourites drive the
# recommendations. `.unique()` yields each user_id once.
similar_users = ratings[(ratings["anime_id"] == anime_id) & (ratings["rating"] > 4)]["user_id"].unique()

In [44]:
similar_users

array([    3,     5,    21, ..., 73497, 73499, 73500])

In [45]:
# Every title (anime_id) that those users rated above 4, one entry per rating row.
similar_users_recs = ratings[(ratings["user_id"].isin(similar_users)) & (ratings["rating"] > 4)]["anime_id"]

In [46]:
similar_users_recs

156           20
157          154
158          170
159          199
160          225
           ...  
7812130    30276
7812131    31173
7812132    31680
7812133    31704
7812134    32648
Name: anime_id, Length: 2979190, dtype: int64

In [47]:
# Convert the raw list of liked titles into the share of similar users who
# liked each title (count per anime_id divided by the number of similar users).
# NOTE: this cell overwrites `similar_users_recs` with a value derived from
# itself, so re-running it without re-running the cell that builds the raw
# list produces wrong numbers.
similar_users_recs = similar_users_recs.value_counts() / len(similar_users)

# Keep only titles liked by more than 10% of the similar users.
similar_users_recs = similar_users_recs[similar_users_recs > 0.1] 

In [48]:
similar_users_recs

20       1.000000
1535     0.696988
11757    0.509794
1575     0.508223
16498    0.504620
           ...   
21405    0.100665
8086     0.100480
11933    0.100388
8676     0.100388
357      0.100296
Name: anime_id, Length: 334, dtype: float64

In [50]:
# Rating rows from *any* user for the shortlisted titles, again restricted to
# ratings above 4; this is the baseline the similar-user shares are compared to.
all_users = ratings[(ratings["anime_id"].isin(similar_users_recs.index)) & (ratings["rating"] > 4)]

In [51]:
all_users

Unnamed: 0,user_id,anime_id,rating
47,1,8074,10
81,1,11617,10
83,1,11757,10
101,1,15451,10
153,2,11771,10
...,...,...,...
7813727,73515,13161,8
7813730,73515,13659,8
7813731,73515,14345,7
7813735,73516,790,9


In [52]:
# Share of the baseline users (distinct user_ids in `all_users`) who liked each
# shortlisted title.
all_user_recs = all_users["anime_id"].value_counts() / len(all_users["user_id"].unique())

In [53]:
all_user_recs

1535     0.499647
11757    0.371261
16498    0.368983
1575     0.352571
6547     0.342771
           ...   
834      0.049662
8246     0.048222
13667    0.045695
10589    0.041625
594      0.037673
Name: anime_id, Length: 334, dtype: float64

In [56]:
# Put both shares side by side, aligned on anime_id (the index of both Series).
rec_percentages = pd.concat([similar_users_recs, all_user_recs], axis=1)
# Column order follows the concat order: similar-user share first, baseline second.
rec_percentages.columns = ["Similar Recs", "All Recs"]

In [57]:
rec_percentages

Unnamed: 0,Similar Recs,All Recs
20,1.000000,0.318043
1535,0.696988,0.499647
11757,0.509794,0.371261
1575,0.508223,0.352571
16498,0.504620,0.368983
...,...,...
21405,0.100665,0.067734
8086,0.100480,0.055965
11933,0.100388,0.060359
8676,0.100388,0.066206


In [58]:
# Score = how many times more popular a title is among similar users than among
# the baseline users; values above 1 mean it is over-represented among them.
rec_percentages["Score"] = rec_percentages["Similar Recs"] / rec_percentages["All Recs"]

In [60]:
# Highest score first, so the strongest recommendations are at the top.
rec_percentages = rec_percentages.sort_values("Score", ascending=False)

In [61]:
rec_percentages

Unnamed: 0,Similar Recs,All Recs,Score
20,1.000000,0.318043,3.144230
2144,0.170655,0.059080,2.888532
4437,0.180819,0.063400,2.852031
6325,0.148157,0.051998,2.849264
936,0.183313,0.064473,2.843278
...,...,...,...
572,0.102513,0.086703,1.182350
513,0.106117,0.091287,1.162449
523,0.177539,0.153688,1.155189
512,0.103714,0.094005,1.103282


In [62]:
# Attach title metadata to the ten best-scoring entries: the left side is keyed
# by its index (anime_id), the right side by the "anime_id" column of `anime`.
rec_percentages.head(10).merge(anime, left_index=True, right_on="anime_id")

Unnamed: 0,Similar Recs,All Recs,Score,anime_id,name,genre,type,episodes,rating,members
841,1.0,0.318043,3.14423,20,Naruto,"Action, Comedy, Martial Arts, Shounen, Super P...",TV,220,7.81,683297
3560,0.170655,0.05908,2.888532,2144,Naruto Movie 3: Dai Koufun! Mikazuki Jima no A...,"Action, Adventure",Movie,1,6.95,91526
1828,0.180819,0.0634,2.852031,4437,Naruto: Shippuuden Movie 2 - Kizuna,"Action, Martial Arts, Shounen, Supernatural",Movie,1,7.43,97818
1573,0.148157,0.051998,2.849264,6325,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...,"Action, Comedy, Martial Arts, Shounen, Super P...",Movie,1,7.5,83515
3449,0.183313,0.064473,2.843278,936,Naruto Movie 2: Dai Gekitotsu! Maboroshi no Ch...,"Adventure, Comedy, Drama, Fantasy, Shounen, Su...",Movie,1,6.99,97308
1237,0.118082,0.041625,2.836799,10589,Naruto: Shippuuden Movie 5 - Blood Prison,"Action, Adventure, Martial Arts, Mystery, Shou...",Movie,1,7.62,75660
1827,0.208122,0.073509,2.831252,2472,Naruto: Shippuuden Movie 1,"Action, Adventure, Comedy, Fantasy, Shounen",Movie,1,7.43,108170
3984,0.106532,0.037673,2.827845,594,Naruto: Takigakure no Shitou - Ore ga Eiyuu Da...,"Action, Adventure, Comedy, Shounen, Super Power",Special,1,6.83,52264
1472,0.135776,0.048222,2.815628,8246,Naruto: Shippuuden Movie 4 - The Lost Tower,"Action, Comedy, Martial Arts, Shounen, Super P...",Movie,1,7.53,84527
784,0.125612,0.045695,2.748926,13667,Naruto: Shippuuden Movie 6 - Road to Ninja,"Action, Adventure, Martial Arts, Shounen, Supe...",Movie,1,7.84,87369


In [15]:
def find_similar_anime(anime_id, seed_min_rating=3, rec_min_rating=4,
                       min_share=0.1, top_n=10, exclude_seed=False):
    """Recommend titles favoured by users who liked ``anime_id``.

    A title scores highly when it is liked by a larger share of the users who
    liked the seed title than of the baseline users who liked any shortlisted
    title. Reads the module-level ``ratings`` and ``anime`` frames.

    Parameters
    ----------
    anime_id : int
        Seed title to find recommendations for.
    seed_min_rating : int or float, default 3
        A user counts as "similar" when they rated the seed title strictly
        above this value.
    rec_min_rating : int or float, default 4
        A rating counts as a "like" when it is strictly above this value.
    min_share : float, default 0.1
        Keep only titles liked by more than this fraction of similar users.
    top_n : int, default 10
        Number of recommendations to return.
    exclude_seed : bool, default False
        Drop the seed title itself from the result. It is kept by default
        for backward compatibility.

    Returns
    -------
    pandas.DataFrame
        Columns ``Score``, ``name``, ``genre``, ``type``, ``episodes``, sorted
        by descending ``Score``. Empty when no user matches.
    """
    # Users who rated the seed title above the seed threshold.
    similar_users = ratings.loc[
        (ratings["anime_id"] == anime_id) & (ratings["rating"] > seed_min_rating),
        "user_id",
    ].unique()

    # The "liked" mask is needed twice; compute it once over the full table.
    liked = ratings["rating"] > rec_min_rating

    # Share of similar users who liked each title, keeping the common ones only.
    similar_users_recs = ratings.loc[
        ratings["user_id"].isin(similar_users) & liked, "anime_id"
    ]
    similar_users_recs = similar_users_recs.value_counts() / len(similar_users)
    similar_users_recs = similar_users_recs[similar_users_recs > min_share]

    # Baseline: share of all users (who liked any shortlisted title) per title.
    all_users = ratings[ratings["anime_id"].isin(similar_users_recs.index) & liked]
    all_user_recs = all_users["anime_id"].value_counts() / all_users["user_id"].nunique()

    # Align both shares on anime_id.
    rec_percentages = pd.concat([similar_users_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["Similar Recs", "All Recs"]

    # Score: over-representation among similar users relative to the baseline.
    rec_percentages["Score"] = rec_percentages["Similar Recs"] / rec_percentages["All Recs"]
    rec_percentages = rec_percentages.sort_values("Score", ascending=False)

    if exclude_seed:
        # The seed is liked by its own fans by construction, so it is not a
        # useful recommendation when the caller asks for other titles.
        rec_percentages = rec_percentages.drop(index=anime_id, errors="ignore")

    # Take the top entries and attach title metadata from the anime table.
    top = rec_percentages.head(top_n).merge(anime, left_index=True, right_on="anime_id")
    return top[["Score", "name", "genre", "type", "episodes"]]

In [14]:
# Text box for the seed title and an output area for its recommendations.
anime_input_name = widgets.Text(
    value = "Kimi no Na wa.",
    description = "Anime Title: ",
    disabled=False
)

recommendations_list = widgets.Output()

# Named differently from the search-only callback defined earlier in the
# notebook so that this definition does not silently shadow it.
def on_recommendation_type(data):
    """Show recommendations for the best title match of the typed text.

    ``data`` is the change notification handed over by ``observe``; only its
    ``"new"`` entry (the updated text) is read.
    """
    with recommendations_list:
        recommendations_list.clear_output()
        title = data["new"]
        # Inputs of three characters or fewer are ignored; the panel stays cleared.
        if len(title) > 3:
            result = search(title)
            if result.empty:
                # Nothing to look up; leave the panel cleared.
                return
            # Read the id from the column rather than from a mixed-dtype row
            # so the integer dtype is preserved.
            anime_id = result["anime_id"].iloc[0]
            display(find_similar_anime(anime_id))

anime_input_name.observe(on_recommendation_type, names="value")

display(anime_input_name, recommendations_list)

Text(value='Kimi no Na wa.', description='Anime Title: ')

Output()