In [69]:
import pandas as pd
import matplotlib.pyplot as plt
import json
import numpy as np
from scipy import sparse
from lightfm import LightFM
from sklearn.metrics.pairwise import cosine_similarity

In [70]:
# The ratings file is read as space-separated values with no header row,
# so the three column names are supplied explicitly.
colnames = ['userId', 'animeId', 'rating']
df = pd.read_csv("main.txt", sep=' ', header=None, names=colnames)

In [71]:
# Preview the first five rows of the loaded ratings table.
df.head(n=5)

Unnamed: 0,userId,animeId,rating
0,6,34572,8
1,6,34566,7
2,6,21,10
3,6,38101,9
4,6,22199,8


In [72]:
def create_interaction_matrix(df, user_col, item_col, rating_col, norm=False, threshold=None):
    """Pivot a long-format ratings table into a user x item matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per (user, item) rating.
    user_col, item_col, rating_col : str
        Names of the columns holding the user id, the item id and the rating.
    norm : bool, default False
        When True, the matrix is binarised: cells strictly greater than
        ``threshold`` become 1 and every other cell becomes 0.
    threshold : number, optional
        Cut-off used when ``norm`` is True. ``None`` is treated as 0, i.e. any
        positive rating counts as an interaction.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``user_col`` with one column per distinct ``item_col``
        value. Ratings of repeated (user, item) pairs are summed and pairs
        that never occur are filled with 0.
    """
    interactions = (
        df.groupby([user_col, item_col])[rating_col]
        .sum()
        .unstack()
        .reset_index()
        .fillna(0)
        .set_index(user_col)
    )
    if norm:
        # Comparing against None raises TypeError, so fall back to 0 when the
        # caller asks for binarisation without giving a threshold.
        cutoff = 0 if threshold is None else threshold
        # Vectorised comparison instead of a per-cell Python lambda.
        interactions = (interactions > cutoff).astype("int64")
    return interactions

In [73]:
# Build the raw (non-binarised) user x anime rating matrix and preview it.
interactions = create_interaction_matrix(
    df,
    user_col='userId',
    item_col='animeId',
    rating_col='rating',
)
interactions.head()

animeId,1,5,6,7,8,15,16,17,18,19,...,41555,41558,41586,41611,41660,41745,41785,41797,41852,41853
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,9.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
def create_user_dict(interactions):
    """Map every index label of ``interactions`` to its 0-based row position.

    Parameters
    ----------
    interactions : pandas.DataFrame
        Frame whose index holds the user ids.

    Returns
    -------
    dict
        ``{index_label: row_position}`` in index order. If a label occurs more
        than once, the position of its last occurrence is kept.
    """
    return {label: position for position, label in enumerate(interactions.index)}

In [75]:
# Map each user id in the interaction matrix index to its row position.
user_dict = create_user_dict(interactions=interactions)

# Load the id -> name lookup. The context manager closes the file handle
# (the bare open() call leaked it), and JSON text is read explicitly as UTF-8
# instead of relying on the platform's locale encoding.
with open('anime_id_to_name.json', encoding='utf-8') as name_file:
    movies_dict = json.load(name_file)

In [76]:
def runMF(interactions, n_components=30, loss='warp', k=15, epoch=30, n_jobs=4, random_state=None):
    """Fit a LightFM model on a user x item interaction frame.

    Parameters
    ----------
    interactions : pandas.DataFrame
        User x item matrix; its ``.values`` are converted to a CSR matrix.
    n_components : int, default 30
        Passed to LightFM as ``no_components``.
    loss : str, default 'warp'
        Passed to LightFM as ``loss``.
    k : int, default 15
        Passed to LightFM as ``k``.
    epoch : int, default 30
        Number of training epochs passed to ``fit``.
    n_jobs : int, default 4
        Passed to ``fit`` as ``num_threads``.
    random_state : int, numpy.random.RandomState or None, default None
        Forwarded to LightFM so a run can be seeded. ``None`` keeps the
        previous unseeded behaviour.
        NOTE(review): training with more than one thread may still not be
        bit-for-bit repeatable; confirm against the LightFM documentation.

    Returns
    -------
    lightfm.LightFM
        The fitted model.
    """
    x = sparse.csr_matrix(interactions.values)
    model = LightFM(no_components=n_components, loss=loss, k=k, random_state=random_state)
    model.fit(x, epochs=epoch, num_threads=n_jobs)
    return model

In [77]:
def sample_recommendation_user(model, interactions, user_id, user_dict, item_dict,
                               threshold=0, nrec_items=10, show=True):
    """Rank the items a user has not interacted with yet by predicted score.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(user_index, item_indices)``.
    interactions : pandas.DataFrame
        User x item matrix (index: user ids, columns: item ids).
    user_id : hashable
        Label of the user in ``interactions.index`` and key in ``user_dict``.
    user_dict : dict
        Maps a user id to the positional index handed to ``model.predict``.
    item_dict : dict
        Maps ``str(item_id)`` to a printable name. Only read when ``show`` is
        truthy.
    threshold : number, default 0
        Items whose stored value for this user is strictly greater than this
        are treated as already known and excluded from the result.
    nrec_items : int, default 10
        Maximum number of item ids to return.
    show : bool, default True
        When truthy, print the known items and the recommendations by name.

    Returns
    -------
    list
        Up to ``nrec_items`` item ids, highest predicted score first.

    Raises
    ------
    KeyError
        If ``user_id`` is missing from ``user_dict`` or ``interactions``, or,
        when ``show`` is truthy, if an item has no entry in ``item_dict``.
    """
    n_items = interactions.shape[1]
    user_x = user_dict[user_id]

    # Score every item column for this user and order the labels best-first.
    scores = pd.Series(model.predict(user_x, np.arange(n_items)))
    scores.index = interactions.columns
    ranked_items = scores.sort_values(ascending=False).index.tolist()

    # Known items are listed in descending item-id order.
    user_row = interactions.loc[user_id, :]
    known_items = sorted(user_row[user_row > threshold].index.tolist(), reverse=True)

    # Set membership keeps the filter linear in the number of items.
    known_lookup = set(known_items)
    return_score_list = [item for item in ranked_items if item not in known_lookup][:nrec_items]

    if show:
        # Names are resolved only when they are printed, so a missing title
        # cannot break a caller that just wants the ids. Resolving both lists
        # before printing keeps the output all-or-nothing on a bad lookup.
        known_names = [item_dict[str(item)] for item in known_items]
        recommended_names = [item_dict[str(item)] for item in return_score_list]

        print("Known Likes:")
        for position, name in enumerate(known_names, start=1):
            print(str(position) + '- ' + name)

        print("\n Recommended Items:")
        for position, name in enumerate(recommended_names, start=1):
            print(str(position) + '- ' + name)
    return return_score_list

In [78]:
# Train the factorisation model before its first use: no visible cell assigns
# `mf_model`, so on a fresh kernel (Restart & Run All) the call below would
# otherwise fail with a NameError.
mf_model = runMF(interactions=interactions)

# Print and collect the top 10 recommendations for user 20, treating every
# item the user rated above 4 as already known.
rec_list = sample_recommendation_user(
    model=mf_model,
    interactions=interactions,
    user_id=20,
    user_dict=user_dict,
    item_dict=movies_dict,
    threshold=4,
    nrec_items=10,
    show=True,
)

Known Likes:
1- Seitokai Yakuindomo*
2- Toaru Majutsu no Index-tan Movie: Endymion no Kiseki - Ga Attari Nakattari
3- Sword Art Online: Extra Edition
4- Outbreak Company
5- Kill la Kill
6- Yuusha ni Narenakatta Ore wa Shibushibu Shuushoku wo Ketsui Shimashita.
7- IS: Infinite Stratos 2
8- Golden Time
9- Log Horizon
10- Devil Survivor 2 The Animation
11- Shingeki no Kyojin
12- Toaru Kagaku no Railgun S
13- Tokyo Ravens
14- Hataraku Maou-sama!
15- Date A Live
16- Gintama': Enchousen
17- Kotoura-san
18- Gintama Movie 2: Kanketsu-hen - Yorozuya yo Eien Nare
19- Mondaiji-tachi ga Isekai kara Kuru Sou Desu yo?
20- Senran Kagura
21- Boku wa Tomodachi ga Sukunai Next
22- Maoyuu Maou Yuusha
23- Yahari Ore no Seishun Love Comedy wa Machigatteiru.
24- GJ-bu
25- Ixion Saga DT
26- Ore no Kanojo to Osananajimi ga Shuraba Sugiru
27- Chuunibyou demo Koi ga Shitai!
28- Btooom!
29- Sakura-sou no Pet na Kanojo
30- Ore no Imouto ga Konnani Kawaii Wake ga Nai.
31- Psycho-Pass
32- Kono Naka ni Hitori, Imout

In [79]:
def create_item_emdedding_distance_matrix(model, interactions):
    """Build the item x item cosine-similarity table of the model's item embeddings.

    Despite the name, the cells hold cosine similarities (larger means more
    alike), not distances.

    Parameters
    ----------
    model : object
        Fitted model exposing an ``item_embeddings`` array with one row per item.
    interactions : pandas.DataFrame
        Interaction matrix whose columns supply the item labels.

    Returns
    -------
    pandas.DataFrame
        Square frame labelled on both axes with ``interactions.columns``.
    """
    item_labels = interactions.columns
    embeddings_sparse = sparse.csr_matrix(model.item_embeddings)
    return pd.DataFrame(
        cosine_similarity(embeddings_sparse),
        index=item_labels,
        columns=item_labels,
    )

def item_item_recommendation(item_emdedding_distance_matrix, item_id,
                             item_dict, n_items=10, show=True):
    """Return the items most similar to ``item_id``.

    Parameters
    ----------
    item_emdedding_distance_matrix : pandas.DataFrame
        Item x item similarity table (larger value means more similar).
    item_id : hashable
        Row label of the item of interest.
    item_dict : dict
        Maps ``str(item_id)`` to a printable name. Only read when ``show`` is
        truthy.
    n_items : int, default 10
        Maximum number of similar items to return.
    show : bool, default True
        When truthy, print the item of interest and its neighbours by name.

    Returns
    -------
    list
        Up to ``n_items`` item ids, most similar first, never containing
        ``item_id`` itself.

    Raises
    ------
    KeyError
        If ``item_id`` is not a row label, or, when ``show`` is truthy, if an
        item has no entry in ``item_dict``.
    """
    similarity_row = item_emdedding_distance_matrix.loc[item_id, :]
    # Remove the query item by label rather than assuming it sorts first:
    # a tie or floating-point round-off can rank another item above it, and
    # slicing off position 0 would then return the query item itself.
    candidates = similarity_row.drop(labels=item_id, errors='ignore')
    recommended_items = candidates.sort_values(ascending=False).head(n_items).index.tolist()

    if show:
        print("Item of interest :{0}".format(item_dict[str(item_id)]))
        print("Item similar to the above item:")
        for position, item in enumerate(recommended_items, start=1):
            print(str(position) + '- ' + item_dict[str(item)])
    return recommended_items

In [80]:
# Checking item embedding distance matrix
item_item_dist = create_item_emdedding_distance_matrix(
    model=mf_model,
    interactions=interactions,
)
item_item_dist.head()

animeId,1,5,6,7,8,15,16,17,18,19,...,41555,41558,41586,41611,41660,41745,41785,41797,41852,41853
animeId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.881387,0.872817,0.532242,0.113843,0.256764,0.27335,0.239403,0.340641,0.680418,...,-0.312122,-0.525818,-0.433252,-0.057411,-0.368405,-0.364568,-0.335944,-0.318195,-0.388171,-0.366841
5,0.881387,1.0,0.751017,0.553786,0.006889,0.06138,0.24681,0.11171,0.252251,0.496586,...,-0.199823,-0.429569,-0.333117,0.021496,-0.300192,-0.121921,-0.204232,-0.113681,-0.138425,-0.107864
6,0.872817,0.751017,1.0,0.689924,0.380153,0.438912,0.155313,0.33353,0.342783,0.534606,...,-0.260392,-0.477141,-0.434427,-0.152677,-0.428582,-0.337505,-0.346179,-0.315704,-0.376715,-0.348546
7,0.532242,0.553786,0.689924,1.0,0.480872,0.199286,0.247318,0.240968,0.114759,0.281404,...,0.099094,-0.157917,-0.152405,-0.056837,-0.217101,0.006809,-0.096853,0.042968,-0.011301,0.018738
8,0.113843,0.006889,0.380153,0.480872,1.0,0.783932,0.062471,0.757464,0.38103,0.104773,...,0.034172,0.091486,-0.05569,-0.178576,-0.085439,-0.144924,-0.105966,-0.087876,-0.10051,-0.046327


In [81]:
# Print and collect the 10 items ranked most similar to item id 1.
rec_list = item_item_recommendation(
    item_emdedding_distance_matrix=item_item_dist,
    item_id=1,
    item_dict=movies_dict,
    n_items=10,
)

Item of interest :Cowboy Bebop
Item similar to the above item:
1- Samurai Champloo
2- Cowboy Bebop: Tengoku no Tobira
3- Trigun
4- Akira
5- Koukaku Kidoutai
6- FLCL
7- Kenpuu Denki Berserk
8- Tengen Toppa Gurren Lagann
9- Juubee Ninpuuchou
10- Jin-Rou
