In [8]:
from service import AnimeInfo, UserInfo
from model import Tag, Review, Character
from config import DBConfig
import pandas as pd
import numpy as np

#print(AnimeInfo.anime_episodes(20))
#print(UserInfo.display_user_activity(6242))

db_conn = DBConfig()

def create_pandas_table(sql_query):
    with db_conn as conn:
        table = pd.read_sql_query(sql_query, conn)
        return table

anime = create_pandas_table("""
select a.id, user_id,
CAST((CASE 
WHEN u.score <= 10 THEN
(CAST(u.score as decimal) / 10) 
ELSE (CAST(u.score as decimal) /100) END) * 100 as INT) as score from anime a 
join user_anime_activity u on a.id=u.anime_id where score > 0;
""")



anime
anime.head()
anime.shape
anime.describe()

Unnamed: 0,id,user_id,score
count,581936.0,581936.0,581936.0
mean,7228.235215,6525.122,67.112901
std,7598.613583,12860.71,21.983033
min,1.0,1.0,10.0
25%,1535.0,2402.0,50.0
50%,6187.0,4616.0,70.0
75%,11241.0,8410.0,80.0
max,125206.0,6200000.0,100.0


In [9]:
anime_pivot_orig = anime.pivot_table(values='score', index='user_id', columns = 'id')
anime_pivot = anime_pivot_orig.fillna(0)
anime_pivot.head()


id,1,5,6,7,8,15,16,17,18,19,...,20792,20793,20794,21131,21202,21390,101922,103572,112641,125206
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,90.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NearestNeighbors

n = 20
cosine_knn = NearestNeighbors(n_neighbors=n, algorithm='brute', metric='cosine')
anime_cosine_nn_fit = cosine_knn.fit(anime_pivot.T.values)
anime_distances, anime_indices = anime_cosine_nn_fit.kneighbors(anime_pivot.T.values)


In [14]:
anime_dic = {}
for i in range(len(anime_pivot.T.index)):
    anime_idx = anime_indices[i]
    col_names = anime_pivot.T.index[anime_idx].tolist()
    anime_dic[anime_pivot.T.index[i]] = col_names

In [15]:
anime_dic[20]

[20,
 1735,
 269,
 1535,
 1575,
 8074,
 11757,
 9919,
 6702,
 5114,
 2904,
 813,
 21,
 3588,
 121,
 6547,
 16498,
 10620,
 226,
 223]

In [16]:
from sklearn.metrics.pairwise import cosine_similarity
import operator


def similar_users(user_id, matrix, k=30):
    # create a df of just the current user
    user = matrix[matrix.index == user_id]
    
    # and a df of all other users
    other_users = matrix[matrix.index != user_id]
    
    # calc cosine similarity between user and each other user
    similarities = cosine_similarity(user,other_users)[0].tolist()
    
    # create list of indices of these users
    indices = other_users.index.tolist()
    
    # create key/values pairs of user index and their similarity
    index_similarity = dict(zip(indices, similarities))
    
    # sort by similarity
    index_similarity_sorted = sorted(index_similarity.items(), key=operator.itemgetter(1))
    index_similarity_sorted.reverse()
    
    # grab k users off the top
    top_users_similarities = index_similarity_sorted[:k]
    users = [u[0] for u in top_users_similarities]
    
    return users

similar_user_indices = similar_users(7, anime_pivot)
similar_user_indices


[1410,
 3077,
 2150,
 5143,
 3050,
 7163,
 9130,
 1073,
 4808,
 4062,
 60,
 1601,
 8142,
 6636,
 3200,
 2613,
 2738,
 2926,
 3208,
 76,
 591,
 1274,
 3098,
 1892,
 878,
 6562,
 16428,
 9721,
 1010,
 1938]

In [17]:
def recommend_item(user_index, similar_user_indices, matrix, items=20):
    
    similar_users = matrix[matrix.index.isin(similar_user_indices)]
    similar_users = similar_users.mean(axis=0)
    similar_users_df = pd.DataFrame(similar_users, columns=['average_score'])
    
    
    user_df = matrix[matrix.index == user_index]
    user_df_transposed = user_df.transpose()
    user_df_transposed.columns = ['rating']
    user_df_transposed = user_df_transposed[user_df_transposed['rating']==0]
    animes_unseen = user_df_transposed.index.tolist()
    
    similar_users_df_filtered = similar_users_df[similar_users_df.index.isin(animes_unseen)]
    similar_users_df_ordered = similar_users_df.sort_values(by=['average_score'], ascending=False)
    top_n_anime = similar_users_df_ordered.head(items)
    top_n_anime_indices = top_n_anime.index.tolist()
    anime_information = top_n_anime
    
    return top_n_anime 
recommend_item(1477, similar_user_indices, anime_pivot)

Unnamed: 0_level_0,average_score
id,Unnamed: 1_level_1
7311,80.633333
5081,78.633333
9253,78.366667
4224,75.866667
4181,75.333333
9756,74.133333
12189,73.3
2001,73.1
11597,73.033333
849,73.0
