# Data Preparation

In [None]:
pip install scikit-surprise



In [None]:
from google.colab import drive
from google.colab import files
import pandas as pd
import numpy as np
import surprise as sp
import matplotlib.pyplot as plt
import json
import csv

Since the size of the data file with the scores is over 2.0 GB, I uploaded the file in my google drive and loaded it from there. The other two files were uploaded directly from my computer.

In [None]:
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
uploaded = files.upload()

Saving anime_cleaned.csv to anime_cleaned.csv
Saving users_cleaned.csv to users_cleaned.csv


In [None]:
with open ('/content/anime_cleaned.csv') as anime_cleaned:
  DF_anime = pd.read_csv(anime_cleaned)
with open ('/content/users_cleaned.csv') as users_cleaned:
  DF_users = pd.read_csv(users_cleaned)
with open('/content/gdrive/My Drive/animelists_cleaned.csv') as animelist_cleaned:
  DF_ratings = pd.read_csv(animelist_cleaned)

In [None]:
# I was going to keep movies from the recommendation system because, in the content-based section, 
# the movies of a series would all take top spots when generating a similarity list for a specific series.
# Although this is to be expected, I wanted to get recommendations of different series, and not movies and special of
# the series I just searched. Ultimately I left the movies as a possibility for recommendation so I could see if the 
# system was returning a expected result and working as intended.

#DF_anime = DF_anime[DF_anime.type != 'Movie']
#DF_anime.type.unique()

In [None]:
DF_anime = DF_anime[['anime_id', 'title', 'title_english', 'genre', 'score', 'scored_by']]

In [None]:
DF_users = DF_users[['username', 'user_id', 'user_watching', 'user_completed']]

In [None]:
DF_ratings = DF_ratings[['username', 'anime_id', 'my_score']]

In [None]:
DF_anime.sort_values(by='scored_by', ascending=False).head()

Unnamed: 0,anime_id,title,title_english,genre,score,scored_by
3802,1535,Death Note,Death Note,"Mystery, Police, Psychological, Supernatural, ...",8.67,1009477
3897,16498,Shingeki no Kyojin,Attack on Titan,"Action, Military, Mystery, Super Power, Drama,...",8.49,940211
3009,11757,Sword Art Online,Sword Art Online,"Action, Adventure, Fantasy, Game, Romance",7.64,915986
1261,5114,Fullmetal Alchemist: Brotherhood,Fullmetal Alchemist: Brotherhood,"Action, Military, Adventure, Comedy, Drama, Ma...",9.25,733592
4215,30276,One Punch Man,One Punch Man,"Action, Sci-Fi, Comedy, Parody, Super Power, S...",8.73,691845


In [None]:
DF_anime.describe()

Unnamed: 0,anime_id,score,scored_by
count,6668.0,6668.0,6668.0
mean,15064.977804,6.848998,24035.01
std,13161.364001,0.927448,61121.03
min,1.0,0.0,0.0
25%,2592.0,6.35,681.25
50%,10401.5,6.93,3966.0
75%,30301.25,7.46,19760.75
max,37896.0,9.52,1009477.0


In [None]:
DF_anime.isna().any()

anime_id         False
title            False
title_english     True
genre             True
score            False
scored_by        False
dtype: bool

In [None]:
DF_anime[DF_anime.genre.isna()]

Unnamed: 0,anime_id,title,title_english,genre,score,scored_by
2357,33389,Genbanojou,,,5.58,12
3301,32695,Match Shoujo,The Little Match Girl,,5.65,163
5111,17813,Kyoto Animation: Megane-hen,Glasses,,6.42,3617
6642,37018,Season&#039;s Greetings 2017 from Dwarf,,,4.39,18


In [None]:
DF_anime = DF_anime[DF_anime.genre.notna()]
DF_anime.sort_values(by='scored_by', ascending=False, inplace=True)
DF_anime.reset_index(drop=True, inplace=True)
DF_anime.head()

Unnamed: 0,anime_id,title,title_english,genre,score,scored_by
0,1535,Death Note,Death Note,"Mystery, Police, Psychological, Supernatural, ...",8.67,1009477
1,16498,Shingeki no Kyojin,Attack on Titan,"Action, Military, Mystery, Super Power, Drama,...",8.49,940211
2,11757,Sword Art Online,Sword Art Online,"Action, Adventure, Fantasy, Game, Romance",7.64,915986
3,5114,Fullmetal Alchemist: Brotherhood,Fullmetal Alchemist: Brotherhood,"Action, Military, Adventure, Comedy, Drama, Ma...",9.25,733592
4,30276,One Punch Man,One Punch Man,"Action, Sci-Fi, Comedy, Parody, Super Power, S...",8.73,691845


# MAKING A LIST WITH ALL THE POSSIBLE GENRES FOR THE CONTENT-BASED RECOMMENDER

In [None]:
anime_sample = DF_anime.sample(n=100, random_state=0)

In [None]:
genres = []

for genre in anime_sample['genre']:
 genre = genre.split(', ')
 for string in genre:
   if string in genres:
     pass
   else:
     genres.append(string)
genres.sort()

In [None]:
DF_anime_dict = {}

for i in range(len(DF_anime)):
  list_of_genres = {}
  DF_anime_genre = DF_anime.iloc[i].genre.split(', ')
  
  for genre in genres:
    if genre in DF_anime_genre:
      list_of_genres[genre] = 1
    elif genre not in DF_anime_genre:
      list_of_genres[genre] = 0
    DF_anime_dict[DF_anime.iloc[i].title] = list_of_genres
  
DF_Anime_matrix = pd.DataFrame.from_dict(DF_anime_dict, orient='index')

In [None]:
DF_Anime_matrix.head()

Unnamed: 0,Action,Adventure,Cars,Comedy,Demons,Drama,Ecchi,Fantasy,Game,Harem,Hentai,Historical,Horror,Josei,Kids,Magic,Martial Arts,Mecha,Military,Music,Mystery,Parody,Psychological,Romance,Samurai,School,Sci-Fi,Seinen,Shoujo,Shoujo Ai,Shounen,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Vampire,Yaoi
Death Note,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0
Shingeki no Kyojin,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
Sword Art Online,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Fullmetal Alchemist: Brotherhood,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
One Punch Man,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0


#ADDING ANIMES TO THE USER'S LIST

In [None]:
# Here it is possible to add an anime from the database to the User's Personal List. It saves in the drive, but the path
# can be changed.

checking_anime = False
anime = input('>>> Inform the anime you want to add to your list: ')

if (DF_anime['title'].str.lower() == anime.strip().lower()).any():
  anime_new = DF_anime[DF_anime['title'].str.lower() == anime.strip().lower()]
  checking_anime = True

elif (DF_anime['title_english'].str.lower() == anime.strip().lower()).any():
  anime_new = DF_anime[DF_anime['title_english'].str.lower() == anime.strip().lower()]
  checking_anime = True

if checking_anime is True:
  try:
    User_data = User_data.append(anime_new)
      
    User_data.drop_duplicates(inplace=True)
    User_data.sort_values(by='title', inplace=True)
    User_data.reset_index(drop = True, inplace=True)
    
    User_data.to_csv('/content/gdrive/My Drive/User_data_2.csv', mode='w')
  except:
    User_data = anime_new

    User_data.reset_index(drop= True, inplace=True)
    User_data.insert(4, 'user_rating', np.nan)
    User_data.to_csv('/content/gdrive/My Drive/User_data_2.csv', mode='w')
  finally:
    print('Anime added successfully!')

>>> Inform the anime you want to add to your list: mob psycho 100
Anime added successfully!


In [501]:
# I put this one here just so I don't have to fill the user's list every time

User_data = pd.read_csv('/content/gdrive/My Drive/User_data_2.csv', index_col=0)
User_data

Unnamed: 0,anime_id,title,title_english,genre,user_rating,score,scored_by
0,31646,3-gatsu no Lion,March comes in like a lion,"Game, Slice of Life, Drama, Seinen",9.0,8.43,66567
1,1,Cowboy Bebop,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",8.0,8.81,365465
2,813,Dragon Ball Z,Dragon Ball Z,"Action, Adventure, Comedy, Fantasy, Martial Ar...",9.0,8.31,367982
3,431,Howl no Ugoku Shiro,Howl&#039;s Moving Castle,"Adventure, Drama, Fantasy, Romance",7.0,8.73,334114
4,20,Naruto,Naruto,"Action, Adventure, Comedy, Super Power, Martia...",8.0,7.88,648605
5,30,Neon Genesis Evangelion,Neon Genesis Evangelion,"Action, Sci-Fi, Dementia, Psychological, Drama...",8.0,8.32,364012
6,13601,Psycho-Pass,Psycho-Pass,"Action, Police, Psychological, Sci-Fi",8.0,8.45,377003
7,2236,Toki wo Kakeru Shoujo,The Girl Who Leapt Through Time,"Sci-Fi, Adventure, Drama, Romance",7.0,8.35,249571
8,523,Tonari no Totoro,My Neighbor Totoro,"Adventure, Comedy, Supernatural",7.0,8.45,262204
9,35968,Wotaku ni Koi wa Muzukashii,,"Comedy, Romance",9.0,7.98,15848


# Creating the User_Genre matrix used to calculate the User Preferences

In [503]:
User_genre_dict = {}

for i in range(len(User_data)):
  list_of_genres = {}
  anime_user_genre = User_data.iloc[i].genre.split(', ')
  for genre in genres:
    if genre in anime_user_genre:
      list_of_genres[genre] = 1
    elif genre not in anime_user_genre:
      list_of_genres[genre] = 0
    User_genre_dict[User_data.iloc[i].title] = list_of_genres
    
DF_User_preference = pd.DataFrame.from_dict(User_genre_dict, orient='index')
DF_User_preference

Unnamed: 0,Action,Adventure,Cars,Comedy,Demons,Drama,Ecchi,Fantasy,Game,Harem,Hentai,Historical,Horror,Josei,Kids,Magic,Martial Arts,Mecha,Military,Music,Mystery,Parody,Psychological,Romance,Samurai,School,Sci-Fi,Seinen,Shoujo,Shoujo Ai,Shounen,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Vampire,Yaoi
3-gatsu no Lion,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
Cowboy Bebop,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0
Dragon Ball Z,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
Howl no Ugoku Shiro,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
Naruto,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0
Neon Genesis Evangelion,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
Psycho-Pass,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
Toki wo Kakeru Shoujo,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
Tonari no Totoro,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
Wotaku ni Koi wa Muzukashii,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Inform the user's rating of an anime

In [None]:
anime = input('>>> Inform the anime you want to update the rating: ')
checking_anime = False

for i in range(len(User_data)):
    if anime.strip().lower() == str(User_data.iloc[i].title).lower():
      checking_anime = True
    elif anime.strip().lower() == str(User_data.iloc[i].title_english).lower():
      checking_anime = True

    if checking_anime is True:
      anime = User_data.iloc[i].title
      try:
        rating = int(input('>>> Inform your rating of the anime from 0-10: '))
      except ValueError:
        print('Invalid rating')
        break
      else:
        if rating < 0:
          rating = 0
        elif rating > 10:
          rating = 10
        
        User_data.at[i, 'user_rating'] = rating

        User_anime_rating = User_data.user_rating.to_numpy()

        User_anime_rating = np.array([User_anime_rating]).transpose()

        DF_User_preference_weight = DF_User_preference*User_anime_rating

        break

User_data.to_csv('/content/gdrive/My Drive/User_data_2.csv', mode='w')
DF_User_preference_weight

In [504]:
# As before, this cell is here so we can have the DF_User_preference_weight without
# having to run the cell above.
 
User_anime_rating = User_data.user_rating.values
User_anime_rating = np.array([User_anime_rating]).transpose()
DF_User_preference_weight = DF_User_preference*User_anime_rating

DF_User_preference_weight

Unnamed: 0,Action,Adventure,Cars,Comedy,Demons,Drama,Ecchi,Fantasy,Game,Harem,Hentai,Historical,Horror,Josei,Kids,Magic,Martial Arts,Mecha,Military,Music,Mystery,Parody,Psychological,Romance,Samurai,School,Sci-Fi,Seinen,Shoujo,Shoujo Ai,Shounen,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Vampire,Yaoi
3-gatsu no Lion,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0
Cowboy Bebop,8.0,8.0,0.0,8.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0
Dragon Ball Z,9.0,9.0,0.0,9.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0
Howl no Ugoku Shiro,0.0,7.0,0.0,0.0,0.0,7.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Naruto,8.0,8.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0
Neon Genesis Evangelion,8.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Psycho-Pass,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Toki wo Kakeru Shoujo,0.0,7.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Tonari no Totoro,0.0,7.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0
Wotaku ni Koi wa Muzukashii,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


The User preference matrix:

In [505]:
DF_User_matrix = (DF_User_preference_weight.sum(axis=0)/(((DF_User_preference_weight.sum())).sum())).to_frame().transpose()
DF_User_matrix.rename({0:'weights'}, axis='index', inplace=True)
DF_User_matrix

Unnamed: 0,Action,Adventure,Cars,Comedy,Demons,Drama,Ecchi,Fantasy,Game,Harem,Hentai,Historical,Horror,Josei,Kids,Magic,Martial Arts,Mecha,Military,Music,Mystery,Parody,Psychological,Romance,Samurai,School,Sci-Fi,Seinen,Shoujo,Shoujo Ai,Shounen,Shounen Ai,Slice of Life,Space,Sports,Super Power,Supernatural,Vampire,Yaoi
weights,0.115819,0.129944,0.0,0.115819,0.0,0.110169,0.0,0.045198,0.025424,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048023,0.022599,0.0,0.0,0.0,0.0,0.045198,0.064972,0.0,0.0,0.087571,0.025424,0.0,0.0,0.048023,0.0,0.025424,0.022599,0.0,0.048023,0.019774,0.0,0.0


In [506]:
Recommend = (DF_Anime_matrix*DF_User_matrix.values).sum(axis=1)/((np.linalg.norm(DF_Anime_matrix, axis=1))*(np.linalg.norm(DF_User_matrix, axis=1)))

for i in range(len(User_data)):
  if User_data.iloc[i].title in Recommend.index.to_list():
    Recommend.drop(index=User_data.iloc[i].title, inplace=True)


Recommend.sort_values(ascending=False).head(n=20)

Urusei Yatsura: Haru da, Tobidase!              0.889511
Urusei Yatsura Movie 1: Only You                0.889511
Urusei Yatsura Movie 4: Lum The Forever         0.889511
Urusei Yatsura Movie 6: Itsudatte My Darling    0.889511
Urusei Yatsura Movie 3: Remember My Love        0.889511
Urusei Yatsura Movie 2: Beautiful Dreamer       0.889511
Urusei Yatsura                                  0.889511
R.O.D the TV                                    0.865361
Trigun: Badlands Rumble                         0.865361
Trigun                                          0.865361
Generator Gawl                                  0.865361
Genma Taisen                                    0.830980
Cowboy Bebop: Yose Atsume Blues                 0.829137
Digimon Adventure tri. 5: Kyousei               0.823230
Digimon Adventure tri. 4: Soushitsu             0.823230
Yoru no Yatterman                               0.823230
Digimon Adventure tri. 2: Ketsui                0.823230
Digimon Adventure tri. 6: Bokur

# COLLABORATIVE FILTERING

This is the section where I start the collaborative filtering part of the recommender. I wrote the whole algorithms instead of using the surprise library just so I could learn more about the subject.
I used the svd method and another one that is basically a svd with the s matrix of 1's, and it runs with gradient descent to find the latent factors of users and animes.

In [None]:
# Was going to left users with less than 100 iterations out of analysis. I do something different, but similar, later on.

#DF_users = DF_users[(DF_users['user_completed'] + DF_users['user_watching']) > 100]
#DF_users.head()

In [None]:
DF_users.shape

(108711, 4)

Taking from the score list the animes removed previously (the ones without genres)

In [None]:
DF_ratings_filtered = DF_ratings[DF_ratings.anime_id.isin(DF_anime.anime_id.to_list())]
DF_ratings_filtered.shape

(31282322, 3)

Number of animes in our data

In [None]:
len(DF_ratings_filtered.anime_id.unique())

6664

Getting how many animes each user have rated

In [None]:
DF_ratings_user = DF_ratings_filtered.groupby(by=['username']).count()
DF_ratings_user.rename(columns={'my_score':'nº of ratings'}, inplace=True)


DF_ratings_user = DF_ratings_user.drop(columns=['anime_id'], axis=1)
DF_ratings_user.reset_index(inplace=True)
DF_ratings_user.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,username,nº of ratings
65089,ValenciaCrowe,1
52271,Ritarikukka,1
97568,ragnogirl,1
39503,Matth_Parker,1
17641,Eddie_Caerus,1


In [None]:
DF_ratings_user.shape, DF_users.shape

((108709, 2), (108711, 4))

I'm not going to use users who have rated less than 10 animes for the model.


In [None]:
DF_ratings_user = DF_ratings_user[DF_ratings_user['nº of ratings'] >=10]
DF_ratings_user.shape

(104521, 2)

For performance's sake I'll just create a model using 1500 users chosen randomly

In [None]:
Users_sample = DF_ratings_user.sample(n=1500, random_state=0)

In [None]:
DF_ratings_sample = DF_ratings_filtered[DF_ratings_filtered.username.isin(Users_sample.username.to_list())]

In [None]:
DF_ratings_sample.shape

(460049, 3)

In [None]:
## JUST CHECKING ##


x = DF_ratings_sample.groupby(by=['username']).count()
x.rename(columns={'my_score':'nº of ratings'}, inplace=True)


x = x.drop(columns=['anime_id'], axis=1)
x.reset_index(inplace=True)
x.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,username,nº of ratings
619,Nekoshoujo,10
378,InacChan,10
876,TheRussianHare,10
1266,loli_fox,10
848,Suseh,10


Number of ratings each anime have in the sample of 1500 users

In [None]:
DF_ratings_anime = DF_ratings_sample.groupby(by=['anime_id']).count()
DF_ratings_anime.rename(columns={'my_score':'nº of ratings'}, inplace=True)


DF_ratings_anime = DF_ratings_anime.drop(columns=['username'], axis=1)
DF_ratings_anime.reset_index(inplace=True)
DF_ratings_anime.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,anime_id,nº of ratings
6077,37860,1
2626,6684,1
5063,31978,1
3745,15389,1
1873,3114,1


In [None]:
DF_ratings_anime.shape

(6078, 2)

Dropping the anime with less than 10 ratings

In [None]:
DF_ratings_anime = DF_ratings_anime[DF_ratings_anime['nº of ratings'] > 10]

In [None]:
## JUST CHECKING IF OUR USERS LIST NOW HAS SOMEONE WITH LESS THAN 10 REVIEWS
DF_ratings_final = DF_ratings_sample[DF_ratings_sample.anime_id.isin(DF_ratings_anime.anime_id.to_list())]


a = DF_ratings_final.groupby(by=['username']).count()
a.rename(columns={'my_score':'nº of ratings'}, inplace=True)


a = a.drop(columns=['anime_id'], axis=1)
a.reset_index(inplace=True)
a.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,username,nº of ratings
1266,loli_fox,10
619,Nekoshoujo,10
876,TheRussianHare,10
848,Suseh,10
612,Naoto-Shirogane,10


In [None]:
DF_ratings_anime.shape

(4385, 2)

I'll use DF_ratings_final for the analysis and model building

In [None]:
DF_anime_to_merge = DF_anime[['anime_id', 'title']]

In [None]:
DF_ratings_final = DF_ratings_final.merge(DF_anime_to_merge, on='anime_id')
DF_ratings_final.shape

(452039, 4)

In [None]:
DF_ratings_final.drop(columns=['anime_id'], inplace=True)
Pivot_Data = DF_ratings_final.pivot_table(index='title', columns='username')

Pivot_Data is the the table with the ratings each user gave to each anime

In [None]:
Pivot_Data

Unnamed: 0_level_0,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score,my_score
username,--RIE--,-Anita,-Eva-,-KYUUBl-,-Kuroneko,-Patchouli,-Rena-chan-,-Ryuujin-,-Shockwave-,-SirenOfPeace,-Stark-,-Yuiko-,-lekAA,-nugget-,-thinking-,1und2,4-chansey,4Eyed,5BowlsOfRice,6Sh00t3r,78malex,7hatGuy,92Nemmy92,AO1SOLDIER,ARQandARG,AdriDesuDesu,Aeandir,Aenotsu,Aerow96,Aga_Recitativo,Akane-Hana,Akarui666,Akilucky,Akira_1,AkiyamaHayate,Alabama,Albel-Kun,AleNunesBR,AlexBakaNii,AlexInkheart,...,weeman57,weijie,wendydo980,whatraceami,whitewolf95,xAgrias,xDVxNightfire,xFadmer,xMelonn,xMoMox,xMomoKonekox,xReVaNx,xTan,xXxollinxXx,xYoshi-chan,xrsxj,xxTon3xx,xxavbxx,xxivdk,yalomalsteklo,yesy92,yoshiness,yruahippo,yukio_raiden15,yurififi,zLilith,zani,zaysha,zchs1988,zei_lytpire,zeigtsu500,zeon,zeroconnect,zeus88,zevlovex,ziraki,zoella_izumi,zoque999,zuhri,zurczner
title,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",0.0,,,,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,
"""Bungaku Shoujo"" Memoire",0.0,,,,,,,7.0,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,
"""Bungaku Shoujo"" Movie",6.0,,,0.0,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,...,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,
.hack//G.U. Returner,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,...,,,,,,,,,,,,,,,,0.0,,,,,,8.0,,,,,,,,,,5.0,,,,,,,,
.hack//Gift,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,4.0,,0.0,,...,,,,,,,,,,,,,,,,0.0,,,,,,9.0,,,,,,,,,,6.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
xxxHOLiC,,,8.0,,,,,6.0,7.0,,,10.0,,7.0,,0.0,,0.0,,,,,9.0,,,,,,,,,,,,,,,,,0.0,...,0.0,,,,,,,,,,,,,,,0.0,,,0.0,,5.0,,,,,,,,,0.0,,,,,0.0,0.0,0.0,,,
xxxHOLiC Kei,,,8.0,,,,,7.0,,,,10.0,,6.0,,0.0,,,,,,,9.0,,,,,,,,,,,,,,,,,0.0,...,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,0.0,,,,,0.0,,,,,
xxxHOLiC Movie: Manatsu no Yoru no Yume,,,,,,,,,,,,10.0,,6.0,,,,,,,,,9.0,,,,,,,,,,,,,,,,,0.0,...,,,,,,,,,,,,,,,,8.0,,,,,,,,,,,,,,,,,,,0.0,,,,,
xxxHOLiC Shunmuki,,,,,,,,7.0,,,,10.0,,,,,,,,,,,9.0,,,,,,,,,,,,,,,,,0.0,...,,,,,,,,,,,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
User_mean = Pivot_Data.mean(axis=0).values
Pivot_Data_Normalized = Pivot_Data - User_mean

Number of valid ratings in the pivot_data table

In [None]:
Pivot_Data_Normalized.count().sum()

452039

# Gradient Descent Approach

This is the step where we train our model. Since it uses Gradient Descent, it converges really slowly. I defined the program to iterate 8000 times or until the Cost Function decreases less than 0.0001, whichever comes first.

The algorithm initializes random small values for the Anime Features and Users Coefficients and update then each step.

This is a model-based collaborative filtering recommender system, it applies the low-rank matrix factorization in the pivot_data matrix and to generate coefficients that describe the users preferences and the animes characteristics. And that's the reason I chose not to use animes with less than 10 ratings and users with less than 10 animes rated, so they would not affect our model with biased samples.

Pivot Table Normalized

In [None]:
J_list = []
lambda_reg = 0.0007
alpha_f = 0.0005
alpha_c = 0.00025

Users_Coefficients = np.random.rand(10,1500)
Anime_Features = np.random.rand(4385,10)
J_Cost_temp = 10**20

for i in range(15000):

  Rating_Predict = Anime_Features.dot(Users_Coefficients)
  

  Reg_Term = (lambda_reg/2)*((((Users_Coefficients**2).sum(axis=1)).sum()) + (((Anime_Features**2).sum(axis=1)).sum()))
  J_Cost = (1/2)*(((Rating_Predict - Pivot_Data_Normalized)**2).sum(skipna=True)).sum(skipna=True) + Reg_Term
  
  if (abs(J_Cost - J_Cost_temp) < 0.8):
    break

  J_Cost_temp = J_Cost
  print(i, J_Cost)
  
  J_list.append(J_Cost)

  Anime_Features_temp = alpha_f*(((Rating_Predict - Pivot_Data_Normalized).fillna(value=0)).dot(Users_Coefficients.transpose()))
  Anime_Features = (1-lambda_reg*alpha_c)*Anime_Features - Anime_Features_temp 
 
  Users_Coefficients_temp = alpha_c*((((Rating_Predict - Pivot_Data_Normalized).fillna(value=0)).transpose()).dot(Anime_Features))
  Users_Coefficients = (1-lambda_reg*alpha_c)*Users_Coefficients - Users_Coefficients_temp.transpose()

Users_Coefficients.to_csv(r'/content/gdrive/My Drive/Users_Coefficients_Ofc.csv')
Anime_Features.to_csv(r'/content/gdrive/My Drive/Anime_Features_Ofc.csv')


# SVD APPROACH

In [None]:
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix

A = csr_matrix(Pivot_Data_Normalized.T.fillna(0))
U,s,V = svds(A, k=10)

In [None]:
s_matrix = np.zeros([10,10])
for i in range(len(s)):
  s_matrix[i][i] = s[i]


In [None]:
user_coefficients_svd = pd.DataFrame(U, index = Pivot_Data.my_score.columns.values.tolist())
user_coefficients_svd.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
--RIE--,-0.02567,-0.001431,0.028145,-0.00029,-0.033628,-0.018217,0.05755,0.056692,0.011182,0.023674
-Anita,0.00459,-0.005692,0.007505,0.005495,-0.01141,-0.013036,0.00465,-0.001321,0.003686,-0.013626
-Eva-,-0.012602,0.030213,0.033625,-0.001738,-0.031665,0.011467,-0.005148,-0.059803,-9.1e-05,0.033293
-KYUUBl-,0.006052,0.057355,-0.00135,0.010183,0.04014,-0.10348,0.02654,0.031799,0.049819,0.075273
-Kuroneko,-0.013606,-0.002999,0.026724,0.010407,0.009442,-0.025673,-0.018236,-0.00839,-0.002645,0.02274


In [None]:
anime_features_svd = pd.DataFrame(V.T, index = Pivot_Data.index.values.tolist())
anime_features_svd.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",0.011608,-0.010055,-0.006359,-0.007988,0.005311,-0.005405,-0.003669,0.001073,-0.001812,-0.001423
"""Bungaku Shoujo"" Memoire",0.0143,-0.007757,0.006287,-0.004732,0.005976,-0.008399,-0.001637,0.005658,-0.004202,-0.003658
"""Bungaku Shoujo"" Movie",0.012838,-0.010244,0.016433,-0.010514,0.002358,-0.005105,-0.000786,0.016618,-0.007482,-0.007197
.hack//G.U. Returner,0.020327,-0.007092,-0.0074,0.002391,0.000827,-0.007112,0.000321,0.001067,-0.002608,0.001351
.hack//Gift,0.015842,0.000576,0.001297,0.003667,0.007427,-0.003796,-0.002771,-0.000587,-0.001289,0.00062


In [None]:
user_coefficients_svd.to_csv(r'/content/gdrive/My Drive/user_coefficients_svd.csv')
anime_features_svd.to_csv(r'/content/gdrive/My Drive/anime_features_svd.csv')

In [None]:
Prediction_Matrix_array = np.matmul(np.matmul(U, s_matrix), V)
Prediction_Matrix = pd.DataFrame(Prediction_Matrix_array, index = Pivot_Data.my_score.columns.values.tolist(), columns = Pivot_Data.index.values.tolist())
Prediction_Matrix.T.head()

Unnamed: 0,--RIE--,-Anita,-Eva-,-KYUUBl-,-Kuroneko,-Patchouli,-Rena-chan-,-Ryuujin-,-Shockwave-,-SirenOfPeace,-Stark-,-Yuiko-,-lekAA,-nugget-,-thinking-,1und2,4-chansey,4Eyed,5BowlsOfRice,6Sh00t3r,78malex,7hatGuy,92Nemmy92,AO1SOLDIER,ARQandARG,AdriDesuDesu,Aeandir,Aenotsu,Aerow96,Aga_Recitativo,Akane-Hana,Akarui666,Akilucky,Akira_1,AkiyamaHayate,Alabama,Albel-Kun,AleNunesBR,AlexBakaNii,AlexInkheart,...,weeman57,weijie,wendydo980,whatraceami,whitewolf95,xAgrias,xDVxNightfire,xFadmer,xMelonn,xMoMox,xMomoKonekox,xReVaNx,xTan,xXxollinxXx,xYoshi-chan,xrsxj,xxTon3xx,xxavbxx,xxivdk,yalomalsteklo,yesy92,yoshiness,yruahippo,yukio_raiden15,yurififi,zLilith,zani,zaysha,zchs1988,zei_lytpire,zeigtsu500,zeon,zeroconnect,zeus88,zevlovex,ziraki,zoella_izumi,zoque999,zuhri,zurczner
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",-0.147298,0.007078,-0.192877,-0.027841,-0.030929,0.044756,-0.025734,0.029538,-0.039515,-0.014006,0.001912,-0.009095,0.041428,-0.088033,-0.029032,0.15493,-0.086858,-0.042033,0.00322,-0.044913,-0.059581,0.083546,-0.05182,0.093355,0.013416,0.029244,0.029588,-0.064988,-0.13144,0.008987,-0.018382,0.019033,-0.001501,-0.032993,-0.014742,-0.074434,-0.000349,0.012335,0.000999,0.010299,...,-0.027801,-0.058028,-0.019643,0.05363,0.011242,0.033147,-0.008202,0.018848,0.02127,-0.100092,0.046981,0.206419,0.009357,-0.015798,-0.033455,0.399912,0.027242,-0.123095,-0.269388,-0.06429,-0.062183,0.024443,-0.002012,0.003707,-0.066682,-0.02615,-0.016154,-0.010703,-0.004848,-0.084862,0.00505,-0.025034,0.007129,-0.009267,-0.187429,0.061762,0.014379,-0.017106,0.014506,-0.008585
"""Bungaku Shoujo"" Memoire",-0.028921,0.045514,-0.217199,0.015213,0.011767,0.023995,0.001932,-0.026081,-0.036863,0.008633,0.012248,0.087488,0.038423,-0.064023,-0.100291,0.229422,-0.057595,-0.083908,-0.01195,-0.071076,-0.040098,0.165826,-0.127408,-0.008639,0.048609,0.082218,0.001185,-0.07131,-0.179737,0.016658,-0.047678,0.009193,0.044192,-0.037713,-0.048511,-0.066387,-0.003868,-0.003617,0.002551,-0.188078,...,-0.04649,-0.105374,-0.03145,0.123484,0.011892,0.007278,0.021838,-0.055249,-0.048477,-0.115771,0.058285,0.024235,-0.021329,0.001009,-0.034747,0.45671,0.023493,-0.084781,-0.454637,-0.039235,-0.273163,0.079427,0.037653,0.013089,-0.067966,-0.018693,-0.00999,-0.01774,0.012346,-0.18661,-0.053507,-0.064289,-0.085059,-0.007741,-0.132745,0.098724,0.031486,-0.006777,-0.005516,-0.027736
"""Bungaku Shoujo"" Movie",0.156371,0.06845,-0.336231,-0.191042,-0.024523,-0.010148,0.111865,0.008281,-0.027141,0.062418,-0.007077,0.087559,-0.013151,-0.060322,-0.24572,0.306403,-0.046045,-0.101118,-0.097786,-0.077736,-0.165356,0.339624,-0.183565,-0.06823,0.100999,0.123885,-0.015206,-0.126755,-0.438704,0.028664,-0.078564,0.093747,0.026629,-0.084416,-0.104506,-0.079055,0.007243,0.01616,-0.009331,-0.703827,...,-0.043672,-0.193848,-0.071271,0.119381,0.023617,0.013977,0.040828,-0.113172,-0.083546,-0.256008,0.03036,-0.284829,-0.024235,0.03893,-0.101006,0.499921,0.027377,-0.114725,-0.636148,0.007495,-0.390881,0.111751,0.026389,0.018627,-0.106859,-0.025592,-0.030991,-0.036674,0.028455,-0.299633,-0.063753,-0.100012,-0.199217,-0.008095,-0.147282,0.121984,0.025889,6.8e-05,-0.01853,-0.058019
.hack//G.U. Returner,-0.082642,0.02254,-0.148371,0.131852,-0.025965,0.043731,-0.123812,-0.01507,0.000394,-0.034445,0.021011,0.113707,0.139009,-0.027426,-0.027339,0.199375,-0.094833,-0.068903,0.029142,-0.053267,-0.046828,0.041906,0.00678,0.044352,0.009828,0.050044,0.048491,-0.013068,0.063506,0.009622,-0.015918,-0.055872,0.030209,-0.004237,0.018551,-0.078065,-0.027619,-0.023901,0.017319,0.471528,...,-0.122598,-0.026665,0.011179,0.18125,0.003778,-0.03124,0.008013,3.1e-05,-0.017175,-0.030601,0.079858,0.252214,-0.016812,-0.04069,0.035745,0.476966,0.025267,-0.08939,-0.208082,-0.088562,-0.136164,0.088811,0.018357,0.008828,-0.031018,-0.018658,0.00382,-0.007389,-0.002654,0.021903,-0.037807,-0.073471,0.008514,-0.015459,-0.1472,0.079733,0.06,-0.019483,-0.020376,-0.030048
.hack//Gift,-0.138644,0.003971,-0.059601,0.15063,0.028734,0.020294,-0.087588,-0.017119,-0.041186,-0.019552,0.031902,0.106763,0.040286,-0.054404,0.030509,0.139213,-0.033334,-0.017392,0.036673,-0.020738,0.036569,-0.003957,-0.061701,0.004012,-0.002849,0.055085,0.027308,-0.015109,0.032681,0.003074,-0.01958,-0.078852,0.06147,-0.001925,-0.021088,-0.015228,-0.007191,-0.019346,0.015372,0.193224,...,-0.040039,-0.01106,0.008303,0.131284,-0.001829,-0.028648,0.007377,-0.009063,-0.018256,0.042909,0.057344,0.125983,-0.017291,-0.013919,0.040014,0.295871,0.016094,-0.014437,-0.306169,-0.041569,-0.185146,0.084443,0.049872,0.008772,-0.014795,-0.016967,0.009836,0.00928,0.005856,-0.079561,-0.045028,-0.018042,-0.006647,-0.00997,0.032208,0.117157,0.025471,-0.012607,-0.008416,-0.002028


In [None]:
col = list(Pivot_Data_Normalized.my_score.columns.values)
ind = list(Pivot_Data_Normalized.index)
data = Pivot_Data_Normalized.values
Rating_Real = pd.DataFrame(data, index=ind, columns=col)
Rating_Real.count().sum()

452039

Scaling to 0~1 for easier analysis

In [None]:
Rating_Real_Scaled = (Rating_Real - Rating_Real.min())/(Rating_Real.max() - Rating_Real.min())

In [None]:
Prediction_Matrix_Scaled = (Prediction_Matrix.T - Prediction_Matrix.min(axis=1))/(Prediction_Matrix.max(axis=1) - Prediction_Matrix.min(axis=1))

In [None]:
Prediction_Matrix_Sparsed = Prediction_Matrix_Scaled + Rating_Real - Rating_Real

Rating_Couple = Rating_Real_Scaled.T.values.copy()
Rating_Real_NP = Rating_Real_Scaled.fillna(value=0).T.values.copy()
Rating_Matrix_NP = Prediction_Matrix_Scaled.T.values.copy()

Rating_Couple = Rating_Couple.tolist()

In [None]:
for i in range(len(Rating_Real_NP)):
  for j in range(len(Rating_Real_NP[i])):
    Rating_Couple[i][j] = (Rating_Real_NP[i][j], Rating_Matrix_NP[i][j])

Verifying the top 10 rated animes of each user and seeing if the system recommended them

In [None]:
recommended = 0
relevant = 0
true_pos = 0

true_pos_list = []
rec_list = []
rel_list = []

for i in range(len(Rating_Couple)):
  Rating_Couple[i].sort(key=lambda x: x[0], reverse=True)
  K_Relevant = Rating_Couple[i][0:10]

  true_pos_i = 0
  rec_i = 0
  rel_i = 0

  for couple in K_Relevant:
    if couple[0] >= 0.5:
      rel_i = rel_i + 1
      relevant = relevant + 1
    if couple[1] >= 0.5:
      rec_i = rec_i + 1
      recommended = recommended + 1
    if couple[0] >= 0.5 and couple[1] >= 0.5:
      true_pos_i = true_pos_i + 1
      true_pos = true_pos + 1 
    
  true_pos_list.append(true_pos_i)
  rec_list.append(rec_i)
  rel_list.append(rel_i)

true_pos, recommended, relevant

(7127, 7255, 14396)

We see that the system got right almost everything that was recommended, but it failed to recommend a lot of animes in the users' top 10. If we lower the threshold we increase the recall value but we lose precision at the same time. Work with the threshold to get a good f1 score is one option to get a better performance, besides trying to optimize the algorithm.

In [None]:
precision_K = true_pos/recommended
recall_K = true_pos/relevant

precision_K, recall_K

(0.9823569951757408, 0.4950680744651292)

For each person

In [None]:
precision_i = np.zeros(1500)
recalls_i = np.zeros(1500)

for i in range(len(true_pos_list)):
  precision_i[i] = true_pos_list[i]/rec_list[i] if rec_list[i] != 0 else 1
  recalls_i[i] = true_pos_list[i]/rel_list[i] if rel_list[i] != 0 else 1

prec_mean = sum(precision_i)/len(precision_i)
rec_mean = sum(recalls_i)/len(recalls_i)

prec_mean, rec_mean

(0.9910960317460319, 0.5112558201058192)

For all Predictions:

In [None]:
true_pos = 0
relevant = 0
recommended = 0

for i in range(len(Prediction_Matrix_Sparsed.values)):
  for j in range(len(Prediction_Matrix_Sparsed.values[i])):
    if Prediction_Matrix_Sparsed.values[i][j] >= 0.5 and Rating_Real_Scaled.values[i][j] >= 0.5:
      true_pos = true_pos + 1
   
    if Rating_Real_Scaled.values[i][j] >= 0.5:
      relevant = relevant+1
    
    if Prediction_Matrix_Sparsed.values[i][j] >= 0.5:
      recommended = recommended + 1

true_pos, relevant, recommended


(58829, 261097, 75012)

In [None]:
true_pos/recommended, true_pos/relevant

(0.7842611848770863, 0.22531472977475803)

# Analyzing the Coefficients from Downloaded File


To avoid having to run the training algorithm of the gradient descent section everytime I opened this file, I uploaded the anime features and users coefficients in my drive. I'll call them here so I can use them.

In [None]:
Anime_Features_Down = pd.read_csv('/content/gdrive/My Drive/Anime_Features_Ofc.csv')
Anime_Features_Down.set_index('title', drop=True, inplace=True)
Anime_Features_Down.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",-0.248208,-0.935479,-1.766164,0.229873,-0.262506,-0.258197,0.747651,0.011273,-0.251339,1.684139
"""Bungaku Shoujo"" Memoire",0.086361,0.253587,0.362902,-0.002156,-2.002999,-1.897555,1.124326,1.054266,-0.125343,0.096866
"""Bungaku Shoujo"" Movie",-0.262888,-1.039591,-0.200068,-0.661391,-0.724966,-1.29691,-0.274135,-0.189604,-0.534663,1.05591
.hack//G.U. Returner,0.070522,-0.090094,-0.352034,0.961471,-0.813988,-0.941846,-0.098607,2.136057,-0.491352,-0.396194
.hack//Gift,0.120338,0.143913,-1.285894,1.020853,0.116558,-0.927395,-0.205443,2.594181,-0.151953,-1.014377


In [None]:
Users_Coeff_Down = pd.read_csv('/content/gdrive/My Drive/Users_Coefficients_Ofc.csv', header=1)
Users_Coeff_Down.rename(columns={'username': '' }, inplace=True)
Users_Coeff_Down.set_index('', drop=True, inplace=True)
Users_Coeff_Down.head()

Unnamed: 0,--RIE--,-Anita,-Eva-,-KYUUBl-,-Kuroneko,-Patchouli,-Rena-chan-,-Ryuujin-,-Shockwave-,-SirenOfPeace,-Stark-,-Yuiko-,-lekAA,-nugget-,-thinking-,1und2,4-chansey,4Eyed,5BowlsOfRice,6Sh00t3r,78malex,7hatGuy,92Nemmy92,AO1SOLDIER,ARQandARG,AdriDesuDesu,Aeandir,Aenotsu,Aerow96,Aga_Recitativo,Akane-Hana,Akarui666,Akilucky,Akira_1,AkiyamaHayate,Alabama,Albel-Kun,AleNunesBR,AlexBakaNii,AlexInkheart,...,weeman57,weijie,wendydo980,whatraceami,whitewolf95,xAgrias,xDVxNightfire,xFadmer,xMelonn,xMoMox,xMomoKonekox,xReVaNx,xTan,xXxollinxXx,xYoshi-chan,xrsxj,xxTon3xx,xxavbxx,xxivdk,yalomalsteklo,yesy92,yoshiness,yruahippo,yukio_raiden15,yurififi,zLilith,zani,zaysha,zchs1988,zei_lytpire,zeigtsu500,zeon,zeroconnect,zeus88,zevlovex,ziraki,zoella_izumi,zoque999,zuhri,zurczner
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0.0,-0.333025,-0.664771,-0.268724,0.322679,1.121673,0.561911,0.197409,-0.119967,-1.702443,1.24152,2.863487,0.023739,-1.866902,0.029076,-0.007667,0.837307,0.958312,2.173571,-0.254824,0.355706,3.770248,0.84346,-0.696908,-0.48223,-1.772037,3.015959,0.284232,-0.468751,-0.937173,-0.406205,-0.90977,-0.255599,1.0521,-1.112884,-0.019863,0.781464,2.943104,-1.202667,0.111139,-1.703497,...,4.016461,-0.088926,-1.266519,0.903092,5.637205,3.534296,-0.525418,0.031611,0.093041,1.353599,1.121944,0.70522,-0.081543,0.145122,-1.378068,-0.002632,-3.481969,1.944949,-1.782705,0.448464,-1.473324,-0.403092,1.796071,2.116712,3.27172,-0.271898,4.020871,1.623149,1.762551,-1.880641,0.391603,0.065706,2.554142,0.980172,0.009581,-0.015609,-0.858174,0.870552,0.404017,0.753051
1.0,-1.085311,1.480909,0.403088,-2.023471,-1.302784,-2.964056,-2.420624,-0.483997,-0.280724,-0.471943,0.726088,-0.864091,-1.352032,-2.415949,-0.521446,-1.683546,-1.989408,0.700974,3.284889,0.002362,-0.786688,-0.454003,0.179621,-1.227138,-1.351029,0.440547,-3.241085,1.240783,0.26183,0.049963,0.972001,0.398698,-1.181488,2.685192,-1.17316,0.686136,-0.176041,-1.121092,0.691001,-2.045726,...,-1.461983,-0.160347,1.157488,-0.759191,2.095653,1.150128,-1.646696,-0.786262,-0.44006,3.275175,-1.982244,-1.395881,-0.552141,-0.021209,1.429861,-0.069215,2.766419,1.458598,1.831942,-0.746323,-0.39119,1.964678,0.390303,-1.650184,1.563717,1.367704,2.249575,0.169659,0.66333,1.37773,-0.950604,-0.388299,2.673987,-1.27622,-0.604904,-0.263968,-1.814337,1.033935,-2.333968,-0.05637
2.0,0.8505,-0.589141,1.437056,0.103792,1.810175,-0.070836,-0.383984,-0.04436,-0.00735,0.820394,0.557474,1.039816,1.881773,3.625643,-1.889902,1.051596,-0.614337,1.127731,2.609353,0.107463,1.457055,0.033525,-0.154204,0.909412,1.039539,-0.567126,0.217201,1.754144,1.840422,-0.244779,-0.152581,-0.178929,0.218656,2.025613,-0.025701,1.075084,-0.485945,0.11922,0.929417,0.392985,...,1.447197,-0.074311,-1.028085,-0.51826,-0.936378,0.0924,3.692852,-0.837021,0.252705,-2.427508,1.033344,-1.034958,-0.302725,-0.122997,0.856233,0.126946,-2.521647,0.283843,0.931586,0.78835,-0.206472,-0.875513,1.607819,2.543769,3.024777,1.20156,2.319016,0.93856,-2.484186,-0.015495,0.114411,-0.0806,7.076157,-2.433339,1.269773,-1.01095,2.933311,3.103863,-0.198374,-3.495518
3.0,-1.507995,-0.360524,0.618335,-0.174171,0.389784,0.660558,0.513094,-0.808408,-0.294651,-3.251373,-1.052376,0.361954,0.606755,1.109019,1.925483,-1.72786,1.176866,-1.249176,-0.113167,-1.523274,-0.984679,-2.042644,-0.762124,-0.452672,0.358401,-0.684415,-2.794083,0.120035,0.571325,-0.386667,-0.474796,-2.37354,0.548709,-0.722136,-0.181681,-2.778128,0.514922,0.056768,-0.985913,2.272933,...,3.397414,-0.297623,-0.760676,-0.848133,-3.924676,-0.460725,2.13638,0.611842,0.181007,1.929264,-2.013554,0.116677,-0.226569,-0.899854,2.715548,-0.17992,-2.484418,-0.748231,-0.172712,-0.737628,0.991419,-1.824624,-1.244497,0.43616,0.766651,-0.113564,4.367718,-0.219778,-2.224214,-0.957352,-0.762053,-0.230277,4.157174,-2.6222,-1.28465,-1.361351,1.360286,-3.128786,0.314776,-0.249259
4.0,0.005333,2.618864,1.229759,-0.91965,0.441676,0.340243,0.139059,0.411154,2.333614,-0.202135,1.201773,2.779526,1.638008,0.628043,0.581891,1.395757,-1.125207,1.229977,-1.975392,0.618213,0.814726,0.046438,0.226147,-0.254991,-0.673652,0.160886,6.511788,-0.096096,-1.064805,0.479029,4.21162,0.017192,0.29341,-2.240515,0.064833,-1.028748,0.601623,-0.559231,0.878123,1.270184,...,-2.511473,-1.909476,2.537309,-0.202858,-1.69517,-3.969143,-0.415798,-1.047588,0.369027,3.847395,1.565021,-0.205912,0.588272,-1.089856,1.222253,-0.118732,2.742029,2.005065,0.186905,-0.445344,-0.002219,-1.359068,1.518612,-0.878273,0.739979,-0.964296,-3.618913,-1.066691,4.202112,-0.899092,-0.443367,0.002429,-0.296255,-2.568174,-0.221063,-2.146414,0.221982,0.84567,0.589188,-1.475744


In [None]:
Rating_Matrix = np.matmul(Anime_Features_Down.values, Users_Coeff_Down.values)
Rating_Matrix = pd.DataFrame(data=Rating_Matrix, index=Anime_Features_Down.index.to_list(), columns = Users_Coeff_Down.columns.to_list())
Rating_Matrix.head()

Unnamed: 0,--RIE--,-Anita,-Eva-,-KYUUBl-,-Kuroneko,-Patchouli,-Rena-chan-,-Ryuujin-,-Shockwave-,-SirenOfPeace,-Stark-,-Yuiko-,-lekAA,-nugget-,-thinking-,1und2,4-chansey,4Eyed,5BowlsOfRice,6Sh00t3r,78malex,7hatGuy,92Nemmy92,AO1SOLDIER,ARQandARG,AdriDesuDesu,Aeandir,Aenotsu,Aerow96,Aga_Recitativo,Akane-Hana,Akarui666,Akilucky,Akira_1,AkiyamaHayate,Alabama,Albel-Kun,AleNunesBR,AlexBakaNii,AlexInkheart,...,weeman57,weijie,wendydo980,whatraceami,whitewolf95,xAgrias,xDVxNightfire,xFadmer,xMelonn,xMoMox,xMomoKonekox,xReVaNx,xTan,xXxollinxXx,xYoshi-chan,xrsxj,xxTon3xx,xxavbxx,xxivdk,yalomalsteklo,yesy92,yoshiness,yruahippo,yukio_raiden15,yurififi,zLilith,zani,zaysha,zchs1988,zei_lytpire,zeigtsu500,zeon,zeroconnect,zeus88,zevlovex,ziraki,zoella_izumi,zoque999,zuhri,zurczner
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",-1.625989,4.555463,-5.517764,1.110959,-3.415123,5.397407,2.263012,-0.388115,-4.011965,0.981314,-8.287828,-1.822238,-0.572761,-4.4678,4.445511,-3.243434,5.414882,-9.949102,-7.612072,-4.844918,-2.79816,2.65263,-0.362829,-3.04583,-0.104391,2.020263,12.006515,2.218531,-5.414394,0.190203,-2.339529,0.194688,-0.500895,-6.918575,0.285173,-2.423341,3.623501,0.937993,-2.886018,0.729821,...,-5.268551,-1.527001,-4.134419,2.307241,4.232037,-4.144161,-5.905142,2.760189,-0.952647,-1.169599,5.543955,5.504541,0.505629,-1.714089,-3.866586,-0.085664,-5.009814,-5.463623,-3.528972,-1.268087,-0.0784,-0.0928,-7.883492,-1.324691,-10.175578,-3.461409,-5.168074,-1.7963,0.661339,0.494831,-0.20141,-0.354904,-28.296804,10.813566,-2.455641,-3.892447,0.088062,-6.706645,3.793927,6.383685
"""Bungaku Shoujo"" Memoire",-1.088253,-2.832685,-7.784119,0.401361,-1.206056,-2.252437,-5.661914,-2.673856,-10.026464,-1.177251,-2.824165,-2.915327,-2.257513,-0.546975,-2.674322,-8.073434,3.859509,-9.645093,2.41838,-7.523713,-1.062141,0.35475,-0.568901,-4.624377,1.83059,5.048689,-27.511885,-6.141125,4.129345,-1.278042,-13.167205,-1.373161,-0.534156,10.362535,-2.19294,2.66078,-3.005009,2.285163,-0.361734,-6.065978,...,-0.964323,0.933755,-8.529701,2.016936,12.175656,8.293402,2.423342,-3.23501,-2.550474,-17.128465,-8.066729,1.097045,-3.272238,1.809737,0.127992,0.289134,-10.853504,-1.825739,-6.198884,-0.157566,-4.992186,-3.746591,-4.436961,3.853818,5.695761,3.070969,17.250188,1.045728,-10.579452,-0.810183,-0.66734,-1.325032,12.30093,2.785065,0.850578,0.641565,-0.695242,-3.040432,-4.632149,0.828147
"""Bungaku Shoujo"" Movie",1.641716,-0.227676,-5.37901,-1.037557,-2.328475,0.118303,0.026019,-0.171436,-5.376172,0.853917,-6.773168,-1.193598,-0.833079,0.478262,-5.981504,-0.690736,-1.251365,-5.335954,-4.282283,-1.759315,-4.657276,2.200674,-1.626008,-1.198164,1.79961,-2.345166,3.492084,-1.356025,-3.983216,-0.553276,-6.011987,-1.476027,-2.591292,-6.279441,-1.97584,-1.939048,-0.257516,1.482365,-1.602147,-3.729933,...,-2.769613,-2.131834,-5.526841,-0.101092,11.063179,-2.242439,0.522262,-2.783976,-2.666809,-12.81169,2.763106,-1.610062,-1.091069,-0.962146,-5.785668,0.392833,-6.419075,-4.278379,-5.679524,1.447749,-2.969174,-2.035553,-2.272145,2.114483,-5.805965,-1.720841,-2.724071,-0.342112,-1.564117,-3.153567,0.799502,-0.469738,-17.153446,2.875274,-0.518584,-2.500386,0.864622,-0.590784,1.600926,2.109706
.hack//G.U. Returner,-3.119154,-7.096476,-4.825155,0.184595,-1.166927,-0.582321,-0.910897,-1.329971,-7.104207,-2.909182,-2.369045,4.212447,0.74278,2.04786,3.833371,-1.894119,3.149382,-7.077051,-3.365932,-6.523527,-3.261695,-1.892524,0.470053,-3.127052,2.118997,-0.558341,-18.853746,-12.493568,3.424709,-0.955113,-6.116478,0.020744,1.238383,10.621848,-0.490011,-0.028631,-1.073022,4.285898,-1.899738,-0.587569,...,4.68398,0.785716,-3.944187,2.07122,10.618332,5.24081,5.546077,-1.232116,-2.263795,-2.187306,-11.012864,0.820781,-2.046945,0.93448,-0.92633,0.278984,-9.591501,4.477735,-8.525161,-1.2872,-1.399755,1.719133,-0.491814,3.516612,6.113248,-0.38963,16.035279,-2.232489,-3.671866,-3.552806,-0.850742,-0.424868,18.042937,-6.626262,-0.284904,3.331832,0.715785,-7.626967,-1.660923,-0.304169
.hack//Gift,-4.359283,-6.208418,-4.378344,-0.469682,-1.829276,-1.308144,-0.755303,-0.825161,-4.429223,-3.9949,-0.302602,6.974891,-0.062245,-0.793856,6.374816,-2.276958,3.314604,-5.860531,-7.928973,-5.75992,-3.428571,-2.713269,1.730794,-4.098481,0.774052,0.722375,-20.63848,-17.660962,2.690266,-0.176269,-1.788313,1.051564,2.294695,10.261992,0.348243,-0.958062,-1.05171,4.367743,-1.642149,0.58487,...,1.922995,-0.127461,0.206974,2.441535,9.911288,4.44713,3.039203,-1.461078,-1.806882,5.714146,-14.889312,1.028571,-1.243696,1.16921,0.740194,0.03148,-2.628843,8.242944,-9.552194,-2.616465,-0.806952,1.953255,0.497081,0.501996,6.962542,-1.819531,14.284702,-4.392706,2.500138,-4.242285,-1.521977,0.08261,20.321573,-8.546427,-0.713244,4.680323,-3.636912,-10.68766,-2.221607,1.305724


In [None]:
col = list(Pivot_Data_Normalized.my_score.columns.values)
ind = list(Pivot_Data_Normalized.index)
data = Pivot_Data_Normalized.values
Rating_Real = pd.DataFrame(data, index=ind, columns=col)
Rating_Real.count().sum()



452039

Scaling to 0~1 for easier analysis

In [None]:
Rating_Real_Scaled = (Rating_Real - Rating_Real.min())/(Rating_Real.max() - Rating_Real.min())

In [None]:
Rating_Matrix_Scaled = (Rating_Matrix - Rating_Matrix.min())/(Rating_Matrix.max() - Rating_Matrix.min())

In [None]:
Rating_Matrix_Sparsed = Rating_Matrix_Scaled + Rating_Real - Rating_Real

Rating_Couple = Rating_Real_Scaled.T.values.copy()
Rating_Real_NP = Rating_Real_Scaled.fillna(value=0).T.values.copy()
Rating_Matrix_NP = Rating_Matrix_Sparsed.fillna(value=0).T.values.copy()

Rating_Couple = Rating_Couple.tolist()


In [None]:
for i in range(len(Rating_Real_NP)):
  for j in range(len(Rating_Real_NP[i])):
    Rating_Couple[i][j] = (Rating_Real_NP[i][j], Rating_Matrix_NP[i][j])

In [None]:
recommended = 0
relevant = 0
true_pos = 0

true_pos_list = []
rec_list = []
rel_list = []

for i in range(len(Rating_Couple)):
  Rating_Couple[i].sort(key=lambda x: x[0], reverse=True)
  K_Relevant = Rating_Couple[i][0:10]

  true_pos_i = 0
  rec_i = 0
  rel_i = 0

  for couple in K_Relevant:
    if couple[0] >= 0.5:
      rel_i = rel_i + 1
      relevant = relevant + 1
    if couple[1] >= 0.5:
      rec_i = rec_i + 1
      recommended = recommended + 1
    if couple[0] >= 0.5 and couple[1] >= 0.5:
      true_pos_i = true_pos_i + 1
      true_pos = true_pos + 1 
    
  true_pos_list.append(true_pos_i)
  rec_list.append(rec_i)
  rel_list.append(rel_i)

true_pos, recommended, relevant

(11164, 11184, 14396)

In [None]:
precision_K = true_pos/recommended
recall_K = true_pos/relevant

precision_K, recall_K

(0.998211731044349, 0.775493192553487)

For each person:

In [None]:
precision_i = np.zeros(1500)
recalls_i = np.zeros(1500)
for i in range(len(true_pos_list)):
  precision_i[i] = true_pos_list[i]/rec_list[i] if rec_list[i] != 0 else 1
  recalls_i[i] = true_pos_list[i]/rel_list[i] if rel_list[i] != 0 else 1

prec_mean = sum(precision_i)/len(precision_i)
rec_mean = sum(recalls_i)/len(recalls_i)

prec_mean, rec_mean

(0.9954, 0.7815870370370364)

Precision and Recall for all recommendations

In [None]:
true_pos = 0
relevant = 0
recommended = 0

for i in range(len(Rating_Matrix_Sparsed.values)):
  for j in range(len(Rating_Matrix_Sparsed.values[i])):
    if Rating_Matrix_Sparsed.values[i][j] >= 0.6 and Rating_Real_Scaled.values[i][j] >= 0.6:
      true_pos = true_pos + 1
   
    if Rating_Real_Scaled.values[i][j] >= 0.6:
      relevant = relevant+1
    
    if Rating_Matrix_Sparsed.values[i][j] >= 0.6 :
      recommended = recommended + 1

true_pos, relevant, recommended


(76105, 244426, 103389)

In [None]:
true_pos/recommended, true_pos/relevant

(0.7361034539457776, 0.31136213005163116)

# Getting the recommendation for a specific anime

In [None]:
DF_anime[DF_anime.title == 'Chobits']

Unnamed: 0,anime_id,title,title_english,genre,score,scored_by
176,59,Chobits,Chobits,"Sci-Fi, Comedy, Drama, Romance, Ecchi, Seinen",7.53,175388


From the Gradient Descent Algorithm

In [None]:
anime_val =  'Cowboy Bebop'
((Anime_Features_Down.loc[anime_val] * Anime_Features_Down).sum(axis=1)/(np.linalg.norm(Anime_Features_Down.loc[anime_val])* np.linalg.norm(Anime_Features_Down, axis=1))).sort_values(ascending=False).head(n=20)

title
Cowboy Bebop                                                            1.000000
Samurai Champloo                                                        0.937515
Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen                   0.934229
Golden Boy                                                              0.922818
FLCL                                                                    0.916816
Black Lagoon                                                            0.899420
Trigun                                                                  0.888203
Cowboy Bebop: Tengoku no Tobira                                         0.887183
Neon Genesis Evangelion                                                 0.876350
Ghost in the Shell                                                      0.869400
NHK ni Youkoso!                                                         0.854162
Black Lagoon: The Second Barrage                                        0.852615
Hellsing              

From the svd algorithm

In [None]:
anime_val =  'Cowboy Bebop'
((anime_features_svd.loc[anime_val] * anime_features_svd).sum(axis=1)/(np.linalg.norm(anime_features_svd.loc[anime_val])* np.linalg.norm(anime_features_svd, axis=1))).sort_values(ascending=False).head(n=20)

Cowboy Bebop                                             1.000000
Cowboy Bebop: Tengoku no Tobira                          0.873744
Trigun                                                   0.863188
Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen    0.858220
Black Lagoon                                             0.854035
FLCL                                                     0.851437
Neon Genesis Evangelion                                  0.846081
Riding Bean                                              0.833312
Akira                                                    0.831429
Samurai Champloo                                         0.824967
Baccano!                                                 0.819583
Basilisk: Kouga Ninpou Chou                              0.819085
Hellsing                                                 0.818905
Ghost in the Shell                                       0.811474
Detroit Metal City                                       0.810471
Kenpuu Den

# TEST SET FOR THE MODEL

I'll use the anime features and s_matrix I obtained from the svd section from now on, With them I'll calculate the coefficients of the users in the test set and then analyze how well the model performed.

Getting only users who are not in the training set

In [442]:
DF_ratings_user_test = DF_ratings_user[~DF_ratings_user.username.isin(Users_sample.username.to_list())]

In [443]:
Users_sample_test = DF_ratings_user_test.sample(n=300, random_state=2)

In [444]:
DF_ratings_test = DF_ratings_filtered[DF_ratings_filtered.username.isin(Users_sample_test.username.to_list())]

In [445]:
DF_ratings_test.shape

(95142, 3)

In [446]:
## JUST CHECKING ##
y = DF_ratings_test.groupby(by=['username']).count()
y.rename(columns={'my_score':'nº of ratings'}, inplace=True)


y = y.drop(columns=['anime_id'], axis=1)
y.reset_index(inplace=True)
y.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,username,nº of ratings
42,Criagaknight,10
287,susamajii,10
114,Meer_campbell,11
9,Akira-Toudou,11
282,sarah-jahan,12


Number of ratings per anime in the sample of 300 users

In [447]:
DF_ratings_test_anime = DF_ratings_test.groupby(by=['anime_id']).count()
DF_ratings_test_anime.rename(columns={'my_score':'nº of ratings'}, inplace=True)


DF_ratings_test_anime = DF_ratings_test_anime.drop(columns=['username'], axis=1)
DF_ratings_test_anime.reset_index(inplace=True)
DF_ratings_test_anime.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,anime_id,nº of ratings
4699,32898,1
2335,6422,1
4875,33723,1
4874,33720,1
4873,33717,1


In [448]:
DF_ratings_test_anime.shape

(5403, 2)

Selecting only the anime that are in the training set

In [449]:
DF_Scores_ID = DF_ratings_final.merge(DF_anime_to_merge, on='title')
DF_Scores_ID.drop(columns=['title'], inplace=True)
DF_Scores_ID.shape

DF_Scores_ID.head()

Unnamed: 0,username,my_score,anime_id
0,The_Lordian,8,21
1,Itami,0,21
2,Flerkisa,0,21
3,Moneeq,0,21
4,SajayenM,9,21


In [450]:
DF_ratings_test_anime = DF_ratings_test_anime[DF_ratings_test_anime.anime_id.isin(DF_Scores_ID.anime_id.to_list())]
DF_ratings_test_anime.shape

(4359, 2)

In [451]:
## Removing the ratings of the removed animes from the ratings list
DF_ratings_test = DF_ratings_test[
DF_ratings_test.anime_id.isin(DF_ratings_test_anime.anime_id.to_list())]


a = DF_ratings_test.groupby(by=['username']).count()
a.rename(columns={'my_score':'nº of ratings'}, inplace=True)

a = a.drop(columns=['anime_id'], axis=1)

a.reset_index(inplace=True)
a.sort_values(by=['nº of ratings'], ascending=True).head()

Unnamed: 0,username,nº of ratings
42,Criagaknight,10
287,susamajii,10
114,Meer_campbell,11
9,Akira-Toudou,11
282,sarah-jahan,12


In [452]:
DF_ratings_test_anime.shape

(4359, 2)

In [453]:
DF_ratings_test = DF_ratings_test.merge(DF_anime_to_merge, on='anime_id')
DF_ratings_test.shape

(93184, 4)

In [454]:
DF_ratings_test.drop(columns=['anime_id'], inplace=True)
Pivot_Data_test = DF_ratings_test.pivot_table(index='title', columns='username')

In [455]:
User_mean_test = Pivot_Data_test.mean(axis=0).values
Pivot_Data_Normalized_test = Pivot_Data_test - User_mean_test

Getting the features from the animes that were rated by our users in the test set (I'll use the features obtained from the svd approach)

In [457]:
Anime_Features_svd_test = anime_features_svd[anime_features_svd.index.isin(Pivot_Data_test.index)]

Using the anime features obtained previously, I run a gradient descent algorithm to obtain the coefficients of our users in the test set.

In [463]:
J_list = []
J_Cost_temp = 10**20
Users_Coeff_test = np.random.rand(10,300)
m = len(Anime_Features_svd_test)
i = 0

# Already taking into account the s_matrix
Anime_Features_Test = Anime_Features_svd_test.values @ s_matrix

lambda_reg = 0.01
alpha_c = 0.05

while True:

  Predict_test = Anime_Features_Test @ Users_Coeff_test

  Reg_Term = (lambda_reg/(2*m))*(((Users_Coeff_test**2).sum(axis=1)).sum())
 
  J_Cost = (1/(2*m))*np.nansum((Predict_test - Pivot_Data_Normalized_test.values)**2)+ Reg_Term
  
  if abs(J_Cost - J_Cost_temp) < 0.0005:
    break

  J_Cost_temp = J_Cost
  
  #print(i, J_Cost)
  
  i = i+1
  J_list.append(J_Cost)
 
  Users_Coeff_test_temp = (alpha_c/m)*(((Predict_test - Pivot_Data_Normalized_test).fillna(value=0).T.values) @ Anime_Features_Test)

  Users_Coeff_test = (1-lambda_reg*alpha_c/m)*Users_Coeff_test - Users_Coeff_test_temp.T


In [464]:
Users_Coeff_Test_DF = pd.DataFrame(Users_Coeff_test.T, index = Pivot_Data_test.my_score.columns.values.tolist())
Users_Coeff_Test_DF.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
-Azumi-,-0.138702,-0.028444,-0.045563,0.23399,0.044624,0.587256,0.141244,0.222076,0.426258,0.125106
-Hiryuu-,-0.020223,-0.024475,-0.012538,0.007182,0.009366,0.053896,0.005366,-0.010214,-0.043821,0.013552
-Hyper-,0.049301,-0.019494,0.01054,0.032345,-0.022509,0.029687,-0.0968,0.008986,0.014638,0.082646
90sanimebabe,0.082475,0.011499,0.016322,0.060167,0.055776,-0.004526,0.08033,0.029096,0.004094,0.015985
Abel_Sins,0.047791,0.110001,0.122791,-0.007034,0.026026,-0.09997,0.055754,-0.06618,-0.149893,-0.065296


In [465]:
Anime_Features_Test_DF = pd.DataFrame(Anime_Features_Test, index = Pivot_Data_test.index.values.tolist())
Anime_Features_Test_DF.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",2.013025,-1.778773,-1.189023,-1.515571,1.038595,-1.117914,-0.791835,0.253805,-0.48491,-0.590422
"""Bungaku Shoujo"" Memoire",2.479863,-1.372322,1.175707,-0.897822,1.168563,-1.737115,-0.353409,1.338857,-1.124248,-1.517183
"""Bungaku Shoujo"" Movie",2.226219,-1.812268,3.072877,-1.994933,0.461032,-1.055839,-0.169662,3.932588,-2.001687,-2.985111
.hack//G.U. Returner,3.524981,-1.254557,-1.383865,0.453719,0.161668,-1.470848,0.069275,0.252574,-0.697696,0.560382
.hack//Gift,2.747244,0.101832,0.242597,0.695666,1.452201,-0.785083,-0.598055,-0.139023,-0.344752,0.256968


In [466]:
Prediction_Matrix_Test = pd.DataFrame(Predict_test, index = Pivot_Data_test.index.values.tolist(), columns = Pivot_Data_test.my_score.columns.values.tolist())
Prediction_Matrix_Test.head()

Unnamed: 0,-Azumi-,-Hiryuu-,-Hyper-,90sanimebabe,Abel_Sins,Abernite,Adri_VolKatina,AdventAntigone,Akii-kun,Akira-Toudou,Akumayuki,Alcor404,Amerowolf,Anbu-elite,Andoux3,AnimeFanatic007,Animehunter,Animeoh,Antekk,Antione,ArcziZ,Arkan4ik,AsoeAce,Attanus,AyumiSasaki,Ayumu-H,Azuki-zenzai,Banpi,Bergioyn,Biribiri32,BleachIchimaru04,BoundlessSpirit,BrutallPrincess,CStephen,Chbangus9000,ChibiMadness,ChibiMoyashi,Chocolatix,ChronusZ,Chubbsy,...,notreallygood,nyako-sensei,oniikyun,ph15h,phlzkeson,popske,pozdrawiamtatexd,princessofheart,psychoplast,purevil89,raichusuperstar,ramenstopen,razaru,recuerdo,redjzSuzuki,renruf,revolvlover,roxey-chan,ryoangus,ryusaki97,sailorgoon,sanzx,sarah-jahan,saylynd,sensei69,shatterdoll,skyestrife15,susamajii,takuyafire,vampireas,wawero,whooopsann,wumbmasta,xBlacklegluffyx,xKawaiiTenshi-,xXhanatchiXx,yaanns,yunoxyuki12,zel2zel,zlader
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",-1.475263,-0.037267,0.038837,0.030318,-0.045722,0.159794,-0.123089,-0.313381,-0.783833,-0.921575,-0.558096,0.136609,-0.216324,0.066211,0.200286,-0.240996,0.308378,-0.386602,0.28372,0.018318,0.509145,0.716936,0.200828,0.188034,-2.048406,0.141175,-0.15712,0.180806,0.097901,0.220685,0.187173,-0.392831,-2.239006,-0.10342,0.536765,-0.072597,-0.956085,0.285213,-0.014492,0.06165,...,-0.207524,-0.115751,-0.295208,-0.201777,0.013765,0.128364,-0.101053,0.052214,0.019674,-0.209681,-0.003327,0.490657,0.167503,0.085655,-1.10806,0.4486,0.287462,0.22702,0.092522,-0.349159,-0.27226,0.172016,-1.58031,0.059113,0.065077,-0.584174,-0.103913,0.38612,-0.047239,0.025456,0.200053,0.185857,-0.029158,0.643013,0.156531,-0.283298,-0.18635,-0.21438,-0.210031,-0.095582
"""Bungaku Shoujo"" Memoire",-1.95818,-0.107296,-0.041114,0.208668,0.481587,0.609703,-0.079483,-0.393483,-1.18124,0.848166,-0.399685,0.064919,-0.175793,0.184237,0.240321,-0.182287,0.152665,-0.638419,0.344467,0.038192,0.328266,0.407112,0.140894,-0.024681,-0.049877,-0.050735,-0.033177,0.240952,0.005951,0.136458,0.168205,-0.043644,-2.408863,-0.192201,0.638507,0.317225,-0.297213,0.148209,-0.211246,-0.199273,...,-0.465493,-0.231876,-0.035171,0.17839,-0.130468,0.022037,-0.029966,-0.279596,0.326312,-0.147002,-0.006841,0.536073,0.314069,0.239024,-0.872575,0.138762,0.190801,0.215131,-0.162038,-0.136085,-0.117539,-0.014644,-0.311012,0.010031,-0.045056,-0.581106,0.175801,1.79603,-0.015001,-0.166954,0.428961,0.023387,-0.116947,0.262791,0.302006,-0.298779,-0.294087,-0.339746,-0.343438,0.116416
"""Bungaku Shoujo"" Movie",-1.84083,-0.099923,-0.153023,0.168268,0.641187,0.978339,-0.058323,-0.328596,-2.002058,0.521653,-0.357353,0.059583,-0.184054,0.267616,0.203594,-0.210504,-0.027402,-0.664272,0.147331,-0.112701,-0.145065,0.059814,-0.317396,-0.273372,1.733822,-0.149061,-0.013012,0.26815,-0.019457,0.001496,0.060169,-0.308191,-2.516372,-0.497396,0.524518,0.510175,0.527759,-0.076971,-0.337244,-0.495121,...,-0.952272,-0.385437,-0.009244,0.815378,-0.646889,-0.03293,-0.304724,-0.505264,0.49281,-0.104377,-0.016562,0.469694,0.401672,0.522229,-0.958338,-0.374761,-0.149672,-0.004763,-0.715311,-0.122403,-0.607108,-0.229647,0.404107,0.036119,-0.228176,-0.575347,0.452265,3.272345,0.062208,-0.241718,0.544905,-0.28747,-0.143004,-0.370827,0.107929,-0.064329,-0.487021,-0.748162,-0.393748,0.360419
.hack//G.U. Returner,-1.301985,-0.061769,0.182692,0.315699,0.063726,0.109214,-0.025709,-0.329804,-0.335612,1.688655,-0.536316,0.162589,-0.098749,0.068148,0.317589,-0.16172,0.531535,-0.12534,0.29846,0.35846,0.462633,0.789729,0.260608,0.369527,-1.647893,0.295997,0.007663,0.600221,-0.039142,0.268415,0.080407,-0.211104,-1.738285,0.270862,0.462019,-0.255361,-0.691717,0.435966,-0.019426,0.305747,...,-0.34991,-0.043033,-0.162326,-0.455871,0.284636,0.082503,-0.033475,0.080628,0.164023,-0.328704,0.000192,0.791833,-0.103896,0.025047,-0.81138,0.599768,0.466151,0.452567,0.413835,-0.304122,-0.056042,0.157736,-0.092157,0.136977,0.029578,-0.382957,-0.214761,0.899966,-0.101814,-0.012583,0.249995,0.223089,0.069765,0.826408,0.414061,-0.089179,-0.24106,-0.172961,-0.021339,0.019914
.hack//Gift,-0.858611,-0.068006,0.175353,0.308727,0.294424,0.350867,-0.008785,-0.310272,-0.13006,2.594838,-0.218519,0.062893,0.023461,0.092771,0.229084,-0.072873,0.166531,0.076268,0.388715,0.179315,0.303927,0.252644,0.293494,0.138852,0.1749,-0.016983,-0.030831,0.322513,0.000767,0.170781,0.203829,0.44828,-1.037784,0.00873,0.334707,-0.047124,-0.123414,0.135153,0.011453,-0.112826,...,-0.131697,0.051398,0.098414,0.022415,0.199399,0.069777,-0.169277,-0.1622,0.282868,-0.066726,-0.002623,0.46442,-0.107396,0.023944,0.086111,0.18087,0.319807,0.13168,0.302324,0.029539,0.37032,-0.050422,1.195374,0.005711,0.170963,-0.212922,0.025864,1.547217,-0.003068,-0.142447,0.255256,0.02219,-0.07176,0.346244,0.266438,-0.048747,-0.12801,-0.060221,-0.11338,0.022851


In [467]:
col_test = list(Pivot_Data_Normalized_test.my_score.columns.values)
ind_test = list(Pivot_Data_Normalized_test.index)
data_test = Pivot_Data_Normalized_test.values
Rating_Real_test = pd.DataFrame(data_test, index=ind_test, columns=col_test)
Rating_Real_test.head()

Unnamed: 0,-Azumi-,-Hiryuu-,-Hyper-,90sanimebabe,Abel_Sins,Abernite,Adri_VolKatina,AdventAntigone,Akii-kun,Akira-Toudou,Akumayuki,Alcor404,Amerowolf,Anbu-elite,Andoux3,AnimeFanatic007,Animehunter,Animeoh,Antekk,Antione,ArcziZ,Arkan4ik,AsoeAce,Attanus,AyumiSasaki,Ayumu-H,Azuki-zenzai,Banpi,Bergioyn,Biribiri32,BleachIchimaru04,BoundlessSpirit,BrutallPrincess,CStephen,Chbangus9000,ChibiMadness,ChibiMoyashi,Chocolatix,ChronusZ,Chubbsy,...,notreallygood,nyako-sensei,oniikyun,ph15h,phlzkeson,popske,pozdrawiamtatexd,princessofheart,psychoplast,purevil89,raichusuperstar,ramenstopen,razaru,recuerdo,redjzSuzuki,renruf,revolvlover,roxey-chan,ryoangus,ryusaki97,sailorgoon,sanzx,sarah-jahan,saylynd,sensei69,shatterdoll,skyestrife15,susamajii,takuyafire,vampireas,wawero,whooopsann,wumbmasta,xBlacklegluffyx,xKawaiiTenshi-,xXhanatchiXx,yaanns,yunoxyuki12,zel2zel,zlader
"""Bungaku Shoujo"" Kyou no Oyatsu: Hatsukoi",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-7.019608
"""Bungaku Shoujo"" Memoire",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-1.034884,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.980392
"""Bungaku Shoujo"" Movie",,,,,,,,,,,,,,,,,,,,,,,1.600707,,,,,,,,,,,-1.034884,,,,,-5.024793,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-4.535565,,,,,,,,,,0.980392
.hack//G.U. Returner,,,,,,,,,-8.198198,,,,,,,,,,-5.69146,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.980392
.hack//Gift,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.980392


Scaling to 0~1 for easier analysis

In [468]:
Rating_Real_test_Scaled = (Rating_Real_test - Rating_Real_test.min())/(Rating_Real_test.max() - Rating_Real_test.min())

In [471]:
Prediction_Matrix_Test_Scaled = (Prediction_Matrix_Test - Prediction_Matrix_Test.min())/(Prediction_Matrix_Test.max() - Prediction_Matrix_Test.min())

In [472]:
Prediction_Matrix_Sparsed_Test = Prediction_Matrix_Test_Scaled + Rating_Real_test - Rating_Real_test

Rating_Couple_Test = Rating_Real_test_Scaled.T.values.copy()
Rating_Real_NP_Test = Rating_Real_test_Scaled.fillna(value=0).T.values.copy()
Rating_Matrix_NP_Test = Prediction_Matrix_Test_Scaled.T.values.copy()

Rating_Couple_Test = Rating_Couple_Test.tolist()

In [473]:
for i in range(len(Rating_Real_NP_Test)):
  for j in range(len(Rating_Real_NP_Test[i])):
    Rating_Couple_Test[i][j] = (Rating_Real_NP_Test[i][j], Rating_Matrix_NP_Test[i][j])

Top K Recommendations

In [475]:
recommended = 0
relevant = 0
true_pos = 0

true_pos_list = []
rec_list = []
rel_list = []

for i in range(len(Rating_Couple_Test)):
  Rating_Couple_Test[i].sort(key=lambda x: x[0], reverse=True)
  K_Relevant = Rating_Couple_Test[i][0:10]

  true_pos_i = 0
  rec_i = 0
  rel_i = 0

  for couple in K_Relevant:
    if couple[0] >= 0.5:
      rel_i = rel_i + 1
      relevant = relevant + 1
    if couple[1] >= 0.5:
      rec_i = rec_i + 1
      recommended = recommended + 1
    if couple[0] >= 0.5 and couple[1] >= 0.5:
      true_pos_i = true_pos_i + 1
      true_pos = true_pos + 1 
    
  true_pos_list.append(true_pos_i)
  rec_list.append(rec_i)
  rel_list.append(rel_i)

true_pos, recommended, relevant

(1755, 1776, 2881)

We got almost 99% precision for the users' top 10 animes, which is pretty good, even though we used only 1500 users to estimate our anime features. In a real situation we would have used a bigger sample if not all dataset, but for this example we got a good enough result.

In [476]:
precision_K = true_pos/recommended
recall_K = true_pos/relevant

precision_K, recall_K

(0.9881756756756757, 0.609163484901076)

For each person

In [477]:
precision_i = np.zeros(300)
recalls_i = np.zeros(300)

for i in range(len(true_pos_list)):
  precision_i[i] = true_pos_list[i]/rec_list[i] if rec_list[i] != 0 else 1
  recalls_i[i] = true_pos_list[i]/rel_list[i] if rel_list[i] != 0 else 1

prec_mean = sum(precision_i)/len(precision_i)
rec_mean = sum(recalls_i)/len(recalls_i)

prec_mean, rec_mean

(0.9922433862433861, 0.6139365079365082)

For all Predictions:

In [478]:
true_pos = 0
relevant = 0
recommended = 0

for i in range(len(Prediction_Matrix_Sparsed_Test.values)):
  for j in range(len(Prediction_Matrix_Sparsed_Test.values[i])):
    if Prediction_Matrix_Sparsed_Test.values[i][j] >= 0.5 and Rating_Real_test_Scaled.values[i][j] >= 0.5:
      true_pos = true_pos + 1
   
    if Rating_Real_test_Scaled.values[i][j] >= 0.5:
      relevant = relevant+1
    
    if Prediction_Matrix_Sparsed_Test.values[i][j] >= 0.5:
      recommended = recommended + 1

true_pos, relevant, recommended


(18888, 53413, 24859)

In [479]:
true_pos/recommended, true_pos/relevant

(0.7598053019027314, 0.3536217774699043)

# Using the model from collaborative to make recommendation to a User

In [507]:
User_Anime = User_data[['anime_id', 'title', 'user_rating']].copy()


In [508]:
User_Anime.dropna(inplace=True)
User_Anime

Unnamed: 0,anime_id,title,user_rating
0,31646,3-gatsu no Lion,9.0
1,1,Cowboy Bebop,8.0
2,813,Dragon Ball Z,9.0
3,431,Howl no Ugoku Shiro,7.0
4,20,Naruto,8.0
5,30,Neon Genesis Evangelion,8.0
6,13601,Psycho-Pass,8.0
7,2236,Toki wo Kakeru Shoujo,7.0
8,523,Tonari no Totoro,7.0
9,35968,Wotaku ni Koi wa Muzukashii,9.0


In [509]:
User_Anime_Pivot = pd.DataFrame(User_Anime.user_rating.values, index = User_Anime.title.values.tolist(), columns = ['user_rating'])
User_Anime_Pivot_Normalized = User_Anime_Pivot - User_Anime_Pivot.mean()

User_Anime_Pivot_Normalized

Unnamed: 0,user_rating
3-gatsu no Lion,1.0
Cowboy Bebop,0.0
Dragon Ball Z,1.0
Howl no Ugoku Shiro,-1.0
Naruto,0.0
Neon Genesis Evangelion,0.0
Psycho-Pass,0.0
Toki wo Kakeru Shoujo,-1.0
Tonari no Totoro,-1.0
Wotaku ni Koi wa Muzukashii,1.0


In [510]:
J_list = []
lambda_reg = 0.001
alpha = 0.0008

User_Coeff = np.random.rand(10,1)
User_Anime_Pivot;

Anime_Watched_Features = anime_features_svd[anime_features_svd.index.isin(User_Anime.title.tolist())]

Anime_Watched_Features = Anime_Watched_Features.values @ s_matrix

m = len(Anime_Watched_Features)

J_Cost_temp = 10**20

for i in range(25000):
  Rating_Predict_User = Anime_Watched_Features @ User_Coeff
  Reg_Term = (lambda_reg/(2*m))*(((User_Coeff**2).sum(axis=1)).sum())
  
  J_Cost = (1/(2*m))*(((Rating_Predict_User - User_Anime_Pivot_Normalized.values)**2).sum().sum()) + Reg_Term
  J_Cost_temp = J_Cost
  J_list.append(J_Cost)

  User_Coeff_temp = (alpha/m)*((((Rating_Predict_User - User_Anime_Pivot_Normalized.values)).transpose()).dot(Anime_Watched_Features))
  User_Coeff = (1-lambda_reg*alpha/m)*User_Coeff - User_Coeff_temp.transpose()

In [511]:
Anime_Recommend_User = pd.DataFrame(((anime_features_svd.values @ s_matrix) @ User_Coeff), index=anime_features_svd.index.tolist(), columns=['Recommendation Rating'])

In [512]:
Anime_Recommend_User = Anime_Recommend_User[~Anime_Recommend_User.index.isin(User_data.title.tolist())]

Scaling the range of ratings to be between 0 and 1

In [513]:
Anime_Recommend_User = (Anime_Recommend_User - Anime_Recommend_User.min()[0])/(Anime_Recommend_User.max()[0] - Anime_Recommend_User.min()[0]) # Just to show values as < 1, but not necessary

In [514]:
Anime_Recommend_User.sort_values(by='Recommendation Rating', ascending=False)[:20]

Unnamed: 0,Recommendation Rating
Shingeki no Kyojin Season 2,1.0
Hunter x Hunter (2011),0.999689
Boku no Hero Academia 2nd Season,0.975401
Boku no Hero Academia,0.959929
Fullmetal Alchemist: Brotherhood,0.94017
One Punch Man,0.932124
Mob Psycho 100,0.897651
Kizumonogatari I: Tekketsu-hen,0.894225
Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,0.892695
Kenpuu Denki Berserk,0.872394


In [529]:
Anime_Recommend_User.loc['R.O.D the TV']

Recommendation Rating    0.629336
Name: R.O.D the TV, dtype: float64

I also scaled the values obtained in the content based step to have a range of 0~1

In [516]:
Recommend_Content = Recommend[Recommend.index.isin(anime_features_svd.index.tolist())]
Recommend_Content_DF = pd.DataFrame(Recommend_Content.values, index=Recommend_Content.index.tolist(), columns=['Recommendation Rating'])
Recommend_Content_DF = Recommend_Content_DF.reindex(Anime_Recommend_User.index)

Recommend_Content_DF = (Recommend_Content_DF - Recommend_Content_DF.min())/(Recommend_Content_DF.max() - Recommend_Content_DF.min()) 

Recommend_Content_DF.sort_values(by='Recommendation Rating', ascending=False)[:20]

Unnamed: 0,Recommendation Rating
Urusei Yatsura Movie 6: Itsudatte My Darling,1.0
Urusei Yatsura Movie 4: Lum The Forever,1.0
Urusei Yatsura Movie 3: Remember My Love,1.0
Urusei Yatsura Movie 2: Beautiful Dreamer,1.0
Urusei Yatsura Movie 1: Only You,1.0
Urusei Yatsura,1.0
Trigun: Badlands Rumble,0.972851
R.O.D the TV,0.972851
Generator Gawl,0.972851
Trigun,0.972851


This is the final result, where we have a good list of recommendations for the user based both in his content preferences and in his past animes ratings. I used a list of most action/adventure animes that I have watched so I could see if the final result would make sense, and it seems that the system worked as intended.

I scaled the recommendation ratings of both steps and then got the average between the two values of each anime to get a hybrid recommendation rating. I also though of using a the value from the content based step as a weight, and use it to multiply by the recommendation rating from the collaborative step. I chose to use the average approach, though, since it gives equal weight to both systems.

In [527]:
Hybrid_Recommendation = (Anime_Recommend_User + Recommend_Content_DF)/2
Hybrid_Recommendation.sort_values(by='Recommendation Rating', ascending=False)[:40]

Unnamed: 0,Recommendation Rating
Trigun: Badlands Rumble,0.863064
Urusei Yatsura,0.862003
Fullmetal Alchemist: Brotherhood,0.861951
Urusei Yatsura Movie 2: Beautiful Dreamer,0.849732
Digimon Adventure tri. 1: Saikai,0.846965
Urusei Yatsura Movie 6: Itsudatte My Darling,0.845409
Urusei Yatsura Movie 4: Lum The Forever,0.844337
Urusei Yatsura Movie 3: Remember My Love,0.844225
Cowboy Bebop: Yose Atsume Blues,0.842728
Urusei Yatsura Movie 1: Only You,0.84266
