In [1]:
# Import relevant libraries 

import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator
%matplotlib inline

In [2]:
# Reading the dataset files in panda dataframe 
anime = pd.read_csv('anime-recommendations-database/anime.csv')
rating = pd.read_csv('anime-recommendations-database/rating.csv')

In [3]:
rating.rating.replace({-1: np.nan}, inplace = True)
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,
1,1,24,
2,1,79,
3,1,226,
4,1,241,


In [4]:
anime_tv = anime[anime['type']=='TV']
anime_tv.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351


In [91]:
merged = rating.merge(anime_tv, left_on = 'anime_id', right_on = 'anime_id', suffixes= ['_user', ''])
merged.rename(columns = {'rating_user':'user_rating'}, inplace = True)

merged=merged[['user_id', 'anime_id', 'user_rating']]


In [92]:
merged_sub= merged[merged.user_id < 1000]
sub_data = merged_sub[merged_sub.anime_id<2000]
sub_data.head()

Unnamed: 0,user_id,anime_id,user_rating
0,1,20,
1,3,20,8.0
2,5,20,6.0
3,6,20,
4,10,20,


In [93]:
from sklearn.model_selection import train_test_split


train, test = train_test_split(sub_data)

In [94]:
user_item = [[0 for i in range(2000)] for j in range(1000)]


In [95]:
for row in train.iterrows():
    row1 = list(row)[1]
    user_item[int(row1.user_id)][int(row1.anime_id)] = row1.user_rating

In [97]:
# Matrix Factorization function for calculation of matrixes with latent factors=2 
def matrix_factorization(user_item, factor1, factor2, num, steps=300, alpha=0.002, beta=0.2):
    factor2 = factor2.T
    for step in range(steps):
        for i in range(len(user_item)):
            for j in range(len(user_item[i])):
                if user_item[i][j] > 0:
                    eij = user_item[i][j] - np.dot(factor1[i,:],factor2[:,j])
                    for k in range(K):
                        factor1[i][k] = factor1[i][k] + alpha * (2 * eij * factor2[k][j] - beta * factor1[i][k])
                        factor2[k][j] = factor2[k][j] + alpha * (2 * eij * factor1[i][k] - beta * factor2[k][j])
        eR = np.dot(factor1,factor2)
        e = 0
        for i in range(len(user_item)):
            for j in range(len(user_item[i])):
                if user_item[i][j] > 0:
                    e = e + pow(user_item[i][j] - np.dot(factor1[i,:],factor2[:,j]), 2)
                    for k in range(K):
                        e = e + (beta/2) * (pow(factor1[i][k],2) + pow(factor2[k][j],2))
        if e < 0.01:
            break
    return factor1, factor2.T

In [98]:
user_item = np.array(user_item)

K = 2

#Initializing the latent factor matrix with random values.

rand_factor1 = np.random.rand(1000,K)
rand_factor2 = np.random.rand(2000,K)

#Calling the function for finding factors of user X movies matrix.
factor1, factor2 = matrix_factorization(user_item, rand_factor1, rand_factor2, K)

#Finding the prediction matrix for all user X items
predicted = np.dot(factor1, factor2.T)

In [99]:
y_actual = list()
y_pred = list()

for row in test.iterrows():
    row1=list(row)[1]
    y_actual.append(row1.user_rating)
    y_pred.append(predicted[int(row1.user_id)][int(row1.anime_id)])
    
error = abs(np.array(y_pred) - np.array(y_actual))
sq_error = np.square(error)
mae = np.nansum(error)
rmse = np.nansum(sq_error)
rmse /= len(y_pred)
mae /= len(y_pred)

print(mae)

print(rmse)

0.9321489506354841
2.1572001515570784


In [100]:
print("MAE with Matrix Factorization : "+str(mae))

print("RMSE with Matrix Factorization : "+str(rmse))

MAE with Matrix Factorization : 0.9321489506354841
RMSE with Matrix Factorization : 2.1572001515570784
