In [2]:
import pandas as pd

In [3]:
movie_ratings = pd.read_csv('ml-latest-small/ratings.csv')

ratings = movie_ratings[['rating', 'userId', 'movieId']]

ratings = ratings.pivot_table(index = 'movieId', columns = 'userId', values = 'rating') # We have the same users who bought the same item more than once

ratings = ratings.fillna(0)

ratings.head()


userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,4.0,0.0,4.5,0.0,0.0,0.0,...,4.0,0.0,4.0,3.0,4.0,2.5,4.0,2.5,3.0,5.0
2,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,5.0,3.5,0.0,0.0,2.0,0.0,0.0
3,4.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
from math import sqrt

In [5]:
def cosine_similarity(item1, item2):
    nominator = 0
    norm1 = 0
    norm2 = 0
    for i in range(len(item1)):
        nominator += item1[i] * item2[i]
        norm1 += item1[i] ** 2
        norm2 += item2[i] ** 2
    return nominator/(sqrt(norm1) * sqrt(norm2))

In [5]:
cosine_similarity(ratings.iloc[0, :].to_numpy(), ratings.iloc[1, :].to_numpy())


0.4105620635017316

In [6]:
def get_neighbors(data, movie, num_neighbors):
    sims = list()
    movie_list = list(data.index)
    for i in range(len(data)):
        
        sim = cosine_similarity(data.iloc[i, :].to_numpy(),
                                movie.to_numpy())
        #print(sim)
        sims.append((movie_list[i], sim))
        #print(sims)
        sims.sort(key=lambda tup: tup[1], reverse=True)
    neighbor_indices = list()
    neighbor_sims = list()
    for k in range(num_neighbors):
        neighbor_indices.append(sims[k][0])
        neighbor_sims.append(sims[k][1])
        #print(neighbors)
    return neighbor_indices, neighbor_sims


In [7]:
get_neighbors(ratings.head(100), ratings.iloc[0, :], 3)

([1, 32, 34], [1.0, 0.5085926164026988, 0.5051964119803727])

In [14]:
ratings_copy = ratings.head(100).copy()

num_neighbors = 3

neighbor_indices = list()

neighbor_sims = list()

userid = ratings.columns.tolist().index(1)

for m in range(len(ratings.head(100))):

    neighbor_indices.append(get_neighbors(ratings.head(100), ratings.iloc[m, :], num_neighbors)[0])

    #print(neighbor_indices)

    neighbor_sims.append(get_neighbors(ratings.head(100), ratings.iloc[m, :], num_neighbors)[1])

    #print(neighbor_sims)

    if ratings.iloc[m, userid] == 0:

        if m in neighbor_indices[m]:

            movie_id = neighbor_indices[m].index(m)

            neighbor_indices[m].remove(m)

            #print(neighbor_indices[m])

            neighbor_sims[m].pop(movie_id)

            #print(neighbor_sims[m])
        
        else:

            neighbor_indices[m] = neighbor_indices[m][:num_neighbors-1]

            neighbor_sims[m] = neighbor_sims[m][:num_neighbors-1]

        movie_sims_copy = neighbor_sims[m].copy()

        nominator = 0

        for s in range(len(neighbor_indices[m]) -1 ):

            if ratings.iloc[neighbor_indices[m][s], userid] == 0:
                
                movie_sims_copy.pop(s)

            else:
                nominator += neighbor_sims[m][s] * ratings.iloc[neighbor_indices[m][s], userid]

        if len(movie_sims_copy) > 0:

            if sum(movie_sims_copy) > 0:

                predicted_rating = nominator / sum(movie_sims_copy)

            else:
                predicted_rating = 0

        else:
            predicted_rating = 0

        ratings_copy.iloc[m, userid] = predicted_rating


In [51]:
def recommend_movies(user, num_recommend_movies):

    print('The list of the movies {} has watched \n'.format(user))

    for m in ratings.head(100)[ratings.head(100).iloc[:, user] > 0].index.tolist():
        print(m)

    print('\n')

    recommend_movies = []

    for m in ratings.head(100)[ratings.head(100).iloc[:, user] == 0].index.tolist():

        index = ratings.head(100).index.tolist().index(m)

        predicted_rating = ratings_copy.iloc[index, user]

        recommend_movies.append((m, predicted_rating))

    sorted_rm = sorted(recommend_movies, key=lambda x:x[1], reverse=True)

    print('The list of the recommmended movies \n')

    rank = 1

    for recommend_movie in sorted_rm[:num_recommend_movies]:

        print('{}: {} predicted rating:{}'.format(rank, recommend_movie[0], recommend_movie[1]))
        
        rank += 1


In [52]:
recommend_movies(0, 5)

The list of the movies 0 has watched 

1
3
6
47
50
70
101
110


The list of the recommmended movies 

1: 43 predicted rating:3.5989136191042386
2: 89 predicted rating:3.586856913103017
3: 46 predicted rating:3.4618695184807766
4: 97 predicted rating:3.0320608129200948
5: 5 predicted rating:2.713701299482555
