### Problem Statement
Consider the `Recommend.csv` dataset, containing the data on: userID, MovieID, Rating and Timestamp. each line of this file represents one rating of one movie by one user, and has the following format.
userID:: MovieID:: Rating:: Timestamp::
Ratings are made on a 5 star scale with half star increments.

In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv('Recommend.csv')
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
from sklearn.model_selection import train_test_split
no_of_users = df.user_id.unique().shape[0]
no_of_movies = df.movie_id.unique().shape[0]
dtrain, dtest = train_test_split(df,test_size=0.25)

In [6]:
dtrain_matrix = np.zeros((no_of_users,no_of_movies))
for line in dtrain.itertuples():
    dtrain_matrix[line[1]-1, line[2]-1] = line[3]
dtrain_matrix

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [7]:
dtest_matrix = np.zeros((no_of_users,no_of_movies))
for line in dtest.itertuples():
    dtest_matrix[line[1]-1, line[2]-1] = line[3]
dtest_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
from sklearn.metrics import pairwise_distances
user_similarity = pairwise_distances(dtrain_matrix, metric='cosine')
movie_similarity = pairwise_distances(dtrain_matrix.T, metric='cosine')
mean_user_rating = dtrain_matrix.mean(axis=1)[:,np.newaxis]
ratings_diff = (dtrain_matrix - mean_user_rating)
user_pred = mean_user_rating + user_similarity.dot(ratings_diff)/np.array([np.abs(user_similarity).sum(axis=1)]).T
user_pred

array([[ 1.63896517,  0.61794312,  0.49615109, ...,  0.31358178,
         0.31357112,  0.31109229],
       [ 1.34396552,  0.31695536,  0.14977502, ..., -0.06532523,
        -0.06401362, -0.06727184],
       [ 1.37285369,  0.27462786,  0.11138718, ..., -0.10993251,
        -0.10810137, -0.11145617],
       ...,
       [ 1.20328006,  0.22939491,  0.07154851, ..., -0.13258071,
        -0.13161065, -0.13492021],
       [ 1.3955543 ,  0.33087054,  0.19611658, ..., -0.01673102,
        -0.01632545, -0.01891686],
       [ 1.44608482,  0.39247376,  0.27964658, ...,  0.09504519,
         0.09494439,  0.09255231]])