### Libraries Import 

In [1]:
import pandas as pd
import numpy as np

### Data Import 

In [2]:
df = pd.read_csv('amz_data.csv')

In [3]:
df.head()

Unnamed: 0,user_id,Movie1,Movie2,Movie3,Movie4,Movie5,Movie6,Movie7,Movie8,Movie9,...,Movie197,Movie198,Movie199,Movie200,Movie201,Movie202,Movie203,Movie204,Movie205,Movie206
0,A3R5OBKS7OM2IR,5.0,5.0,,,,,,,,...,,,,,,,,,,
1,AH3QC2PC1VTGP,,,2.0,,,,,,,...,,,,,,,,,,
2,A3LKP6WPMP9UKX,,,,5.0,,,,,,...,,,,,,,,,,
3,AVIY68KEPQ5ZD,,,,5.0,,,,,,...,,,,,,,,,,
4,A1CV1WROP5KTTW,,,,,5.0,,,,,...,,,,,,,,,,


### Average rating of each movie

In [4]:
# Average rating of each movie. Every column except the user identifier holds
# one movie's ratings; mean() skips NaN (unrated) entries by default.
# Selecting by column name avoids hard-coding how many movie columns exist.
movies = df.drop(columns='user_id')
movies.mean()

Movie1      5.000000
Movie2      5.000000
Movie3      2.000000
Movie4      5.000000
Movie5      4.103448
              ...   
Movie202    4.333333
Movie203    3.000000
Movie204    4.375000
Movie205    4.628571
Movie206    4.923077
Length: 206, dtype: float64

In [5]:
# Keep an independent snapshot of the wide ratings table under the name `data`.
data = df.copy(deep=True)

### Merging the movie columns into movie_id and rating columns

In [6]:
# Reshape from wide (one column per movie) to long form: one row per
# (user_id, movie_id) pair carrying that pair's rating (NaN when unrated).
df2 = df.melt(id_vars="user_id", var_name="movie_id", value_name="rating")

In [7]:
df2.head()

Unnamed: 0,user_id,movie_id,rating
0,A3R5OBKS7OM2IR,Movie1,5.0
1,AH3QC2PC1VTGP,Movie1,
2,A3LKP6WPMP9UKX,Movie1,
3,AVIY68KEPQ5ZD,Movie1,
4,A1CV1WROP5KTTW,Movie1,


### Dropping the null values

In [8]:
# Drop every row that contains a missing value; after the melt these are the
# (user, movie) pairs without a rating. inplace=True mutates df2 itself, so the
# df2.head() output shown before this cell no longer matches the frame.
df2.dropna(inplace = True)

In [9]:
df2.head()

Unnamed: 0,user_id,movie_id,rating
0,A3R5OBKS7OM2IR,Movie1,5.0
4848,A3R5OBKS7OM2IR,Movie2,5.0
9697,AH3QC2PC1VTGP,Movie3,2.0
14546,A3LKP6WPMP9UKX,Movie4,5.0
14547,AVIY68KEPQ5ZD,Movie4,5.0


### n_user and n_movies for matrix creation

In [10]:
# Number of distinct users and movies among the remaining rated rows; these
# are later used as the dimensions of the user x movie rating matrices.
n_user = len(df2['user_id'].unique())
n_movies = len(df2['movie_id'].unique())

In [11]:
n_user, n_movies

(4848, 206)

In [13]:
# movie_id of every rating row whose rating equals the highest rating value
# present in the data (a movie appears once per such row), then the first five.
top_rating = df2['rating'].max()
is_top_rated = df2['rating'].eq(top_rating)
movies_w_ratings = df2.loc[is_top_rated, 'movie_id']
movies_w_ratings.head(5)

0        Movie1
4848     Movie2
14546    Movie4
14547    Movie4
19396    Movie5
Name: movie_id, dtype: object

In [25]:
# Number of ratings each movie received, sorted from most-rated to least-rated
# (value_counts orders by descending count by default).
movie_reviews_count = df2['movie_id'].value_counts()
movie_reviews_count

Movie127    2313
Movie140     578
Movie16      320
Movie103     272
Movie29      243
            ... 
Movie177       1
Movie149       1
Movie203       1
Movie47        1
Movie147       1
Name: movie_id, Length: 206, dtype: int64

### formatting movies column

In [None]:
# Reduce labels such as 'Movie12' to their numeric part ('12').
# str.lstrip('Movie') does not remove the literal prefix: it strips any leading
# run of the characters M/o/v/i/e, which only works by accident. Extracting the
# first run of digits states the intent directly and also ignores any trailing
# letter suffix.
df2['movie_id'] = df2['movie_id'].str.extract(r'(\d+)', expand=False)

In [None]:
# Convert the digit strings produced by the previous cell into integers.
df2['movie_id'] = df2['movie_id'].astype(int)

In [None]:
df2.dtypes

### List of unique movies and users 

In [None]:
# Distinct movie ids in order of first appearance; the position of an id in
# this array is what get_movieid_index() returns.
movie_list = df2['movie_id'].unique()

In [None]:
# Distinct user ids in their original df order, restricted to users that still
# have at least one rating in df2. n_user (the matrix height) is counted from
# df2, so building the lookup from the same set of users guarantees that every
# index returned by get_userid_index() is < n_user.
has_rating = df['user_id'].isin(df2['user_id'])
user_list = df.loc[has_rating, 'user_id'].unique()

In [None]:
movie_list

In [None]:
user_list

### Functions for finding the index of movies and users

In [None]:
def get_movieid_index(movie_id):
    """Return the position of the first entry of ``movie_list`` equal to ``movie_id``.

    ``movie_list`` is read from the notebook's global namespace.
    Raises ``IndexError`` when no entry matches.
    """
    matches = np.flatnonzero(movie_list == movie_id)
    return matches[0]

In [None]:
def get_userid_index(user_id):
    """Return the position of the first entry of ``user_list`` equal to ``user_id``.

    ``user_list`` is read from the notebook's global namespace.
    Raises ``IndexError`` when no entry matches.
    """
    matches = np.flatnonzero(user_list == user_id)
    return matches[0]

In [None]:
df2[['user_id','movie_id']]

In [None]:
# Map each row's user_id to its 0-based position in user_list. The lookup runs
# once per row and each call scans user_list with np.where.
df2['user_id_index']=df2['user_id'].apply(get_userid_index)

In [None]:
df2.columns

In [None]:
# Map each row's movie_id to its 0-based position in movie_list. The lookup
# runs once per row and each call scans movie_list with np.where.
df2['movie_id_index']=df2['movie_id'].apply(get_movieid_index)

In [None]:
df2.columns

### Changing the column orders

In [None]:
# Columns to keep, in the order the matrix-filling loops read them positionally:
# user index, movie index, rating.
new_col_order=['user_id_index','movie_id_index','rating']

In [None]:
# Keep only the columns named in new_col_order, in that order. This is more
# than a reordering: the original user_id and movie_id columns are dropped, so
# cells above that read them cannot be re-run after this point.
df2=df2.reindex(columns=new_col_order)

In [None]:
df2[['user_id_index','movie_id_index']]

In [None]:
df2.head()

### Movies that received the maximum rating value

In [None]:
# Which rating rows carry the highest rating value present in the data?
# Prints that value, then shows the movie_id_index of each such row (this is
# about the rating value, not about how many ratings a movie received).
max_rating = df2['rating'].max()
print(max_rating)
df2.loc[df2['rating'] == max_rating, 'movie_id_index']

In [None]:
df.describe()

### Splitting the dataset into train and test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rating rows for testing. A fixed random_state makes the
# shuffle, and every result derived from it, reproducible across
# Restart & Run All.
train_data, test_data = train_test_split(df2, test_size=0.25, random_state=42)

### Creating matrix for calculation

In [None]:
# Dense user x movie matrix for the training ratings; 0.0 marks "no rating".
train_data_matrix = np.zeros(shape=(n_user, n_movies))

In [None]:
train_data_matrix

In [None]:
train_data

In [None]:
# Dense user x movie matrix for the held-out ratings; 0.0 marks "no rating".
test_data_matrix = np.zeros(shape=(n_user, n_movies))

### Filling the matrix with correct ratings

In [None]:
# user_id_index and movie_id_index are already 0-based positions (they come
# from np.where lookups), so they address the matrix directly. Subtracting 1
# would turn index 0 into -1, which NumPy wraps around to the last row/column
# and shifts every other user and movie by one.
for row in train_data.itertuples(index=False):
    train_data_matrix[row.user_id_index, row.movie_id_index] = row.rating

In [None]:
test_data_matrix

In [None]:
test_data_matrix

In [None]:
# user_id_index and movie_id_index are already 0-based positions (they come
# from np.where lookups), so they address the matrix directly. Subtracting 1
# would turn index 0 into -1, which NumPy wraps around to the last row/column
# and shifts every other user and movie by one.
for row in test_data.itertuples(index=False):
    test_data_matrix[row.user_id_index, row.movie_id_index] = row.rating

### importing module for calculating similarity 

In [None]:
from sklearn.metrics import pairwise_distances

In [None]:
# pairwise_distances(metric='cosine') returns cosine *distance*, i.e.
# 1 - cosine similarity. predict() uses this matrix as similarity weights, so
# convert it back; otherwise the least similar users get the largest weight.
user_similarity = 1 - pairwise_distances(train_data_matrix, metric='cosine')

In [None]:
# pairwise_distances(metric='cosine') returns cosine *distance*, i.e.
# 1 - cosine similarity. predict() uses this matrix as similarity weights, so
# convert it back; otherwise the least similar movies get the largest weight.
# Transposing puts movies on the rows so the result is movie x movie.
movie_similarity = 1 - pairwise_distances(train_data_matrix.T, metric='cosine')

In [None]:
user_similarity

In [None]:
movie_similarity

### Function for predicting the movie rating

In [None]:
def predict(ratings, similarity, type='user'):
    """Predict a full rating matrix from known ratings and a weight matrix.

    Parameters
    ----------
    ratings : numpy.ndarray
        2-D array; rows are treated as users and columns as items.
    similarity : numpy.ndarray
        Square weight matrix: rows x rows of ``ratings`` when
        ``type == 'user'``, columns x columns when ``type == 'item'``.
    type : {'user', 'item'}
        ``'user'``: each row's mean plus the weighted average of all rows'
        deviations from their own means.
        ``'item'``: weighted average of each row's ratings across columns.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``ratings``.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'user'`` nor ``'item'`` (previously this fell
        through to the ``return`` and surfaced as an ``UnboundLocalError``).
    """
    if type == 'user':
        # keepdims keeps the means as a column vector so they broadcast
        # against the 2-D ratings array.
        mean_user_rating = ratings.mean(axis=1, keepdims=True)
        ratings_diff = ratings - mean_user_rating
        # Normalise by the total absolute weight of each row.
        weight_totals = np.abs(similarity).sum(axis=1, keepdims=True)
        return mean_user_rating + similarity.dot(ratings_diff) / weight_totals
    if type == 'item':
        # Row vector of per-item weight totals, broadcast across all users.
        weight_totals = np.abs(similarity).sum(axis=1)[np.newaxis, :]
        return ratings.dot(similarity) / weight_totals
    raise ValueError("type must be 'user' or 'item', got {!r}".format(type))

In [None]:
# Build both prediction matrices from the training ratings: one weighted by
# the movie x movie matrix (type='item') and one by the user x user matrix
# (type='user').
movie_prediction = predict(train_data_matrix, movie_similarity, type='item')
user_prediction = predict(train_data_matrix, user_similarity, type='user')

In [None]:
user_prediction

In [None]:
movie_prediction

In [None]:
# For each of the first 10 rows of the user-based prediction matrix, print the
# five column indices with the highest predicted rating, best first.
for count, line in enumerate(user_prediction):
    if count >= 10:
        break
    print(line.argsort()[-5:][::-1])

### Predictions for a particular user

In [None]:
# User-based recommendations for row 0 of the prediction matrix.
# The printed values are column positions in the matrix (best first), not the
# original MovieN labels.
line = user_prediction[0]
top_five = np.argsort(line)[-5:][::-1]
print("Recommended movies for user 1 are: ", top_five)

In [None]:
# User-based recommendations for row 50 of the prediction matrix.
# The printed values are column positions in the matrix (best first), not the
# original MovieN labels.
# NOTE(review): row 0 is labelled "user 1" elsewhere in this notebook, which
# would make row 50 "user 51" - confirm which user is intended here.
line = user_prediction[50]
top_five = np.argsort(line)[-5:][::-1]
print("Recommended movies for user 50 are: ", top_five)

In [None]:
# Item-based recommendations for row 0 of the prediction matrix.
# The printed values are column positions in the matrix (best first), not the
# original MovieN labels.
line = movie_prediction[0]
top_five = np.argsort(line)[-5:][::-1]
print("Recommended movies for user 1 are: ", top_five)

In [None]:
# Item-based recommendations for row 50 of the prediction matrix.
# The printed values are column positions in the matrix (best first), not the
# original MovieN labels.
# NOTE(review): row 0 is labelled "user 1" elsewhere in this notebook, which
# would make row 50 "user 51" - confirm which user is intended here.
line = movie_prediction[50]
top_five = np.argsort(line)[-5:][::-1]
print("Recommended movies for user 50 are: ", top_five)