# Movie-Movie Recommendation


### DESCRIPTION

Consider the ratings dataset below containing data on UserID, MovieID, Rating, and Timestamp. Each line of this file represents one rating of one movie by one user and has the following format: UserID::MovieID::Rating::Timestamp Ratings are made on a 5 star scale with half star increments. UserID: represents the ID of the user MovieID: represents the ID of the movie Timestamp: represents seconds from midnight Coordinated Universal Time (UTC) of January 1, 1970.

### Objective
Predict a movie-movie recommendation model.

In [1]:
# import required libraries
import pandas as pd
import numpy as np

In [2]:
# import dataset
df = pd.read_csv('Recommend.csv', names=['user_id', 'movie_id', 'rating', 'timestamp'])

In [3]:
#Look at the data
df.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
# Look at the shape
df.shape

(100000, 4)

In [5]:
# check for na values
df.isnull().sum()

user_id      0
movie_id     0
rating       0
timestamp    0
dtype: int64

In [6]:
# number of users
n_users = df.user_id.unique().shape[0]
n_users

943

In [7]:
# number of movies
n_movies = df.movie_id.unique().shape[0]
n_movies

1682

In [8]:
# Import sklearn library for model selection and spliting dataset into training and testing
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(df, test_size = 0.25)

In [9]:
train_data.shape

(75000, 4)

In [10]:
train_data

Unnamed: 0,user_id,movie_id,rating,timestamp
68068,846,426,1,883949046
85642,873,339,3,891392871
74136,169,199,4,891359353
11232,296,504,5,884197394
61335,64,111,4,889739975
...,...,...,...,...
95394,884,213,4,876859207
70991,666,507,3,880567771
31479,542,693,4,886533395
98224,892,495,4,886609218


In [11]:
test_data.shape

(25000, 4)

In [12]:
test_data

Unnamed: 0,user_id,movie_id,rating,timestamp
589,308,81,5,887737293
1677,303,94,3,879485318
65355,605,949,5,879427164
14872,366,218,3,888857866
62152,698,625,3,886366731
...,...,...,...,...
9121,33,895,3,891964187
88791,612,476,3,875324947
48834,524,965,4,884635288
26475,94,66,2,891721889


In [13]:
# Create an train data matrix, user id as row and movie id as column
train_data_matrix = np.zeros((n_users, n_movies))

In [14]:
train_data_matrix.shape

(943, 1682)

In [15]:
for line in train_data.itertuples():
    train_data_matrix[line[1]-1, line[2]-1] = line[3]
train_data_matrix

array([[5., 3., 4., ..., 0., 0., 0.],
       [4., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]])

In [16]:
# Create an test data matrix, user id as row and movie id as column
test_data_matrix = np.zeros((n_users, n_movies))

In [17]:
test_data_matrix.shape

(943, 1682)

In [18]:
for line in test_data.itertuples():
    test_data_matrix[line[1]-1, line[2]-1] = line[3]
test_data_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [19]:
# create cosine similarity matrices for movie and predict a movie-movie recommendation model
from sklearn.metrics import pairwise_distances

In [20]:
movie_similarity = pairwise_distances(train_data_matrix.T, metric='cosine')

In [21]:
movie_pred = train_data_matrix.dot(movie_similarity) / np.array([np.abs(movie_similarity).sum(axis=1)])

In [22]:
movie_pred

array([[0.3601423 , 0.37468591, 0.38555253, ..., 0.43987287, 0.43020151,
        0.42583466],
       [0.07689702, 0.09049467, 0.08627455, ..., 0.09093084, 0.09268116,
        0.09258121],
       [0.0601847 , 0.06255669, 0.06002096, ..., 0.05612977, 0.06010916,
        0.0607841 ],
       ...,
       [0.03082545, 0.03993049, 0.03774397, ..., 0.04412229, 0.04334926,
        0.04348863],
       [0.11176174, 0.12036151, 0.12566507, ..., 0.13078367, 0.12922005,
        0.13074026],
       [0.2001507 , 0.19112607, 0.21173218, ..., 0.2444638 , 0.23716091,
        0.23769593]])