In [40]:
import re
import math
import copy
import numpy as np
import pandas as pd
from collections import OrderedDict
from sklearn.metrics.pairwise import linear_kernel
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# COLLABORATIVE FILTERING

## Read ratings.csv 

In [25]:
ratings = pd.read_csv('Data/ratings.csv', sep=',')
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


## Data Splitting

In [50]:
# 5-FOLD VALIDATIONS
rating_train1, rating_test1 = train_test_split(ratings, test_size = 0.2 , shuffle=True)
temp1 = copy.deepcopy(rating_test1)
temp1.loc[:,'rating'] = 0
rating_train1_df = pd.concat([rating_train1, temp1])

rating_train2, rating_test2 = train_test_split(ratings, test_size = 0.2 , shuffle=True)
temp2 = copy.deepcopy(rating_test2)
temp2.loc[:,'rating'] = 0
rating_train2_df = pd.concat([rating_train2, temp2])

rating_train3, rating_test3 = train_test_split(ratings, test_size = 0.2 , shuffle=True)
temp3 = copy.deepcopy(rating_test3)
temp3.loc[:,'rating'] = 0
rating_train3_df = pd.concat([rating_train3, temp3])

rating_train4, rating_test4 = train_test_split(ratings, test_size = 0.2 , shuffle=True)
temp4 = copy.deepcopy(rating_test4)
temp4.loc[:,'rating'] = 0
rating_train4_df = pd.concat([rating_train4, temp4])

rating_train5, rating_test5 = train_test_split(ratings, test_size = 0.2 , shuffle=True)
temp5 = copy.deepcopy(rating_test5)
temp5.loc[:,'rating'] = 0
rating_train5_df = pd.concat([rating_train5, temp5])

## Convert rating into matrix representation 

In [51]:
rating1 = rating_train1_df.pivot(index='userId', columns='movieId', values='rating')
rating1 = rating1.fillna(0) # Rating kosong diganti dengan 0
rating1 

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
rating2 = rating_train2_df.pivot(index='userId', columns='movieId', values='rating')
rating2 = rating2.fillna(0) # Rating kosong diganti dengan 0
rating2 

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
rating3 = rating_train3_df.pivot(index='userId', columns='movieId', values='rating')
rating3 = rating3.fillna(0) # Rating kosong diganti dengan 0
rating3 

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
rating4 = rating_train4_df.pivot(index='userId', columns='movieId', values='rating')
rating4 = rating4.fillna(0) # Rating kosong diganti dengan 0
rating4 

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
rating5 = rating_train5_df.pivot(index='userId', columns='movieId', values='rating')
rating5 = rating5.fillna(0) # Rating kosong diganti dengan 0
rating5 

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Matrix Factorization

In [56]:
def matrix_factorization(R, P, Q, K, steps=5, alpha=0.02, beta=0.2):
    Q = Q.T
    for step in range(steps):
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    eij = R[i][j] - np.dot(P[i,:],Q[:,j])
                    for k in range(K):
                        P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
                        Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
        eR = np.dot(P,Q)
        e = 0
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2)
                    for k in range(K):
                        e = e + (beta/2) * ( pow(P[i][k],2) + pow(Q[k][j],2) )
        if e < 0.001:
            break
    return P, Q.T

def getRatingPredictions(data):
    R = data.to_numpy()

    N = len(R)
    M = len(R[0])
    K = 2

    P = np.random.rand(N,K)
    Q = np.random.rand(M,K)
    
    user_latent_features, item_latent_features = matrix_factorization(R, P, Q, K)
    return np.dot(user_latent_features, item_latent_features.T)

In [57]:
pred1 = getRatingPredictions(rating1)
print('Fold 1 DONE')
pred2 = getRatingPredictions(rating2)
print('Fold 2 DONE')
pred3 = getRatingPredictions(rating3)
print('Fold 3 DONE')
pred4 = getRatingPredictions(rating4)
print('Fold 4 DONE')
pred5 = getRatingPredictions(rating5)
print('Fold 5 DONE')

Fold 1 DONE
Fold 2 DONE
Fold 3 DONE
Fold 4 DONE
Fold 5 DONE


In [58]:
pred1 = pd.DataFrame(data=pred1, index=rating1.index, columns=rating1.columns)
pred2 = pd.DataFrame(data=pred2, index=rating1.index, columns=rating1.columns)
pred3 = pd.DataFrame(data=pred3, index=rating1.index, columns=rating1.columns)
pred4 = pd.DataFrame(data=pred4, index=rating1.index, columns=rating1.columns)
pred5 = pd.DataFrame(data=pred5, index=rating1.index, columns=rating1.columns)

## Evaluations with Recall@K

In [59]:
recall = []
def countRecall(test, pred):
    real = test[test.rating>4]
    rec = copy.deepcopy(real)
    for i in rec.index:
        value = pred[rec.movieId.loc[i]].loc[rec.userId.loc[i]]
        rec.set_value(i, 'rating',value) 
    numerator = rec[rec.rating>4].count().rating
    denominator = real[real.rating>4].count().rating
    return numerator/denominator

recall.append(countRecall(rating_test1,pred1))
recall.append(countRecall(rating_test2,pred2))
recall.append(countRecall(rating_test3,pred3))
recall.append(countRecall(rating_test4,pred4))
recall.append(countRecall(rating_test5,pred5))

recall

  import sys


[0.5902793549852373,
 0.5880576208178439,
 0.5883152173913043,
 0.5855092919313103,
 0.5895504824664627]

In [62]:
fold = recall.index(max(recall))+1
print(fold)
if fold==1:
    rating = rating1
    rating_train = rating_train1
    predict = pred1
elif fold==2:
    rating = rating2
    rating_train = rating_train2
    predict = pred2
elif fold==3:
    rating = rating3
    rating_train = rating_train3
    predict = pred3
elif fold==4:
    rating = rating4
    rating_train = rating_train4
    predict = pred4
elif fold==5:
    rating = rating5
    rating_train = rating_train5
    predict = pred5

1


In [63]:
print("THE ORIGINAL MATRIX")
rating

THE ORIGINAL MATRIX


movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [64]:
print("THE PREDICTED MATRIX USING MATRIX FACTORIZATION")
predict

THE APPROXIMATION MATRIX BY MF


movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.754333,4.311571,3.461073,2.946872,2.990490,4.381082,3.599101,3.488525,3.428278,4.023413,...,2.827272,2.610737,1.844916,1.694750,3.579793,3.288769,0.549332,2.757761,2.821347,3.748232
2,3.292882,3.785213,3.038544,2.586122,2.637244,3.858966,3.143117,3.068279,2.995737,3.548939,...,2.494581,2.292234,1.641451,1.490101,3.146584,2.895942,0.489308,2.447989,2.471202,3.307710
3,2.894937,3.398591,2.728194,2.302331,2.601575,3.716151,2.494196,2.866155,2.412995,3.516352,...,2.485909,2.062390,1.903538,1.382272,2.900487,2.771305,0.578318,2.729079,2.105970,3.306819
4,2.731599,3.119852,2.504430,2.137127,2.107153,3.109100,2.683952,2.497269,2.547925,2.831208,...,1.986030,1.888088,1.230596,1.215543,2.572049,2.338179,0.363736,1.866505,2.068930,2.630373
5,2.982484,3.406089,2.734204,2.333288,2.299458,3.393254,2.931627,2.725900,2.782897,3.089524,...,2.167169,2.061296,1.341626,1.326872,2.807698,2.551954,0.396502,2.035434,2.259241,2.870232
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.996924,3.449005,2.768657,2.355309,2.416201,3.530404,2.845421,2.802035,2.714019,3.252352,...,2.286911,2.088877,1.519929,1.360255,2.871353,2.648388,0.453697,2.260555,2.245334,3.032947
607,2.933402,3.377183,2.711003,2.305907,2.370109,3.461429,2.780243,2.745697,2.652501,3.190587,...,2.243736,2.045456,1.496044,1.332730,2.812921,2.596331,0.446761,2.223080,2.196538,2.975879
608,3.595693,4.140309,3.323595,2.826785,2.907784,4.245863,3.405518,3.367134,3.249373,3.914530,...,2.752967,2.507696,1.837982,1.634282,3.449224,3.184559,0.548970,2.730219,2.691860,3.651370
609,2.798821,3.204975,2.572763,2.193107,2.192367,3.223744,2.718287,2.578604,2.584614,2.947589,...,2.069413,1.940112,1.315150,1.253972,2.651157,2.422278,0.390148,1.980435,2.111996,2.742112


# CONTENT BASED FILTERING

## Clustering over Movie Genres

In [41]:
movies = pd.read_csv('Data/movies.csv', sep=',')
movies.genres = movies.genres.str.lower()
movies.genres = movies.genres.str.replace(" ", "")
movies.genres = movies.genres.str.replace("-", "")
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),adventure|animation|children|comedy|fantasy
1,2,Jumanji (1995),adventure|children|fantasy
2,3,Grumpier Old Men (1995),comedy|romance
3,4,Waiting to Exhale (1995),comedy|drama|romance
4,5,Father of the Bride Part II (1995),comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),action|animation|comedy|fantasy
9738,193583,No Game No Life: Zero (2017),animation|comedy|fantasy
9739,193585,Flint (2017),drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),action|animation


In [42]:
v = TfidfVectorizer()
tfidf = v.fit_transform(movies['genres'])
tfidf.shape

(9742, 20)

In [43]:
cos_sim = linear_kernel(tfidf, tfidf)
cos_sim[0]

array([1.        , 0.81357774, 0.15276924, ..., 0.        , 0.4210373 ,
       0.26758648])

In [44]:
cosine_similarity = pd.DataFrame(data=cos_sim, index=movies.movieId, columns=movies.movieId)
cosine_similarity

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.813578,0.152769,0.135135,0.267586,0.000000,0.152769,0.654698,0.000000,0.262413,...,0.411168,0.465621,0.196578,0.516225,0.0,0.680258,0.755891,0.000000,0.421037,0.267586
2,0.813578,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.804715,0.000000,0.322542,...,0.000000,0.000000,0.000000,0.000000,0.0,0.341376,0.379331,0.000000,0.000000,0.000000
3,0.152769,0.000000,1.000000,0.884571,0.570915,0.000000,1.000000,0.000000,0.000000,0.000000,...,0.185790,0.000000,0.419413,0.000000,0.0,0.181883,0.202105,0.000000,0.000000,0.570915
4,0.135135,0.000000,0.884571,1.000000,0.505015,0.000000,0.884571,0.000000,0.000000,0.000000,...,0.164344,0.201391,0.687440,0.000000,0.0,0.160888,0.178776,0.466405,0.000000,0.505015
5,0.267586,0.000000,0.570915,0.505015,1.000000,0.000000,0.570915,0.000000,0.000000,0.000000,...,0.325424,0.000000,0.734632,0.000000,0.0,0.318581,0.354002,0.000000,0.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.680258,0.341376,0.181883,0.160888,0.318581,0.239513,0.181883,0.000000,0.436010,0.241142,...,0.683714,0.554355,0.234040,0.614603,0.0,1.000000,0.899942,0.000000,0.753553,0.318581
193583,0.755891,0.379331,0.202105,0.178776,0.354002,0.000000,0.202105,0.000000,0.000000,0.000000,...,0.543952,0.615990,0.260061,0.682937,0.0,0.899942,1.000000,0.000000,0.557008,0.354002
193585,0.000000,0.000000,0.000000,0.466405,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.431794,0.678466,0.000000,0.0,0.000000,0.000000,1.000000,0.000000,0.000000
193587,0.421037,0.000000,0.000000,0.000000,0.000000,0.317844,0.000000,0.000000,0.578606,0.320007,...,0.769740,0.735655,0.000000,0.815607,0.0,0.753553,0.557008,0.000000,1.000000,0.000000


In [47]:
genre = []
for movie_id in predict.columns:
    sim = []
    if movie_id in cosine_similarity.columns:
        for i in cosine_similarity.columns:
            if movie_id != i:
                sim.append([i,cosine_similarity[i].loc[movie_id]])
        sim.sort(key = lambda sim: sim[1], reverse=True)
        sim = np.array(sim[:20])
        sim = sim[:,0].tolist()        
        sim = [int(x) for x in sim]
    genre.append([movie_id,copy.deepcopy(sim)])
    print('Movie',movie_id,'done.')

Movie 1 done.
Movie 2 done.
Movie 3 done.
Movie 4 done.
Movie 5 done.
Movie 6 done.
Movie 7 done.
Movie 8 done.
Movie 9 done.
Movie 10 done.
Movie 11 done.
Movie 12 done.
Movie 13 done.
Movie 14 done.
Movie 15 done.
Movie 16 done.
Movie 17 done.
Movie 18 done.
Movie 19 done.
Movie 20 done.
Movie 21 done.
Movie 22 done.
Movie 23 done.
Movie 24 done.
Movie 25 done.
Movie 26 done.
Movie 27 done.
Movie 28 done.
Movie 29 done.
Movie 30 done.
Movie 31 done.
Movie 32 done.
Movie 34 done.
Movie 36 done.
Movie 38 done.
Movie 39 done.
Movie 40 done.
Movie 41 done.
Movie 42 done.
Movie 43 done.
Movie 44 done.
Movie 45 done.
Movie 46 done.
Movie 47 done.
Movie 48 done.
Movie 49 done.
Movie 50 done.
Movie 52 done.
Movie 53 done.
Movie 54 done.
Movie 55 done.
Movie 57 done.
Movie 58 done.
Movie 60 done.
Movie 61 done.
Movie 62 done.
Movie 63 done.
Movie 64 done.
Movie 65 done.
Movie 66 done.
Movie 68 done.
Movie 69 done.
Movie 70 done.
Movie 71 done.
Movie 72 done.
Movie 73 done.
Movie 74 done.
Movi

Movie 606 done.
Movie 608 done.
Movie 609 done.
Movie 610 done.
Movie 611 done.
Movie 612 done.
Movie 613 done.
Movie 615 done.
Movie 616 done.
Movie 617 done.
Movie 618 done.
Movie 619 done.
Movie 626 done.
Movie 627 done.
Movie 628 done.
Movie 631 done.
Movie 632 done.
Movie 633 done.
Movie 634 done.
Movie 635 done.
Movie 636 done.
Movie 637 done.
Movie 638 done.
Movie 639 done.
Movie 640 done.
Movie 645 done.
Movie 647 done.
Movie 648 done.
Movie 649 done.
Movie 650 done.
Movie 653 done.
Movie 656 done.
Movie 661 done.
Movie 662 done.
Movie 663 done.
Movie 665 done.
Movie 667 done.
Movie 668 done.
Movie 670 done.
Movie 671 done.
Movie 673 done.
Movie 674 done.
Movie 678 done.
Movie 679 done.
Movie 680 done.
Movie 685 done.
Movie 688 done.
Movie 691 done.
Movie 692 done.
Movie 694 done.
Movie 695 done.
Movie 697 done.
Movie 698 done.
Movie 700 done.
Movie 703 done.
Movie 704 done.
Movie 706 done.
Movie 707 done.
Movie 708 done.
Movie 709 done.
Movie 710 done.
Movie 711 done.
Movie 71

Movie 1328 done.
Movie 1329 done.
Movie 1330 done.
Movie 1331 done.
Movie 1332 done.
Movie 1333 done.
Movie 1334 done.
Movie 1335 done.
Movie 1336 done.
Movie 1337 done.
Movie 1339 done.
Movie 1340 done.
Movie 1341 done.
Movie 1342 done.
Movie 1343 done.
Movie 1344 done.
Movie 1345 done.
Movie 1346 done.
Movie 1347 done.
Movie 1348 done.
Movie 1349 done.
Movie 1350 done.
Movie 1351 done.
Movie 1352 done.
Movie 1353 done.
Movie 1354 done.
Movie 1355 done.
Movie 1356 done.
Movie 1357 done.
Movie 1358 done.
Movie 1359 done.
Movie 1361 done.
Movie 1363 done.
Movie 1365 done.
Movie 1366 done.
Movie 1367 done.
Movie 1370 done.
Movie 1371 done.
Movie 1372 done.
Movie 1373 done.
Movie 1374 done.
Movie 1375 done.
Movie 1376 done.
Movie 1377 done.
Movie 1378 done.
Movie 1379 done.
Movie 1380 done.
Movie 1381 done.
Movie 1382 done.
Movie 1385 done.
Movie 1387 done.
Movie 1388 done.
Movie 1389 done.
Movie 1390 done.
Movie 1391 done.
Movie 1392 done.
Movie 1393 done.
Movie 1394 done.
Movie 1395 don

Movie 2024 done.
Movie 2025 done.
Movie 2026 done.
Movie 2027 done.
Movie 2028 done.
Movie 2032 done.
Movie 2033 done.
Movie 2034 done.
Movie 2035 done.
Movie 2036 done.
Movie 2037 done.
Movie 2038 done.
Movie 2040 done.
Movie 2041 done.
Movie 2042 done.
Movie 2043 done.
Movie 2044 done.
Movie 2046 done.
Movie 2048 done.
Movie 2050 done.
Movie 2051 done.
Movie 2052 done.
Movie 2053 done.
Movie 2054 done.
Movie 2055 done.
Movie 2056 done.
Movie 2057 done.
Movie 2058 done.
Movie 2059 done.
Movie 2060 done.
Movie 2064 done.
Movie 2065 done.
Movie 2066 done.
Movie 2067 done.
Movie 2068 done.
Movie 2069 done.
Movie 2070 done.
Movie 2071 done.
Movie 2072 done.
Movie 2073 done.
Movie 2074 done.
Movie 2075 done.
Movie 2076 done.
Movie 2077 done.
Movie 2078 done.
Movie 2080 done.
Movie 2081 done.
Movie 2082 done.
Movie 2083 done.
Movie 2084 done.
Movie 2085 done.
Movie 2087 done.
Movie 2088 done.
Movie 2089 done.
Movie 2090 done.
Movie 2091 done.
Movie 2092 done.
Movie 2093 done.
Movie 2094 don

Movie 2630 done.
Movie 2632 done.
Movie 2633 done.
Movie 2634 done.
Movie 2639 done.
Movie 2640 done.
Movie 2641 done.
Movie 2642 done.
Movie 2643 done.
Movie 2644 done.
Movie 2648 done.
Movie 2651 done.
Movie 2652 done.
Movie 2654 done.
Movie 2655 done.
Movie 2656 done.
Movie 2657 done.
Movie 2659 done.
Movie 2660 done.
Movie 2661 done.
Movie 2662 done.
Movie 2664 done.
Movie 2665 done.
Movie 2668 done.
Movie 2669 done.
Movie 2670 done.
Movie 2671 done.
Movie 2672 done.
Movie 2674 done.
Movie 2676 done.
Movie 2677 done.
Movie 2681 done.
Movie 2682 done.
Movie 2683 done.
Movie 2686 done.
Movie 2687 done.
Movie 2688 done.
Movie 2690 done.
Movie 2691 done.
Movie 2692 done.
Movie 2693 done.
Movie 2694 done.
Movie 2695 done.
Movie 2696 done.
Movie 2697 done.
Movie 2698 done.
Movie 2699 done.
Movie 2700 done.
Movie 2701 done.
Movie 2702 done.
Movie 2706 done.
Movie 2707 done.
Movie 2708 done.
Movie 2709 done.
Movie 2710 done.
Movie 2712 done.
Movie 2713 done.
Movie 2716 done.
Movie 2717 don

Movie 3280 done.
Movie 3281 done.
Movie 3283 done.
Movie 3284 done.
Movie 3285 done.
Movie 3286 done.
Movie 3287 done.
Movie 3289 done.
Movie 3294 done.
Movie 3295 done.
Movie 3296 done.
Movie 3298 done.
Movie 3299 done.
Movie 3300 done.
Movie 3301 done.
Movie 3302 done.
Movie 3303 done.
Movie 3306 done.
Movie 3307 done.
Movie 3308 done.
Movie 3310 done.
Movie 3313 done.
Movie 3315 done.
Movie 3316 done.
Movie 3317 done.
Movie 3323 done.
Movie 3324 done.
Movie 3325 done.
Movie 3326 done.
Movie 3327 done.
Movie 3328 done.
Movie 3329 done.
Movie 3330 done.
Movie 3331 done.
Movie 3334 done.
Movie 3341 done.
Movie 3342 done.
Movie 3344 done.
Movie 3345 done.
Movie 3347 done.
Movie 3350 done.
Movie 3353 done.
Movie 3354 done.
Movie 3355 done.
Movie 3357 done.
Movie 3358 done.
Movie 3359 done.
Movie 3360 done.
Movie 3361 done.
Movie 3362 done.
Movie 3363 done.
Movie 3364 done.
Movie 3365 done.
Movie 3368 done.
Movie 3370 done.
Movie 3372 done.
Movie 3374 done.
Movie 3378 done.
Movie 3379 don

Movie 3952 done.
Movie 3953 done.
Movie 3955 done.
Movie 3957 done.
Movie 3958 done.
Movie 3959 done.
Movie 3962 done.
Movie 3963 done.
Movie 3964 done.
Movie 3965 done.
Movie 3966 done.
Movie 3967 done.
Movie 3968 done.
Movie 3969 done.
Movie 3971 done.
Movie 3972 done.
Movie 3973 done.
Movie 3974 done.
Movie 3977 done.
Movie 3978 done.
Movie 3979 done.
Movie 3980 done.
Movie 3981 done.
Movie 3983 done.
Movie 3984 done.
Movie 3985 done.
Movie 3986 done.
Movie 3987 done.
Movie 3988 done.
Movie 3989 done.
Movie 3990 done.
Movie 3991 done.
Movie 3992 done.
Movie 3993 done.
Movie 3994 done.
Movie 3996 done.
Movie 3997 done.
Movie 3998 done.
Movie 3999 done.
Movie 4000 done.
Movie 4002 done.
Movie 4003 done.
Movie 4005 done.
Movie 4006 done.
Movie 4007 done.
Movie 4008 done.
Movie 4009 done.
Movie 4010 done.
Movie 4011 done.
Movie 4012 done.
Movie 4014 done.
Movie 4015 done.
Movie 4016 done.
Movie 4017 done.
Movie 4018 done.
Movie 4019 done.
Movie 4020 done.
Movie 4021 done.
Movie 4022 don

Movie 4675 done.
Movie 4676 done.
Movie 4677 done.
Movie 4678 done.
Movie 4679 done.
Movie 4681 done.
Movie 4683 done.
Movie 4686 done.
Movie 4687 done.
Movie 4688 done.
Movie 4689 done.
Movie 4690 done.
Movie 4695 done.
Movie 4697 done.
Movie 4699 done.
Movie 4700 done.
Movie 4701 done.
Movie 4703 done.
Movie 4704 done.
Movie 4705 done.
Movie 4708 done.
Movie 4709 done.
Movie 4710 done.
Movie 4711 done.
Movie 4713 done.
Movie 4714 done.
Movie 4715 done.
Movie 4717 done.
Movie 4718 done.
Movie 4719 done.
Movie 4720 done.
Movie 4721 done.
Movie 4722 done.
Movie 4723 done.
Movie 4725 done.
Movie 4727 done.
Movie 4728 done.
Movie 4732 done.
Movie 4733 done.
Movie 4734 done.
Movie 4735 done.
Movie 4736 done.
Movie 4738 done.
Movie 4740 done.
Movie 4741 done.
Movie 4743 done.
Movie 4744 done.
Movie 4745 done.
Movie 4748 done.
Movie 4749 done.
Movie 4750 done.
Movie 4754 done.
Movie 4756 done.
Movie 4757 done.
Movie 4759 done.
Movie 4765 done.
Movie 4766 done.
Movie 4769 done.
Movie 4770 don

Movie 5500 done.
Movie 5501 done.
Movie 5502 done.
Movie 5503 done.
Movie 5504 done.
Movie 5505 done.
Movie 5506 done.
Movie 5507 done.
Movie 5508 done.
Movie 5512 done.
Movie 5513 done.
Movie 5515 done.
Movie 5521 done.
Movie 5522 done.
Movie 5523 done.
Movie 5524 done.
Movie 5525 done.
Movie 5527 done.
Movie 5528 done.
Movie 5529 done.
Movie 5530 done.
Movie 5531 done.
Movie 5532 done.
Movie 5537 done.
Movie 5538 done.
Movie 5539 done.
Movie 5540 done.
Movie 5541 done.
Movie 5543 done.
Movie 5544 done.
Movie 5548 done.
Movie 5550 done.
Movie 5553 done.
Movie 5556 done.
Movie 5560 done.
Movie 5562 done.
Movie 5563 done.
Movie 5564 done.
Movie 5568 done.
Movie 5569 done.
Movie 5570 done.
Movie 5572 done.
Movie 5573 done.
Movie 5574 done.
Movie 5575 done.
Movie 5577 done.
Movie 5580 done.
Movie 5581 done.
Movie 5582 done.
Movie 5584 done.
Movie 5585 done.
Movie 5588 done.
Movie 5589 done.
Movie 5590 done.
Movie 5591 done.
Movie 5596 done.
Movie 5597 done.
Movie 5601 done.
Movie 5602 don

Movie 6460 done.
Movie 6461 done.
Movie 6464 done.
Movie 6465 done.
Movie 6466 done.
Movie 6476 done.
Movie 6477 done.
Movie 6480 done.
Movie 6482 done.
Movie 6483 done.
Movie 6484 done.
Movie 6493 done.
Movie 6502 done.
Movie 6503 done.
Movie 6506 done.
Movie 6509 done.
Movie 6510 done.
Movie 6514 done.
Movie 6515 done.
Movie 6516 done.
Movie 6517 done.
Movie 6523 done.
Movie 6527 done.
Movie 6528 done.
Movie 6530 done.
Movie 6533 done.
Movie 6534 done.
Movie 6535 done.
Movie 6536 done.
Movie 6537 done.
Movie 6538 done.
Movie 6539 done.
Movie 6541 done.
Movie 6542 done.
Movie 6545 done.
Movie 6547 done.
Movie 6548 done.
Movie 6549 done.
Movie 6550 done.
Movie 6551 done.
Movie 6552 done.
Movie 6553 done.
Movie 6554 done.
Movie 6557 done.
Movie 6558 done.
Movie 6559 done.
Movie 6560 done.
Movie 6561 done.
Movie 6563 done.
Movie 6564 done.
Movie 6565 done.
Movie 6566 done.
Movie 6567 done.
Movie 6568 done.
Movie 6571 done.
Movie 6572 done.
Movie 6573 done.
Movie 6574 done.
Movie 6577 don

Movie 7312 done.
Movie 7315 done.
Movie 7316 done.
Movie 7317 done.
Movie 7318 done.
Movie 7319 done.
Movie 7320 done.
Movie 7321 done.
Movie 7323 done.
Movie 7324 done.
Movie 7325 done.
Movie 7326 done.
Movie 7327 done.
Movie 7328 done.
Movie 7333 done.
Movie 7335 done.
Movie 7336 done.
Movie 7340 done.
Movie 7344 done.
Movie 7345 done.
Movie 7346 done.
Movie 7347 done.
Movie 7348 done.
Movie 7349 done.
Movie 7352 done.
Movie 7354 done.
Movie 7357 done.
Movie 7360 done.
Movie 7361 done.
Movie 7362 done.
Movie 7364 done.
Movie 7366 done.
Movie 7367 done.
Movie 7368 done.
Movie 7369 done.
Movie 7371 done.
Movie 7372 done.
Movie 7373 done.
Movie 7375 done.
Movie 7376 done.
Movie 7377 done.
Movie 7379 done.
Movie 7380 done.
Movie 7381 done.
Movie 7382 done.
Movie 7386 done.
Movie 7387 done.
Movie 7390 done.
Movie 7394 done.
Movie 7395 done.
Movie 7396 done.
Movie 7411 done.
Movie 7414 done.
Movie 7419 done.
Movie 7438 done.
Movie 7439 done.
Movie 7440 done.
Movie 7443 done.
Movie 7444 don

Movie 8949 done.
Movie 8950 done.
Movie 8951 done.
Movie 8952 done.
Movie 8954 done.
Movie 8955 done.
Movie 8957 done.
Movie 8958 done.
Movie 8959 done.
Movie 8961 done.
Movie 8964 done.
Movie 8965 done.
Movie 8966 done.
Movie 8967 done.
Movie 8968 done.
Movie 8969 done.
Movie 8970 done.
Movie 8972 done.
Movie 8973 done.
Movie 8974 done.
Movie 8977 done.
Movie 8978 done.
Movie 8979 done.
Movie 8981 done.
Movie 8982 done.
Movie 8983 done.
Movie 8984 done.
Movie 8985 done.
Movie 8987 done.
Movie 8989 done.
Movie 8998 done.
Movie 9004 done.
Movie 9005 done.
Movie 9008 done.
Movie 9010 done.
Movie 9018 done.
Movie 25746 done.
Movie 25750 done.
Movie 25752 done.
Movie 25753 done.
Movie 25757 done.
Movie 25769 done.
Movie 25771 done.
Movie 25773 done.
Movie 25782 done.
Movie 25788 done.
Movie 25795 done.
Movie 25797 done.
Movie 25805 done.
Movie 25825 done.
Movie 25826 done.
Movie 25827 done.
Movie 25833 done.
Movie 25834 done.
Movie 25841 done.
Movie 25850 done.
Movie 25856 done.
Movie 2586

Movie 32234 done.
Movie 32243 done.
Movie 32289 done.
Movie 32291 done.
Movie 32294 done.
Movie 32296 done.
Movie 32298 done.
Movie 32300 done.
Movie 32302 done.
Movie 32314 done.
Movie 32387 done.
Movie 32392 done.
Movie 32440 done.
Movie 32442 done.
Movie 32456 done.
Movie 32460 done.
Movie 32469 done.
Movie 32511 done.
Movie 32515 done.
Movie 32554 done.
Movie 32582 done.
Movie 32584 done.
Movie 32587 done.
Movie 32589 done.
Movie 32596 done.
Movie 32598 done.
Movie 32600 done.
Movie 32620 done.
Movie 32632 done.
Movie 32649 done.
Movie 32657 done.
Movie 32659 done.
Movie 32666 done.
Movie 32728 done.
Movie 32743 done.
Movie 32770 done.
Movie 32799 done.
Movie 32862 done.
Movie 32875 done.
Movie 32892 done.
Movie 32898 done.
Movie 32906 done.
Movie 32914 done.
Movie 32917 done.
Movie 33004 done.
Movie 33085 done.
Movie 33090 done.
Movie 33124 done.
Movie 33126 done.
Movie 33132 done.
Movie 33138 done.
Movie 33145 done.
Movie 33148 done.
Movie 33154 done.
Movie 33158 done.
Movie 3316

Movie 47793 done.
Movie 47810 done.
Movie 47894 done.
Movie 47937 done.
Movie 47950 done.
Movie 47952 done.
Movie 47970 done.
Movie 47978 done.
Movie 47997 done.
Movie 47999 done.
Movie 48001 done.
Movie 48032 done.
Movie 48043 done.
Movie 48045 done.
Movie 48082 done.
Movie 48142 done.
Movie 48150 done.
Movie 48161 done.
Movie 48214 done.
Movie 48262 done.
Movie 48304 done.
Movie 48319 done.
Movie 48322 done.
Movie 48326 done.
Movie 48342 done.
Movie 48385 done.
Movie 48394 done.
Movie 48412 done.
Movie 48414 done.
Movie 48416 done.
Movie 48516 done.
Movie 48518 done.
Movie 48520 done.
Movie 48560 done.
Movie 48593 done.
Movie 48596 done.
Movie 48598 done.
Movie 48638 done.
Movie 48649 done.
Movie 48678 done.
Movie 48696 done.
Movie 48698 done.
Movie 48738 done.
Movie 48741 done.
Movie 48744 done.
Movie 48774 done.
Movie 48780 done.
Movie 48783 done.
Movie 48872 done.
Movie 48877 done.
Movie 48879 done.
Movie 48883 done.
Movie 48982 done.
Movie 48997 done.
Movie 49013 done.
Movie 4911

Movie 59295 done.
Movie 59306 done.
Movie 59315 done.
Movie 59333 done.
Movie 59336 done.
Movie 59369 done.
Movie 59387 done.
Movie 59421 done.
Movie 59429 done.
Movie 59440 done.
Movie 59501 done.
Movie 59549 done.
Movie 59604 done.
Movie 59615 done.
Movie 59667 done.
Movie 59725 done.
Movie 59727 done.
Movie 59731 done.
Movie 59738 done.
Movie 59784 done.
Movie 59810 done.
Movie 59814 done.
Movie 59900 done.
Movie 59915 done.
Movie 59947 done.
Movie 59985 done.
Movie 59995 done.
Movie 60030 done.
Movie 60037 done.
Movie 60040 done.
Movie 60046 done.
Movie 60069 done.
Movie 60072 done.
Movie 60074 done.
Movie 60126 done.
Movie 60128 done.
Movie 60141 done.
Movie 60161 done.
Movie 60289 done.
Movie 60291 done.
Movie 60293 done.
Movie 60303 done.
Movie 60333 done.
Movie 60363 done.
Movie 60365 done.
Movie 60389 done.
Movie 60397 done.
Movie 60408 done.
Movie 60471 done.
Movie 60487 done.
Movie 60514 done.
Movie 60516 done.
Movie 60522 done.
Movie 60538 done.
Movie 60647 done.
Movie 6067

Movie 72624 done.
Movie 72641 done.
Movie 72692 done.
Movie 72694 done.
Movie 72696 done.
Movie 72701 done.
Movie 72714 done.
Movie 72720 done.
Movie 72731 done.
Movie 72733 done.
Movie 72737 done.
Movie 72874 done.
Movie 72919 done.
Movie 72921 done.
Movie 72982 done.
Movie 72998 done.
Movie 73015 done.
Movie 73017 done.
Movie 73023 done.
Movie 73042 done.
Movie 73106 done.
Movie 73160 done.
Movie 73211 done.
Movie 73266 done.
Movie 73268 done.
Movie 73290 done.
Movie 73319 done.
Movie 73321 done.
Movie 73323 done.
Movie 73344 done.
Movie 73386 done.
Movie 73431 done.
Movie 73488 done.
Movie 73499 done.
Movie 73501 done.
Movie 73515 done.
Movie 73569 done.
Movie 73676 done.
Movie 73681 done.
Movie 73741 done.
Movie 73804 done.
Movie 73808 done.
Movie 73822 done.
Movie 73854 done.
Movie 73858 done.
Movie 73876 done.
Movie 73881 done.
Movie 73929 done.
Movie 74075 done.
Movie 74089 done.
Movie 74095 done.
Movie 74154 done.
Movie 74226 done.
Movie 74228 done.
Movie 74275 done.
Movie 7428

Movie 88356 done.
Movie 88405 done.
Movie 88448 done.
Movie 88515 done.
Movie 88593 done.
Movie 88672 done.
Movie 88697 done.
Movie 88699 done.
Movie 88744 done.
Movie 88746 done.
Movie 88785 done.
Movie 88810 done.
Movie 88812 done.
Movie 88911 done.
Movie 88932 done.
Movie 88954 done.
Movie 89028 done.
Movie 89030 done.
Movie 89039 done.
Movie 89047 done.
Movie 89072 done.
Movie 89085 done.
Movie 89087 done.
Movie 89090 done.
Movie 89118 done.
Movie 89190 done.
Movie 89208 done.
Movie 89281 done.
Movie 89305 done.
Movie 89343 done.
Movie 89386 done.
Movie 89388 done.
Movie 89427 done.
Movie 89470 done.
Movie 89492 done.
Movie 89580 done.
Movie 89582 done.
Movie 89586 done.
Movie 89678 done.
Movie 89745 done.
Movie 89753 done.
Movie 89759 done.
Movie 89761 done.
Movie 89774 done.
Movie 89804 done.
Movie 89837 done.
Movie 89840 done.
Movie 89862 done.
Movie 89864 done.
Movie 89898 done.
Movie 89904 done.
Movie 89939 done.
Movie 89945 done.
Movie 90057 done.
Movie 90243 done.
Movie 9024

Movie 100579 done.
Movie 100611 done.
Movie 100714 done.
Movie 100737 done.
Movie 100810 done.
Movie 100843 done.
Movie 100882 done.
Movie 100906 done.
Movie 101025 done.
Movie 101070 done.
Movie 101072 done.
Movie 101074 done.
Movie 101076 done.
Movie 101088 done.
Movie 101112 done.
Movie 101142 done.
Movie 101283 done.
Movie 101360 done.
Movie 101362 done.
Movie 101415 done.
Movie 101423 done.
Movie 101525 done.
Movie 101529 done.
Movie 101531 done.
Movie 101577 done.
Movie 101612 done.
Movie 101739 done.
Movie 101741 done.
Movie 101765 done.
Movie 101864 done.
Movie 101884 done.
Movie 101895 done.
Movie 101962 done.
Movie 101973 done.
Movie 102007 done.
Movie 102025 done.
Movie 102033 done.
Movie 102058 done.
Movie 102066 done.
Movie 102070 done.
Movie 102084 done.
Movie 102088 done.
Movie 102123 done.
Movie 102125 done.
Movie 102165 done.
Movie 102194 done.
Movie 102217 done.
Movie 102278 done.
Movie 102338 done.
Movie 102378 done.
Movie 102407 done.
Movie 102445 done.
Movie 102481

Movie 115203 done.
Movie 115210 done.
Movie 115216 done.
Movie 115231 done.
Movie 115502 done.
Movie 115569 done.
Movie 115617 done.
Movie 115664 done.
Movie 115667 done.
Movie 115680 done.
Movie 115713 done.
Movie 115727 done.
Movie 115819 done.
Movie 115828 done.
Movie 115877 done.
Movie 115969 done.
Movie 116044 done.
Movie 116138 done.
Movie 116169 done.
Movie 116207 done.
Movie 116411 done.
Movie 116413 done.
Movie 116419 done.
Movie 116505 done.
Movie 116529 done.
Movie 116668 done.
Movie 116718 done.
Movie 116724 done.
Movie 116738 done.
Movie 116797 done.
Movie 116799 done.
Movie 116817 done.
Movie 116823 done.
Movie 116849 done.
Movie 116887 done.
Movie 116897 done.
Movie 116941 done.
Movie 116963 done.
Movie 116977 done.
Movie 116985 done.
Movie 117107 done.
Movie 117109 done.
Movie 117133 done.
Movie 117176 done.
Movie 117192 done.
Movie 117364 done.
Movie 117368 done.
Movie 117444 done.
Movie 117466 done.
Movie 117511 done.
Movie 117529 done.
Movie 117531 done.
Movie 117533

Movie 137859 done.
Movie 137863 done.
Movie 138036 done.
Movie 138186 done.
Movie 138204 done.
Movie 138208 done.
Movie 138210 done.
Movie 138396 done.
Movie 138546 done.
Movie 138610 done.
Movie 138632 done.
Movie 138702 done.
Movie 138798 done.
Movie 138835 done.
Movie 138966 done.
Movie 139052 done.
Movie 139130 done.
Movie 139157 done.
Movie 139385 done.
Movie 139415 done.
Movie 139511 done.
Movie 139640 done.
Movie 139642 done.
Movie 139644 done.
Movie 139655 done.
Movie 139717 done.
Movie 139747 done.
Movie 139855 done.
Movie 139857 done.
Movie 139859 done.
Movie 139915 done.
Movie 139994 done.
Movie 140016 done.
Movie 140038 done.
Movie 140110 done.
Movie 140133 done.
Movie 140162 done.
Movie 140174 done.
Movie 140237 done.
Movie 140247 done.
Movie 140265 done.
Movie 140267 done.
Movie 140289 done.
Movie 140301 done.
Movie 140359 done.
Movie 140481 done.
Movie 140523 done.
Movie 140525 done.
Movie 140541 done.
Movie 140561 done.
Movie 140627 done.
Movie 140711 done.
Movie 140715

Movie 164909 done.
Movie 164917 done.
Movie 165075 done.
Movie 165101 done.
Movie 165103 done.
Movie 165139 done.
Movie 165343 done.
Movie 165347 done.
Movie 165483 done.
Movie 165489 done.
Movie 165529 done.
Movie 165549 done.
Movie 165551 done.
Movie 165635 done.
Movie 165639 done.
Movie 165645 done.
Movie 165671 done.
Movie 165843 done.
Movie 165947 done.
Movie 165959 done.
Movie 165969 done.
Movie 166015 done.
Movie 166024 done.
Movie 166183 done.
Movie 166203 done.
Movie 166291 done.
Movie 166461 done.
Movie 166492 done.
Movie 166526 done.
Movie 166528 done.
Movie 166534 done.
Movie 166558 done.
Movie 166568 done.
Movie 166635 done.
Movie 166643 done.
Movie 166705 done.
Movie 166946 done.
Movie 167018 done.
Movie 167036 done.
Movie 167064 done.
Movie 167296 done.
Movie 167370 done.
Movie 167380 done.
Movie 167538 done.
Movie 167570 done.
Movie 167634 done.
Movie 167706 done.
Movie 167732 done.
Movie 167746 done.
Movie 167772 done.
Movie 167790 done.
Movie 167854 done.
Movie 168026

In [49]:
genre[0]

[1,
 [2294,
  3114,
  3754,
  4016,
  4886,
  45074,
  53121,
  65577,
  91355,
  103755,
  136016,
  166461,
  134853,
  2033,
  2116,
  3400,
  4366,
  4519,
  5672,
  6536]]

## Clustering over Movie Tags

In [13]:
tags = pd.read_csv('Data/tags.csv', sep=',')
tags.tag = tags.tag.str.lower()
tags.tag = tags.tag.str.replace(" ","")
tags.tag = tags.tag.str.replace("'","")

def strip_character(data):
    r = re.compile(r'[^a-z !@#$%&*_+-=|\:;<>,./()[\]{}\"]')
    return r.sub('', data)

tags.tag = tags.tag.apply(strip_character)

tags

Unnamed: 0,userId,movieId,tag,timestamp
0,2,60756,funny,1445714994
1,2,60756,highlyquotable,1445714996
2,2,60756,willferrell,1445714992
3,2,89774,boxingstory,1445715207
4,2,89774,mma,1445715200
...,...,...,...,...
3678,606,7382,forkatie,1171234019
3679,606,7936,austere,1173392334
3680,610,3265,gunfu,1493843984
3681,610,3265,heroicbloodshed,1493843978


In [14]:
tags = tags.drop(['timestamp'],axis=1)
tags = tags.groupby(['movieId'], as_index=False)['userId','tag'].agg(lambda x: list(x))
tags

Unnamed: 0,movieId,userId,tag
0,1,"[336, 474, 567]","[pixar, pixar, fun]"
1,2,"[62, 62, 62, 474]","[fantasy, magicboardgame, robinwilliams, game]"
2,3,"[289, 289]","[moldy, old]"
3,5,"[474, 474]","[pregnancy, remake]"
4,7,[474],[remake]
...,...,...,...
1567,183611,"[62, 62, 62]","[comedy, funny, rachelmcadams]"
1568,184471,"[62, 62, 62]","[adventure, aliciavikander, videogameadaptation]"
1569,187593,"[62, 62, 62]","[joshbrolin, ryanreynolds, sarcasm]"
1570,187595,"[62, 62]","[emiliaclarke, starwars]"


In [15]:
def removeDuplicate(data):
    temp = ""
    for i in data:
        if str(i) not in temp:
                temp += str(i) + "|"
    return temp[:len(temp)-1]

tags.userId = tags.userId.apply(removeDuplicate)
tags.tag = tags.tag.apply(removeDuplicate)
tags

Unnamed: 0,movieId,userId,tag
0,1,336|474|567,pixar|fun
1,2,62|474,fantasy|magicboardgame|robinwilliams
2,3,289,moldy
3,5,474,pregnancy|remake
4,7,474,remake
...,...,...,...
1567,183611,62,comedy|funny|rachelmcadams
1568,184471,62,adventure|aliciavikander|videogameadaptation
1569,187593,62,joshbrolin|ryanreynolds|sarcasm
1570,187595,62,emiliaclarke|starwars


In [16]:
v2 = TfidfVectorizer()
tfidf2 = v2.fit_transform(tags.tag)
tfidf2.shape

(1572, 1495)

In [17]:
cos_sim2 = linear_kernel(tfidf2, tfidf2)
cos_sim2[0]

array([1., 0., 0., ..., 0., 0., 0.])

In [18]:
cosine_similarity2 = pd.DataFrame(data=cos_sim2, index=tags.movieId, columns=tags.movieId)
cosine_similarity2

movieId,1,2,3,5,7,11,14,16,17,21,...,176371,176419,179401,180031,180985,183611,184471,187593,187595,193565
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000
2,0.0,1.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000
3,0.0,0.0,1.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000
5,0.0,0.0,0.0,1.000000,0.680832,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000
7,0.0,0.0,0.0,0.680832,1.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183611,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.437843,0.0,0.0,1.000000,0.0,0.0,0.0,0.206446
184471,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,1.0,0.0,0.0,0.000000
187593,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,1.0,0.0,0.000000
187595,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,1.0,0.000000


In [20]:
tag = []
for movie_id in predict.columns:
    sim = []
    if movie_id in cosine_similarity2.columns:
        for i in cosine_similarity2.columns:
            cos_sim = cosine_similarity2[i].loc[movie_id]
            if movie_id != i:
                sim.append([i,cos_sim])
        sim.sort(key = lambda sim: sim[1], reverse=True)
        sim = np.array(sim[:20])
        sim = sim[:,0].tolist()        
        sim = [int(x) for x in sim]
    tag.append([movie_id,copy.deepcopy(sim)])
    print('Movie',movie_id,'done.')

Movie 1 done.
Movie 2 done.
Movie 3 done.
Movie 4 done.
Movie 5 done.
Movie 6 done.
Movie 7 done.
Movie 8 done.
Movie 9 done.
Movie 10 done.
Movie 11 done.
Movie 12 done.
Movie 13 done.
Movie 14 done.
Movie 15 done.
Movie 16 done.
Movie 17 done.
Movie 18 done.
Movie 19 done.
Movie 20 done.
Movie 21 done.
Movie 22 done.
Movie 23 done.
Movie 24 done.
Movie 25 done.
Movie 26 done.
Movie 27 done.
Movie 28 done.
Movie 29 done.
Movie 30 done.
Movie 31 done.
Movie 32 done.
Movie 34 done.
Movie 36 done.
Movie 38 done.
Movie 39 done.
Movie 40 done.
Movie 41 done.
Movie 42 done.
Movie 43 done.
Movie 44 done.
Movie 45 done.
Movie 46 done.
Movie 47 done.
Movie 48 done.
Movie 49 done.
Movie 50 done.
Movie 52 done.
Movie 53 done.
Movie 54 done.
Movie 55 done.
Movie 57 done.
Movie 58 done.
Movie 60 done.
Movie 61 done.
Movie 62 done.
Movie 63 done.
Movie 64 done.
Movie 65 done.
Movie 66 done.
Movie 68 done.
Movie 69 done.
Movie 70 done.
Movie 71 done.
Movie 72 done.
Movie 73 done.
Movie 74 done.
Movi

Movie 608 done.
Movie 609 done.
Movie 610 done.
Movie 611 done.
Movie 612 done.
Movie 613 done.
Movie 615 done.
Movie 616 done.
Movie 617 done.
Movie 618 done.
Movie 619 done.
Movie 626 done.
Movie 627 done.
Movie 628 done.
Movie 631 done.
Movie 632 done.
Movie 633 done.
Movie 634 done.
Movie 635 done.
Movie 636 done.
Movie 637 done.
Movie 638 done.
Movie 639 done.
Movie 640 done.
Movie 645 done.
Movie 647 done.
Movie 648 done.
Movie 649 done.
Movie 650 done.
Movie 653 done.
Movie 656 done.
Movie 661 done.
Movie 662 done.
Movie 663 done.
Movie 665 done.
Movie 667 done.
Movie 668 done.
Movie 670 done.
Movie 671 done.
Movie 673 done.
Movie 674 done.
Movie 678 done.
Movie 679 done.
Movie 680 done.
Movie 685 done.
Movie 688 done.
Movie 691 done.
Movie 692 done.
Movie 694 done.
Movie 695 done.
Movie 697 done.
Movie 698 done.
Movie 700 done.
Movie 703 done.
Movie 704 done.
Movie 706 done.
Movie 707 done.
Movie 708 done.
Movie 709 done.
Movie 710 done.
Movie 711 done.
Movie 714 done.
Movie 71

Movie 1339 done.
Movie 1340 done.
Movie 1341 done.
Movie 1342 done.
Movie 1343 done.
Movie 1344 done.
Movie 1345 done.
Movie 1346 done.
Movie 1347 done.
Movie 1348 done.
Movie 1349 done.
Movie 1350 done.
Movie 1351 done.
Movie 1352 done.
Movie 1353 done.
Movie 1354 done.
Movie 1355 done.
Movie 1356 done.
Movie 1357 done.
Movie 1358 done.
Movie 1359 done.
Movie 1361 done.
Movie 1363 done.
Movie 1365 done.
Movie 1366 done.
Movie 1367 done.
Movie 1370 done.
Movie 1371 done.
Movie 1372 done.
Movie 1373 done.
Movie 1374 done.
Movie 1375 done.
Movie 1376 done.
Movie 1377 done.
Movie 1378 done.
Movie 1379 done.
Movie 1380 done.
Movie 1381 done.
Movie 1382 done.
Movie 1385 done.
Movie 1387 done.
Movie 1388 done.
Movie 1389 done.
Movie 1390 done.
Movie 1391 done.
Movie 1392 done.
Movie 1393 done.
Movie 1394 done.
Movie 1395 done.
Movie 1396 done.
Movie 1397 done.
Movie 1398 done.
Movie 1399 done.
Movie 1401 done.
Movie 1405 done.
Movie 1406 done.
Movie 1407 done.
Movie 1408 done.
Movie 1409 don

Movie 2059 done.
Movie 2060 done.
Movie 2064 done.
Movie 2065 done.
Movie 2066 done.
Movie 2067 done.
Movie 2068 done.
Movie 2069 done.
Movie 2070 done.
Movie 2071 done.
Movie 2072 done.
Movie 2073 done.
Movie 2074 done.
Movie 2075 done.
Movie 2076 done.
Movie 2077 done.
Movie 2078 done.
Movie 2080 done.
Movie 2081 done.
Movie 2082 done.
Movie 2083 done.
Movie 2084 done.
Movie 2085 done.
Movie 2087 done.
Movie 2088 done.
Movie 2089 done.
Movie 2090 done.
Movie 2091 done.
Movie 2092 done.
Movie 2093 done.
Movie 2094 done.
Movie 2095 done.
Movie 2096 done.
Movie 2097 done.
Movie 2098 done.
Movie 2099 done.
Movie 2100 done.
Movie 2102 done.
Movie 2103 done.
Movie 2104 done.
Movie 2105 done.
Movie 2106 done.
Movie 2107 done.
Movie 2108 done.
Movie 2109 done.
Movie 2110 done.
Movie 2111 done.
Movie 2112 done.
Movie 2114 done.
Movie 2115 done.
Movie 2116 done.
Movie 2117 done.
Movie 2118 done.
Movie 2119 done.
Movie 2120 done.
Movie 2121 done.
Movie 2122 done.
Movie 2123 done.
Movie 2124 don

Movie 2674 done.
Movie 2676 done.
Movie 2677 done.
Movie 2681 done.
Movie 2682 done.
Movie 2683 done.
Movie 2686 done.
Movie 2687 done.
Movie 2688 done.
Movie 2690 done.
Movie 2691 done.
Movie 2692 done.
Movie 2693 done.
Movie 2694 done.
Movie 2695 done.
Movie 2696 done.
Movie 2697 done.
Movie 2698 done.
Movie 2699 done.
Movie 2700 done.
Movie 2701 done.
Movie 2702 done.
Movie 2706 done.
Movie 2707 done.
Movie 2708 done.
Movie 2709 done.
Movie 2710 done.
Movie 2712 done.
Movie 2713 done.
Movie 2716 done.
Movie 2717 done.
Movie 2718 done.
Movie 2719 done.
Movie 2720 done.
Movie 2722 done.
Movie 2723 done.
Movie 2724 done.
Movie 2725 done.
Movie 2726 done.
Movie 2727 done.
Movie 2728 done.
Movie 2729 done.
Movie 2730 done.
Movie 2731 done.
Movie 2732 done.
Movie 2733 done.
Movie 2734 done.
Movie 2735 done.
Movie 2736 done.
Movie 2737 done.
Movie 2738 done.
Movie 2739 done.
Movie 2740 done.
Movie 2741 done.
Movie 2742 done.
Movie 2743 done.
Movie 2744 done.
Movie 2745 done.
Movie 2746 don

Movie 3341 done.
Movie 3342 done.
Movie 3344 done.
Movie 3345 done.
Movie 3347 done.
Movie 3350 done.
Movie 3353 done.
Movie 3354 done.
Movie 3355 done.
Movie 3357 done.
Movie 3358 done.
Movie 3359 done.
Movie 3360 done.
Movie 3361 done.
Movie 3362 done.
Movie 3363 done.
Movie 3364 done.
Movie 3365 done.
Movie 3368 done.
Movie 3370 done.
Movie 3372 done.
Movie 3374 done.
Movie 3378 done.
Movie 3379 done.
Movie 3384 done.
Movie 3385 done.
Movie 3386 done.
Movie 3387 done.
Movie 3388 done.
Movie 3389 done.
Movie 3390 done.
Movie 3391 done.
Movie 3392 done.
Movie 3393 done.
Movie 3394 done.
Movie 3395 done.
Movie 3396 done.
Movie 3397 done.
Movie 3398 done.
Movie 3400 done.
Movie 3401 done.
Movie 3402 done.
Movie 3403 done.
Movie 3404 done.
Movie 3405 done.
Movie 3406 done.
Movie 3408 done.
Movie 3409 done.
Movie 3410 done.
Movie 3412 done.
Movie 3414 done.
Movie 3415 done.
Movie 3417 done.
Movie 3418 done.
Movie 3420 done.
Movie 3421 done.
Movie 3422 done.
Movie 3423 done.
Movie 3424 don

Movie 4012 done.
Movie 4014 done.
Movie 4015 done.
Movie 4016 done.
Movie 4017 done.
Movie 4018 done.
Movie 4019 done.
Movie 4020 done.
Movie 4021 done.
Movie 4022 done.
Movie 4023 done.
Movie 4024 done.
Movie 4025 done.
Movie 4027 done.
Movie 4029 done.
Movie 4030 done.
Movie 4031 done.
Movie 4032 done.
Movie 4033 done.
Movie 4034 done.
Movie 4035 done.
Movie 4036 done.
Movie 4037 done.
Movie 4039 done.
Movie 4040 done.
Movie 4041 done.
Movie 4042 done.
Movie 4043 done.
Movie 4046 done.
Movie 4047 done.
Movie 4051 done.
Movie 4052 done.
Movie 4053 done.
Movie 4054 done.
Movie 4055 done.
Movie 4056 done.
Movie 4061 done.
Movie 4062 done.
Movie 4063 done.
Movie 4064 done.
Movie 4065 done.
Movie 4066 done.
Movie 4067 done.
Movie 4068 done.
Movie 4069 done.
Movie 4074 done.
Movie 4077 done.
Movie 4078 done.
Movie 4079 done.
Movie 4080 done.
Movie 4081 done.
Movie 4082 done.
Movie 4083 done.
Movie 4084 done.
Movie 4085 done.
Movie 4086 done.
Movie 4089 done.
Movie 4090 done.
Movie 4091 don

Movie 4801 done.
Movie 4802 done.
Movie 4803 done.
Movie 4804 done.
Movie 4808 done.
Movie 4809 done.
Movie 4810 done.
Movie 4811 done.
Movie 4812 done.
Movie 4813 done.
Movie 4814 done.
Movie 4815 done.
Movie 4816 done.
Movie 4818 done.
Movie 4821 done.
Movie 4822 done.
Movie 4823 done.
Movie 4825 done.
Movie 4826 done.
Movie 4827 done.
Movie 4828 done.
Movie 4830 done.
Movie 4831 done.
Movie 4833 done.
Movie 4835 done.
Movie 4836 done.
Movie 4840 done.
Movie 4844 done.
Movie 4845 done.
Movie 4846 done.
Movie 4847 done.
Movie 4848 done.
Movie 4849 done.
Movie 4850 done.
Movie 4852 done.
Movie 4855 done.
Movie 4857 done.
Movie 4860 done.
Movie 4862 done.
Movie 4863 done.
Movie 4864 done.
Movie 4865 done.
Movie 4866 done.
Movie 4867 done.
Movie 4871 done.
Movie 4873 done.
Movie 4874 done.
Movie 4876 done.
Movie 4877 done.
Movie 4878 done.
Movie 4879 done.
Movie 4880 done.
Movie 4881 done.
Movie 4883 done.
Movie 4885 done.
Movie 4886 done.
Movie 4887 done.
Movie 4888 done.
Movie 4889 don

Movie 5669 done.
Movie 5670 done.
Movie 5672 done.
Movie 5673 done.
Movie 5675 done.
Movie 5678 done.
Movie 5679 done.
Movie 5680 done.
Movie 5682 done.
Movie 5684 done.
Movie 5685 done.
Movie 5688 done.
Movie 5689 done.
Movie 5690 done.
Movie 5693 done.
Movie 5694 done.
Movie 5696 done.
Movie 5699 done.
Movie 5700 done.
Movie 5704 done.
Movie 5706 done.
Movie 5707 done.
Movie 5710 done.
Movie 5712 done.
Movie 5720 done.
Movie 5723 done.
Movie 5733 done.
Movie 5735 done.
Movie 5736 done.
Movie 5741 done.
Movie 5742 done.
Movie 5745 done.
Movie 5746 done.
Movie 5747 done.
Movie 5749 done.
Movie 5752 done.
Movie 5755 done.
Movie 5764 done.
Movie 5767 done.
Movie 5768 done.
Movie 5771 done.
Movie 5772 done.
Movie 5773 done.
Movie 5779 done.
Movie 5780 done.
Movie 5782 done.
Movie 5784 done.
Movie 5785 done.
Movie 5786 done.
Movie 5787 done.
Movie 5788 done.
Movie 5791 done.
Movie 5792 done.
Movie 5796 done.
Movie 5797 done.
Movie 5799 done.
Movie 5801 done.
Movie 5802 done.
Movie 5803 don

Movie 6660 done.
Movie 6662 done.
Movie 6663 done.
Movie 6664 done.
Movie 6665 done.
Movie 6666 done.
Movie 6667 done.
Movie 6669 done.
Movie 6670 done.
Movie 6671 done.
Movie 6678 done.
Movie 6679 done.
Movie 6684 done.
Movie 6686 done.
Movie 6687 done.
Movie 6688 done.
Movie 6689 done.
Movie 6691 done.
Movie 6692 done.
Movie 6695 done.
Movie 6696 done.
Movie 6699 done.
Movie 6702 done.
Movie 6705 done.
Movie 6706 done.
Movie 6707 done.
Movie 6708 done.
Movie 6709 done.
Movie 6710 done.
Movie 6711 done.
Movie 6713 done.
Movie 6715 done.
Movie 6718 done.
Movie 6720 done.
Movie 6721 done.
Movie 6722 done.
Movie 6723 done.
Movie 6724 done.
Movie 6731 done.
Movie 6732 done.
Movie 6734 done.
Movie 6744 done.
Movie 6746 done.
Movie 6748 done.
Movie 6750 done.
Movie 6751 done.
Movie 6752 done.
Movie 6753 done.
Movie 6754 done.
Movie 6755 done.
Movie 6760 done.
Movie 6763 done.
Movie 6764 done.
Movie 6765 done.
Movie 6768 done.
Movie 6769 done.
Movie 6770 done.
Movie 6772 done.
Movie 6773 don

Movie 7705 done.
Movie 7706 done.
Movie 7707 done.
Movie 7708 done.
Movie 7713 done.
Movie 7714 done.
Movie 7716 done.
Movie 7720 done.
Movie 7727 done.
Movie 7728 done.
Movie 7730 done.
Movie 7742 done.
Movie 7743 done.
Movie 7745 done.
Movie 7748 done.
Movie 7749 done.
Movie 7753 done.
Movie 7756 done.
Movie 7757 done.
Movie 7762 done.
Movie 7766 done.
Movie 7767 done.
Movie 7772 done.
Movie 7773 done.
Movie 7781 done.
Movie 7782 done.
Movie 7786 done.
Movie 7789 done.
Movie 7802 done.
Movie 7810 done.
Movie 7811 done.
Movie 7812 done.
Movie 7815 done.
Movie 7820 done.
Movie 7822 done.
Movie 7826 done.
Movie 7831 done.
Movie 7832 done.
Movie 7833 done.
Movie 7834 done.
Movie 7835 done.
Movie 7839 done.
Movie 7840 done.
Movie 7841 done.
Movie 7842 done.
Movie 7843 done.
Movie 7844 done.
Movie 7845 done.
Movie 7846 done.
Movie 7879 done.
Movie 7882 done.
Movie 7883 done.
Movie 7884 done.
Movie 7888 done.
Movie 7889 done.
Movie 7891 done.
Movie 7894 done.
Movie 7895 done.
Movie 7896 don

Movie 26662 done.
Movie 26676 done.
Movie 26680 done.
Movie 26681 done.
Movie 26686 done.
Movie 26693 done.
Movie 26694 done.
Movie 26695 done.
Movie 26696 done.
Movie 26700 done.
Movie 26701 done.
Movie 26704 done.
Movie 26712 done.
Movie 26713 done.
Movie 26717 done.
Movie 26726 done.
Movie 26729 done.
Movie 26732 done.
Movie 26736 done.
Movie 26741 done.
Movie 26743 done.
Movie 26745 done.
Movie 26750 done.
Movie 26761 done.
Movie 26764 done.
Movie 26765 done.
Movie 26776 done.
Movie 26777 done.
Movie 26778 done.
Movie 26782 done.
Movie 26791 done.
Movie 26792 done.
Movie 26796 done.
Movie 26797 done.
Movie 26810 done.
Movie 26812 done.
Movie 26819 done.
Movie 26828 done.
Movie 26838 done.
Movie 26840 done.
Movie 26849 done.
Movie 26854 done.
Movie 26861 done.
Movie 26865 done.
Movie 26870 done.
Movie 26871 done.
Movie 26875 done.
Movie 26887 done.
Movie 26900 done.
Movie 26901 done.
Movie 26903 done.
Movie 26913 done.
Movie 26928 done.
Movie 26940 done.
Movie 26947 done.
Movie 2695

Movie 37741 done.
Movie 37830 done.
Movie 37844 done.
Movie 37853 done.
Movie 37857 done.
Movie 38038 done.
Movie 38061 done.
Movie 38095 done.
Movie 38159 done.
Movie 38164 done.
Movie 38198 done.
Movie 38294 done.
Movie 38304 done.
Movie 38388 done.
Movie 38583 done.
Movie 38798 done.
Movie 38886 done.
Movie 38992 done.
Movie 39183 done.
Movie 39231 done.
Movie 39234 done.
Movie 39292 done.
Movie 39307 done.
Movie 39381 done.
Movie 39400 done.
Movie 39414 done.
Movie 39427 done.
Movie 39435 done.
Movie 39444 done.
Movie 39446 done.
Movie 39449 done.
Movie 39516 done.
Movie 39715 done.
Movie 39801 done.
Movie 39869 done.
Movie 40148 done.
Movie 40278 done.
Movie 40339 done.
Movie 40412 done.
Movie 40414 done.
Movie 40478 done.
Movie 40491 done.
Movie 40578 done.
Movie 40581 done.
Movie 40583 done.
Movie 40597 done.
Movie 40614 done.
Movie 40617 done.
Movie 40629 done.
Movie 40697 done.
Movie 40723 done.
Movie 40732 done.
Movie 40815 done.
Movie 40819 done.
Movie 40826 done.
Movie 4085

Movie 52867 done.
Movie 52885 done.
Movie 52950 done.
Movie 52952 done.
Movie 52967 done.
Movie 52973 done.
Movie 52975 done.
Movie 53000 done.
Movie 53022 done.
Movie 53024 done.
Movie 53121 done.
Movie 53123 done.
Movie 53125 done.
Movie 53127 done.
Movie 53129 done.
Movie 53138 done.
Movie 53140 done.
Movie 53143 done.
Movie 53161 done.
Movie 53280 done.
Movie 53318 done.
Movie 53322 done.
Movie 53326 done.
Movie 53355 done.
Movie 53435 done.
Movie 53447 done.
Movie 53450 done.
Movie 53453 done.
Movie 53460 done.
Movie 53464 done.
Movie 53466 done.
Movie 53468 done.
Movie 53519 done.
Movie 53550 done.
Movie 53574 done.
Movie 53578 done.
Movie 53808 done.
Movie 53883 done.
Movie 53894 done.
Movie 53921 done.
Movie 53953 done.
Movie 53956 done.
Movie 53972 done.
Movie 53974 done.
Movie 53993 done.
Movie 53996 done.
Movie 54001 done.
Movie 54004 done.
Movie 54116 done.
Movie 54121 done.
Movie 54185 done.
Movie 54190 done.
Movie 54256 done.
Movie 54259 done.
Movie 54272 done.
Movie 5427

Movie 66097 done.
Movie 66171 done.
Movie 66198 done.
Movie 66203 done.
Movie 66240 done.
Movie 66297 done.
Movie 66310 done.
Movie 66320 done.
Movie 66335 done.
Movie 66371 done.
Movie 66427 done.
Movie 66509 done.
Movie 66511 done.
Movie 66544 done.
Movie 66665 done.
Movie 66744 done.
Movie 66783 done.
Movie 66785 done.
Movie 66798 done.
Movie 66915 done.
Movie 66934 done.
Movie 66943 done.
Movie 67087 done.
Movie 67168 done.
Movie 67186 done.
Movie 67193 done.
Movie 67197 done.
Movie 67255 done.
Movie 67267 done.
Movie 67295 done.
Movie 67361 done.
Movie 67408 done.
Movie 67508 done.
Movie 67534 done.
Movie 67618 done.
Movie 67665 done.
Movie 67695 done.
Movie 67734 done.
Movie 67788 done.
Movie 67799 done.
Movie 67888 done.
Movie 67923 done.
Movie 67997 done.
Movie 68073 done.
Movie 68135 done.
Movie 68157 done.
Movie 68159 done.
Movie 68194 done.
Movie 68205 done.
Movie 68237 done.
Movie 68269 done.
Movie 68319 done.
Movie 68347 done.
Movie 68358 done.
Movie 68444 done.
Movie 6848

Movie 80906 done.
Movie 80917 done.
Movie 80969 done.
Movie 81018 done.
Movie 81087 done.
Movie 81132 done.
Movie 81156 done.
Movie 81158 done.
Movie 81191 done.
Movie 81229 done.
Movie 81257 done.
Movie 81383 done.
Movie 81417 done.
Movie 81456 done.
Movie 81512 done.
Movie 81520 done.
Movie 81535 done.
Movie 81537 done.
Movie 81562 done.
Movie 81564 done.
Movie 81591 done.
Movie 81681 done.
Movie 81782 done.
Movie 81784 done.
Movie 81786 done.
Movie 81788 done.
Movie 81791 done.
Movie 81819 done.
Movie 81831 done.
Movie 81834 done.
Movie 81845 done.
Movie 81847 done.
Movie 81910 done.
Movie 81932 done.
Movie 81949 done.
Movie 82041 done.
Movie 82053 done.
Movie 82088 done.
Movie 82093 done.
Movie 82095 done.
Movie 82152 done.
Movie 82167 done.
Movie 82169 done.
Movie 82202 done.
Movie 82242 done.
Movie 82366 done.
Movie 82378 done.
Movie 82459 done.
Movie 82461 done.
Movie 82499 done.
Movie 82527 done.
Movie 82534 done.
Movie 82641 done.
Movie 82667 done.
Movie 82684 done.
Movie 8274

Movie 94959 done.
Movie 94985 done.
Movie 95004 done.
Movie 95067 done.
Movie 95088 done.
Movie 95105 done.
Movie 95135 done.
Movie 95145 done.
Movie 95147 done.
Movie 95149 done.
Movie 95163 done.
Movie 95165 done.
Movie 95167 done.
Movie 95170 done.
Movie 95175 done.
Movie 95182 done.
Movie 95193 done.
Movie 95199 done.
Movie 95201 done.
Movie 95207 done.
Movie 95218 done.
Movie 95307 done.
Movie 95309 done.
Movie 95311 done.
Movie 95313 done.
Movie 95377 done.
Movie 95441 done.
Movie 95449 done.
Movie 95473 done.
Movie 95475 done.
Movie 95497 done.
Movie 95499 done.
Movie 95508 done.
Movie 95510 done.
Movie 95519 done.
Movie 95543 done.
Movie 95558 done.
Movie 95583 done.
Movie 95624 done.
Movie 95633 done.
Movie 95654 done.
Movie 95690 done.
Movie 95717 done.
Movie 95720 done.
Movie 95738 done.
Movie 95744 done.
Movie 95761 done.
Movie 95771 done.
Movie 95780 done.
Movie 95796 done.
Movie 95839 done.
Movie 95843 done.
Movie 95858 done.
Movie 95873 done.
Movie 95875 done.
Movie 9593

Movie 108981 done.
Movie 109042 done.
Movie 109161 done.
Movie 109183 done.
Movie 109187 done.
Movie 109191 done.
Movie 109241 done.
Movie 109282 done.
Movie 109295 done.
Movie 109313 done.
Movie 109317 done.
Movie 109372 done.
Movie 109374 done.
Movie 109383 done.
Movie 109416 done.
Movie 109483 done.
Movie 109487 done.
Movie 109569 done.
Movie 109576 done.
Movie 109578 done.
Movie 109596 done.
Movie 109633 done.
Movie 109673 done.
Movie 109687 done.
Movie 109723 done.
Movie 109846 done.
Movie 109848 done.
Movie 109850 done.
Movie 109853 done.
Movie 109864 done.
Movie 109895 done.
Movie 109897 done.
Movie 109941 done.
Movie 109968 done.
Movie 109971 done.
Movie 110102 done.
Movie 110127 done.
Movie 110130 done.
Movie 110281 done.
Movie 110286 done.
Movie 110297 done.
Movie 110330 done.
Movie 110350 done.
Movie 110387 done.
Movie 110501 done.
Movie 110541 done.
Movie 110553 done.
Movie 110586 done.
Movie 110591 done.
Movie 110603 done.
Movie 110611 done.
Movie 110655 done.
Movie 110669

Movie 134130 done.
Movie 134158 done.
Movie 134170 done.
Movie 134184 done.
Movie 134214 done.
Movie 134246 done.
Movie 134248 done.
Movie 134252 done.
Movie 134326 done.
Movie 134334 done.
Movie 134368 done.
Movie 134393 done.
Movie 134515 done.
Movie 134524 done.
Movie 134528 done.
Movie 134775 done.
Movie 134783 done.
Movie 134796 done.
Movie 134808 done.
Movie 134847 done.
Movie 134849 done.
Movie 134853 done.
Movie 134859 done.
Movie 134861 done.
Movie 134881 done.
Movie 135133 done.
Movie 135137 done.
Movie 135143 done.
Movie 135198 done.
Movie 135216 done.
Movie 135288 done.
Movie 135436 done.
Movie 135456 done.
Movie 135518 done.
Movie 135532 done.
Movie 135534 done.
Movie 135536 done.
Movie 135567 done.
Movie 135569 done.
Movie 135777 done.
Movie 135787 done.
Movie 135803 done.
Movie 135815 done.
Movie 135861 done.
Movie 135885 done.
Movie 135887 done.
Movie 135937 done.
Movie 136012 done.
Movie 136016 done.
Movie 136018 done.
Movie 136020 done.
Movie 136024 done.
Movie 136297

Movie 164179 done.
Movie 164200 done.
Movie 164226 done.
Movie 164280 done.
Movie 164367 done.
Movie 164375 done.
Movie 164540 done.
Movie 164647 done.
Movie 164655 done.
Movie 164707 done.
Movie 164753 done.
Movie 164881 done.
Movie 164909 done.
Movie 164917 done.
Movie 165075 done.
Movie 165101 done.
Movie 165103 done.
Movie 165139 done.
Movie 165343 done.
Movie 165347 done.
Movie 165483 done.
Movie 165489 done.
Movie 165529 done.
Movie 165549 done.
Movie 165551 done.
Movie 165635 done.
Movie 165639 done.
Movie 165645 done.
Movie 165671 done.
Movie 165843 done.
Movie 165947 done.
Movie 165959 done.
Movie 165969 done.
Movie 166015 done.
Movie 166024 done.
Movie 166183 done.
Movie 166203 done.
Movie 166291 done.
Movie 166461 done.
Movie 166492 done.
Movie 166526 done.
Movie 166528 done.
Movie 166534 done.
Movie 166558 done.
Movie 166568 done.
Movie 166635 done.
Movie 166643 done.
Movie 166705 done.
Movie 166946 done.
Movie 167018 done.
Movie 167036 done.
Movie 167064 done.
Movie 167296

In [71]:
tag[0]

[1,
 [2355,
  122918,
  3114,
  89745,
  108932,
  68954,
  296,
  2,
  3,
  5,
  7,
  11,
  14,
  16,
  17,
  21,
  22,
  25,
  26,
  28]]

# Hybridization Step

In [65]:
watched = rating_train.groupby(['userId'], as_index=False)['movieId','rating','timestamp'].agg(lambda x: list(x))
watched = watched.drop(['rating','timestamp'],axis=1)
watched

Unnamed: 0,userId,movieId
0,1,"[163, 2478, 1580, 3053, 2046, 2985, 593, 1031,..."
1,2,"[86345, 74458, 58559, 1704, 318, 89774, 46970,..."
2,3,"[6238, 5919, 647, 1371, 2080, 72378, 2288, 130..."
3,4,"[2599, 45, 1304, 593, 3033, 1196, 3408, 902, 3..."
4,5,"[357, 590, 410, 349, 608, 318, 531, 150, 515, ..."
...,...,...
605,606,"[6373, 6031, 1300, 4977, 3681, 1393, 68872, 15..."
606,607,"[457, 2747, 241, 891, 527, 1023, 204, 1387, 1,..."
607,608,"[1777, 5956, 4262, 45499, 736, 2054, 110, 4011..."
608,609,"[590, 592, 329, 231, 185, 339, 356, 1, 457, 16..."


In [66]:
movie_title = pd.DataFrame(index=movies.movieId, columns = ['Title'], data=np.array([movies.title]).T)
movie_title

Unnamed: 0_level_0,Title
movieId,Unnamed: 1_level_1
1,Toy Story (1995)
2,Jumanji (1995)
3,Grumpier Old Men (1995)
4,Waiting to Exhale (1995)
5,Father of the Bride Part II (1995)
...,...
193581,Black Butler: Book of the Atlantic (2017)
193583,No Game No Life: Zero (2017)
193585,Flint (2017)
193587,Bungo Stray Dogs: Dead Apple (2018)


In [67]:
def getMovieCandidate(movie_id, movie):
    dataG = list(filter(lambda x: x[0]==movie_id, genre))
    dataG = list(map(list, np.array(dataG)[:,1]))
    movie += [int(i) for i in ((str(dataG)[2:-2]).replace(",","")).split()]
    
    dataT = list(filter(lambda x: x[0]==movie_id, tag))
    dataT = list(map(list, np.array(dataT)[:,1]))
    movie += [int(i) for i in ((str(dataT)[2:-2]).replace(",","")).split()]
    
    movie = list(OrderedDict.fromkeys(movie))
    return movie

def getMovieList(movieId):
    movie = []
    for id in movieId:
        movie = getMovieCandidate(id,movie)
    return movie

def getRatingList(movie):
    rate = []
    for i in movie:
        if i in predict.columns:
            rate.append([i,predict[i].loc[user.userId]])
    return rate

def getRecommendation(user):
    rec = []
    movie = getMovieList(user.movieId)
    rate = getRatingList(movie)
    rate.sort(key = lambda rate: rate[1], reverse=True) 
    rate = rate[:20]
    for i in rate:
        rec.append(movie_title.Title.loc[i[0]])
    return rec

In [68]:
rec_list = []
print('Get movie recommendations for every user')
for i in range(len(watched)):
    user = watched.loc[i]
    rec_list.append(getRecommendation(user))
    print('User',user.userId,'done.')

Get movie recommendations for every user
User 1 done.
User 2 done.
User 3 done.
User 4 done.
User 5 done.
User 6 done.
User 7 done.
User 8 done.
User 9 done.
User 10 done.
User 11 done.
User 12 done.
User 13 done.
User 14 done.
User 15 done.
User 16 done.
User 17 done.
User 18 done.
User 19 done.
User 20 done.
User 21 done.
User 22 done.
User 23 done.
User 24 done.
User 25 done.
User 26 done.
User 27 done.
User 28 done.
User 29 done.
User 30 done.
User 31 done.
User 32 done.
User 33 done.
User 34 done.
User 35 done.
User 36 done.
User 37 done.
User 38 done.
User 39 done.
User 40 done.
User 41 done.
User 42 done.
User 43 done.
User 44 done.
User 45 done.
User 46 done.
User 47 done.
User 48 done.
User 49 done.
User 50 done.
User 51 done.
User 52 done.
User 53 done.
User 54 done.
User 55 done.
User 56 done.
User 57 done.
User 58 done.
User 59 done.
User 60 done.
User 61 done.
User 62 done.
User 63 done.
User 64 done.
User 65 done.
User 66 done.
User 67 done.
User 68 done.
User 69 done.
Us

User 552 done.
User 553 done.
User 554 done.
User 555 done.
User 556 done.
User 557 done.
User 558 done.
User 559 done.
User 560 done.
User 561 done.
User 562 done.
User 563 done.
User 564 done.
User 565 done.
User 566 done.
User 567 done.
User 568 done.
User 569 done.
User 570 done.
User 571 done.
User 572 done.
User 573 done.
User 574 done.
User 575 done.
User 576 done.
User 577 done.
User 578 done.
User 579 done.
User 580 done.
User 581 done.
User 582 done.
User 583 done.
User 584 done.
User 585 done.
User 586 done.
User 587 done.
User 588 done.
User 589 done.
User 590 done.
User 591 done.
User 592 done.
User 593 done.
User 594 done.
User 595 done.
User 596 done.
User 597 done.
User 598 done.
User 599 done.
User 600 done.
User 601 done.
User 602 done.
User 603 done.
User 604 done.
User 605 done.
User 606 done.
User 607 done.
User 608 done.
User 609 done.
User 610 done.


In [69]:
rec_list[0]

['Yojimbo (1961)',
 'Lord of the Rings: The Fellowship of the Ring, The (2001)',
 'Matrix, The (1999)',
 'Fight Club (1999)',
 'Kolya (Kolja) (1996)',
 'L.A. Confidential (1997)',
 'Life Is Beautiful (La Vita è bella) (1997)',
 'Seven Samurai (Shichinin no samurai) (1954)',
 'Verdict, The (1982)',
 "Man Bites Dog (C'est arrivé près de chez vous) (1992)",
 'Philadelphia Story, The (1940)',
 'Wings of Desire (Himmel über Berlin, Der) (1987)',
 'Reservoir Dogs (1992)',
 'Conversation, The (1974)',
 'Snatch (2000)',
 'Lord of the Rings: The Two Towers, The (2002)',
 '400 Blows, The (Les quatre cents coups) (1959)',
 'On the Waterfront (1954)',
 'Manchurian Candidate, The (1962)',
 'Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000)']

In [70]:
movie_recommendations = pd.DataFrame(data=rec_list, index=watched.userId, columns=['Movie1','Movie2','Movie3','Movie4','Movie5','Movie6','Movie7','Movie8','Movie9','Movie10','Movie11','Movie12','Movie13','Movie14','Movie15','Movie16','Movie17','Movie18','Movie19','Movie20'])
movie_recommendations

Unnamed: 0_level_0,Movie1,Movie2,Movie3,Movie4,Movie5,Movie6,Movie7,Movie8,Movie9,Movie10,Movie11,Movie12,Movie13,Movie14,Movie15,Movie16,Movie17,Movie18,Movie19,Movie20
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,Yojimbo (1961),"Lord of the Rings: The Fellowship of the Ring,...","Matrix, The (1999)",Fight Club (1999),Kolya (Kolja) (1996),L.A. Confidential (1997),Life Is Beautiful (La Vita è bella) (1997),Seven Samurai (Shichinin no samurai) (1954),"Verdict, The (1982)",Man Bites Dog (C'est arrivé près de chez vous)...,"Philadelphia Story, The (1940)","Wings of Desire (Himmel über Berlin, Der) (1987)",Reservoir Dogs (1992),"Conversation, The (1974)",Snatch (2000),"Lord of the Rings: The Two Towers, The (2002)","400 Blows, The (Les quatre cents coups) (1959)",On the Waterfront (1954),"Manchurian Candidate, The (1962)","Crouching Tiger, Hidden Dragon (Wo hu cang lon..."
2,"Matrix, The (1999)",Fight Club (1999),In the Mood For Love (Fa yeung nin wa) (2000),Life Is Beautiful (La Vita è bella) (1997),Seven Samurai (Shichinin no samurai) (1954),Man Bites Dog (C'est arrivé près de chez vous)...,Reservoir Dogs (1992),"400 Blows, The (Les quatre cents coups) (1959)","Crouching Tiger, Hidden Dragon (Wo hu cang lon...",Schindler's List (1993),Rear Window (1954),"Godfather: Part II, The (1974)",Dr. Strangelove or: How I Learned to Stop Worr...,"Godfather, The (1972)",Heidi Fleiss: Hollywood Madam (1995),Hoop Dreams (1994),Apocalypse Now (1979),Witness for the Prosecution (1957),"Sixth Sense, The (1999)","Shining, The (1980)"
3,"Matrix, The (1999)","Philadelphia Story, The (1940)",Ran (1985),Paths of Glory (1957),Pulp Fiction (1994),American History X (1998),"7th Voyage of Sinbad, The (1958)",Heathers (1989),Gattaca (1997),Almost Famous (2000),Ferris Bueller's Day Off (1986),Top Hat (1935),Star Trek II: The Wrath of Khan (1982),On the Town (1949),Apocalypse Now (1979),Funny Games U.S. (2007),Star Wars: Episode V - The Empire Strikes Back...,It's Such a Beautiful Day (2012),Game of Death (1978),"Deer Hunter, The (1978)"
4,Yojimbo (1961),Fight Club (1999),"Matrix, The (1999)","Lord of the Rings: The Fellowship of the Ring,...",Dune (2000),Rebel Without a Cause (1955),Kolya (Kolja) (1996),Dersu Uzala (1975),"Verdict, The (1982)",In the Mood For Love (Fa yeung nin wa) (2000),Key Largo (1948),Schindler's List (1993),Man Bites Dog (C'est arrivé près de chez vous)...,"Conversation, The (1974)",Life Is Beautiful (La Vita è bella) (1997),Never Cry Wolf (1983),True Grit (1969),Guys and Dolls (1955),"Jetée, La (1962)",Dangerous Liaisons (1988)
5,Fight Club (1999),Kolya (Kolja) (1996),"O Brother, Where Art Thou? (2000)",Schindler's List (1993),"Wings of Desire (Himmel über Berlin, Der) (1987)",Man Bites Dog (C'est arrivé près de chez vous)...,True Grit (1969),"Grand Day Out with Wallace and Gromit, A (1989)",Snatch (2000),"Philadelphia Story, The (1940)",Reservoir Dogs (1992),"Godfather: Part II, The (1974)","Deer Hunter, The (1978)","Godfather, The (1972)",Dr. Strangelove or: How I Learned to Stop Worr...,"Boondock Saints, The (2000)",Neon Genesis Evangelion: The End of Evangelion...,Apocalypse Now (1979),"Stunt Man, The (1980)",Saving Private Ryan (1998)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,Guess Who's Coming to Dinner (1967),Yojimbo (1961),"Lord of the Rings: The Fellowship of the Ring,...","Matrix, The (1999)",Fight Club (1999),Kolya (Kolja) (1996),Life Is Beautiful (La Vita è bella) (1997),In the Mood For Love (Fa yeung nin wa) (2000),L.A. Confidential (1997),Man Bites Dog (C'est arrivé près de chez vous)...,Seven Samurai (Shichinin no samurai) (1954),Never Cry Wolf (1983),"Verdict, The (1982)","Philadelphia Story, The (1940)",Reservoir Dogs (1992),"Wings of Desire (Himmel über Berlin, Der) (1987)",Dune (2000),"Lord of the Rings: The Two Towers, The (2002)",Rebel Without a Cause (1955),Dersu Uzala (1975)
607,Guess Who's Coming to Dinner (1967),Yojimbo (1961),"Matrix, The (1999)",Fight Club (1999),Kolya (Kolja) (1996),In the Mood For Love (Fa yeung nin wa) (2000),Man Bites Dog (C'est arrivé près de chez vous)...,Never Cry Wolf (1983),Seven Samurai (Shichinin no samurai) (1954),Do the Right Thing (1989),"Philadelphia Story, The (1940)","Verdict, The (1982)",Reservoir Dogs (1992),Dune (2000),"Grand Day Out with Wallace and Gromit, A (1989)",Snatch (2000),Manhattan (1979),Dersu Uzala (1975),"Conversation, The (1974)","Lock, Stock & Two Smoking Barrels (1998)"
608,Guess Who's Coming to Dinner (1967),Yojimbo (1961),"Lord of the Rings: The Fellowship of the Ring,...","Matrix, The (1999)",Fight Club (1999),Kolya (Kolja) (1996),Life Is Beautiful (La Vita è bella) (1997),In the Mood For Love (Fa yeung nin wa) (2000),L.A. Confidential (1997),Man Bites Dog (C'est arrivé près de chez vous)...,Never Cry Wolf (1983),Seven Samurai (Shichinin no samurai) (1954),Do the Right Thing (1989),"Philadelphia Story, The (1940)","Verdict, The (1982)",Reservoir Dogs (1992),"Wings of Desire (Himmel über Berlin, Der) (1987)","Lord of the Rings: The Two Towers, The (2002)",Dune (2000),Rebel Without a Cause (1955)
609,"Matrix, The (1999)",Fight Club (1999),Life Is Beautiful (La Vita è bella) (1997),Man Bites Dog (C'est arrivé près de chez vous)...,"Grand Day Out with Wallace and Gromit, A (1989)","Philadelphia Story, The (1940)",Schindler's List (1993),Reservoir Dogs (1992),True Grit (1969),Snatch (2000),"400 Blows, The (Les quatre cents coups) (1959)","Godfather: Part II, The (1974)","Day of the Doctor, The (2013)","Godfather, The (1972)","Lock, Stock & Two Smoking Barrels (1998)",Heidi Fleiss: Hollywood Madam (1995),Dr. Strangelove or: How I Learned to Stop Worr...,"Deer Hunter, The (1978)","Iron Giant, The (1999)",Apocalypse Now (1979)
