# Import Libraries

In [1]:
import pandas as pd
import numpy as np

# Load Dataset

In [2]:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
df = pd.read_csv("ml-100k/u.data", sep='\t', names=r_cols, encoding='latin-1')

In [3]:
df

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
...,...,...,...,...
99995,880,476,3,880175444
99996,716,204,5,879795543
99997,276,1090,1,874795795
99998,13,225,2,882399156


# Get Unique users

In [4]:
n_users = df.movie_id.unique().shape[0]

In [5]:
n_users

1682

# Create pivot table for user and movie based on ratings

In [6]:
data_matrix = df.pivot_table(index='user_id', columns= 'movie_id', values='rating')

In [7]:
data_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


# Replace null values with 0

In [8]:
data_matrix.replace(np.nan, 0, inplace=True)

In [9]:
data_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
940,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941,5.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
942,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Find Cosine Similatity for user

In [30]:
data_matrix.T

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,4.0,...,2.0,3.0,4.0,0.0,4.0,0.0,0.0,5.0,0.0,0.0
2,3.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,...,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0
3,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,3.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,4.0,...,5.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0
5,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1678,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1679,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1680,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
from sklearn.metrics.pairwise import pairwise_distances
item_similarity = pairwise_distances(data_matrix.T, metric='cosine')

In [32]:
item_similarity

array([[2.22044605e-16, 5.97617822e-01, 6.69755213e-01, ...,
        1.00000000e+00, 9.52816933e-01, 9.52816933e-01],
       [5.97617822e-01, 2.22044605e-16, 7.26930825e-01, ...,
        1.00000000e+00, 9.21700637e-01, 9.21700637e-01],
       [6.69755213e-01, 7.26930825e-01, 0.00000000e+00, ...,
        1.00000000e+00, 1.00000000e+00, 9.03124947e-01],
       ...,
       [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
        0.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [9.52816933e-01, 9.21700637e-01, 1.00000000e+00, ...,
        1.00000000e+00, 0.00000000e+00, 1.00000000e+00],
       [9.52816933e-01, 9.21700637e-01, 9.03124947e-01, ...,
        1.00000000e+00, 1.00000000e+00, 0.00000000e+00]])

In [33]:
item_similarity.shape

(1682, 1682)

# prediction table

In [34]:
data_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
940,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941,5.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
942,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
data_matrix.dot(item_similarity)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,616.428901,664.653086,746.965311,598.695265,746.895121,861.239113,614.121389,640.125900,672.204338,768.310352,...,970.332404,957.025412,964.183172,964.183172,957.561379,979.390892,979.390892,979.390892,944.864780,935.755460
2,149.928769,185.857076,185.418658,168.599311,191.110165,202.767922,152.597478,168.958983,155.127700,181.010000,...,226.446251,226.791202,223.183521,223.183521,223.055189,223.898556,223.898556,223.898556,225.168694,226.403730
3,118.353879,128.171486,129.396607,121.411669,130.620441,141.436663,114.902015,124.035129,120.577110,131.568071,...,150.034062,150.322681,142.789070,142.789070,148.362888,140.973442,140.973442,140.973442,147.997642,150.006665
4,74.164114,83.319481,85.798899,78.763489,86.474094,96.620632,73.453167,81.028470,79.128511,90.265288,...,102.534061,102.455151,101.297548,101.297548,100.948439,96.475974,96.475974,96.475974,101.679125,102.840639
5,310.424915,320.352624,388.706670,305.515456,378.749907,466.078984,320.255770,327.776285,365.999511,409.723092,...,497.430734,491.614965,496.463530,496.463530,495.961038,502.973052,502.973052,502.973052,483.675488,485.977056
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,127.869019,159.176383,162.715003,151.201140,164.289177,193.548792,135.663308,153.222188,139.924952,176.622928,...,206.957763,206.669111,199.921287,199.921287,201.851538,208.242461,208.242461,208.242461,203.561393,205.294065
940,227.021807,258.458740,290.115015,222.515657,285.344220,329.526945,224.100883,228.931693,243.113144,289.759774,...,363.572215,357.953407,362.738981,362.738981,359.550311,363.982653,363.982653,363.982653,357.072408,358.022161
941,44.615597,62.938190,63.373646,58.330925,69.191369,80.282412,45.086406,58.953568,55.765495,70.633354,...,88.181938,88.097889,87.221223,87.221223,85.681481,88.301263,88.301263,88.301263,85.582358,86.327699
942,217.935278,243.362934,279.039306,220.667291,271.183312,305.163419,226.949858,216.993556,236.428363,270.036912,...,330.146861,323.014094,328.544719,328.544719,329.271697,332.693857,332.693857,332.693857,325.437585,330.300749


In [36]:
np.array([np.abs(item_similarity).sum(axis=1)])

array([[1381.26770602, 1397.87767225, 1476.39823586, ..., 1665.19308647,
        1648.67109923, 1651.24638904]])

In [41]:
def predict(data_matrix, similarity, type):
    if type=='item':
        pred = data_matrix.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred
        

In [42]:
item_prediction = predict(data_matrix, item_similarity, type='item')

In [43]:
item_prediction

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.446278,0.475473,0.505938,0.443633,0.512667,0.547939,0.446243,0.463059,0.474916,0.515821,...,0.580579,0.576202,0.582478,0.582478,0.575717,0.588155,0.588155,0.588155,0.573107,0.566696
2,0.108544,0.132957,0.125589,0.124932,0.131178,0.129005,0.110883,0.122223,0.109599,0.121525,...,0.135490,0.136546,0.134829,0.134829,0.134108,0.134458,0.134458,0.134458,0.136576,0.137111
3,0.085685,0.091690,0.087643,0.089966,0.089658,0.089985,0.083492,0.089725,0.085188,0.088331,...,0.089770,0.090506,0.086261,0.086261,0.089201,0.084659,0.084659,0.084659,0.089768,0.090845
4,0.053693,0.059604,0.058114,0.058364,0.059356,0.061472,0.053374,0.058615,0.055905,0.060601,...,0.061349,0.061686,0.061195,0.061195,0.060693,0.057937,0.057937,0.057937,0.061673,0.062281
5,0.224739,0.229171,0.263280,0.226387,0.259973,0.296529,0.232710,0.237109,0.258581,0.275076,...,0.297628,0.295990,0.299922,0.299922,0.298188,0.302051,0.302051,0.302051,0.293373,0.294309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.092574,0.113870,0.110211,0.112040,0.112768,0.123140,0.098578,0.110839,0.098858,0.118579,...,0.123829,0.124430,0.120776,0.120776,0.121360,0.125056,0.125056,0.125056,0.123470,0.124327
940,0.164358,0.184894,0.196502,0.164884,0.195860,0.209652,0.162840,0.165606,0.171761,0.194536,...,0.217536,0.215515,0.219136,0.219136,0.216173,0.218583,0.218583,0.218583,0.216582,0.216819
941,0.032300,0.045024,0.042924,0.043223,0.047493,0.051077,0.032761,0.042646,0.039399,0.047421,...,0.052762,0.053042,0.052692,0.052692,0.051514,0.053028,0.053028,0.053028,0.051910,0.052280
942,0.157779,0.174095,0.189000,0.163514,0.186140,0.194151,0.164910,0.156970,0.167038,0.181295,...,0.197537,0.194479,0.198479,0.198479,0.197969,0.199793,0.199793,0.199793,0.197394,0.200031


### As per User based filtering ,first have to find similarity between the input user and others

In [44]:
input_item = 34

In [45]:
item_sim_table=pd.DataFrame(item_similarity)

In [46]:
item_sim_table

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,2.220446e-16,5.976178e-01,0.669755,5.450621e-01,0.713286,0.883656,0.379021,0.518886,0.503712,0.726065,...,0.964613,1.0,1.000000,1.000000,0.964613,1.0,1.0,1.0,0.952817,0.952817
1,5.976178e-01,2.220446e-16,0.726931,4.974292e-01,0.681164,0.916437,0.616597,0.662998,0.744748,0.828918,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.921701,0.921701
2,6.697552e-01,7.269308e-01,0.000000,6.751336e-01,0.787043,0.893278,0.627079,0.799206,0.726331,0.841896,...,1.000000,1.0,1.000000,1.000000,0.967708,1.0,1.0,1.0,1.000000,0.903125
3,5.450621e-01,4.974292e-01,0.675134,1.110223e-16,0.665761,0.909692,0.510717,0.509764,0.580956,0.747439,...,1.000000,1.0,0.905978,0.905978,0.962391,1.0,1.0,1.0,0.943587,0.924782
4,7.132865e-01,6.811638e-01,0.787043,6.657605e-01,0.000000,0.962701,0.665231,0.740839,0.727552,0.944547,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,1.000000,0.905789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,1.000000e+00,1.000000e+00,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1678,1.000000e+00,1.000000e+00,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1679,1.000000e+00,1.000000e+00,1.000000,1.000000e+00,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,0.0,0.0,0.0,1.000000,1.000000
1680,9.528169e-01,9.217006e-01,1.000000,9.435867e-01,1.000000,1.000000,0.948502,0.917967,0.942640,1.000000,...,1.000000,1.0,1.000000,1.000000,1.000000,1.0,1.0,1.0,0.000000,1.000000


In [47]:
similar_input_item= item_sim_table[input_item].sort_values(ascending=True).head(5).index

In [48]:
similar_input_item

Index([34, 77, 246, 1030, 794], dtype='int64')

In [51]:
item_sim_table[input_item].sort_values(ascending=True)

34      1.110223e-16
77      3.969593e-01
246     4.007394e-01
1030    5.342057e-01
794     5.470462e-01
            ...     
1133    1.000000e+00
1136    1.000000e+00
1137    1.000000e+00
1126    1.000000e+00
1681    1.000000e+00
Name: 34, Length: 1682, dtype: float64

In [52]:
similar_input_item

Index([34, 77, 246, 1030, 794], dtype='int64')

In [53]:
#4.Convert into a list

similar_item_input=list(similar_input_item)

In [55]:
similar_item_input

[34, 77, 246, 1030, 794]

In [58]:
#5. Using similar_item_input,can select movie id from ratings table

similar_item_userid_list=[]
for sim_item in similar_item_input:
    sim=list(df[df['movie_id']==sim_item]['user_id'])
    similar_item_userid_list.append(sim)

In [59]:
similar_item_userid_list

[[286, 276, 94, 184, 1, 551, 297],
 [92,
  222,
  224,
  194,
  59,
  291,
  262,
  64,
  83,
  128,
  106,
  135,
  334,
  301,
  276,
  184,
  188,
  201,
  110,
  234,
  256,
  286,
  320,
  372,
  345,
  244,
  381,
  60,
  375,
  42,
  330,
  56,
  43,
  115,
  270,
  417,
  280,
  405,
  183,
  343,
  94,
  474,
  347,
  313,
  416,
  271,
  269,
  497,
  506,
  109,
  504,
  7,
  246,
  429,
  363,
  346,
  76,
  95,
  72,
  399,
  311,
  566,
  496,
  374,
  618,
  450,
  505,
  293,
  318,
  593,
  524,
  457,
  328,
  332,
  659,
  650,
  178,
  1,
  655,
  299,
  453,
  643,
  308,
  268,
  49,
  653,
  454,
  664,
  766,
  528,
  532,
  437,
  741,
  487,
  215,
  551,
  764,
  577,
  586,
  478,
  371,
  533,
  378,
  521,
  774,
  500,
  721,
  389,
  711,
  881,
  545,
  843,
  823,
  633,
  627,
  870,
  815,
  916,
  893,
  455,
  130,
  561,
  749,
  145,
  647,
  303,
  393,
  394,
  442,
  495,
  267,
  65,
  682,
  690,
  922,
  896,
  186,
  156,
  452,
  554,
  9

In [60]:
#6. Convert all the list as single
import itertools
similar_item_userid_single_list=list(itertools.chain.from_iterable(similar_item_userid_list))

In [61]:
len(similar_item_userid_single_list)

348

In [62]:
#7. Unique movieid from the list

unique_userid_similar_item=set(similar_item_userid_single_list)

In [63]:
unique_userid_similar_item

{1,
 6,
 7,
 13,
 18,
 26,
 27,
 32,
 38,
 42,
 43,
 49,
 50,
 56,
 58,
 59,
 60,
 63,
 64,
 65,
 69,
 70,
 72,
 73,
 76,
 77,
 79,
 83,
 85,
 89,
 92,
 94,
 95,
 99,
 102,
 104,
 106,
 109,
 110,
 113,
 115,
 116,
 128,
 130,
 135,
 139,
 145,
 150,
 152,
 156,
 174,
 176,
 178,
 183,
 184,
 186,
 188,
 189,
 193,
 194,
 195,
 201,
 212,
 214,
 215,
 221,
 222,
 224,
 232,
 234,
 243,
 244,
 246,
 256,
 262,
 267,
 268,
 269,
 270,
 271,
 276,
 279,
 280,
 286,
 287,
 291,
 293,
 294,
 295,
 296,
 297,
 299,
 301,
 303,
 305,
 308,
 311,
 313,
 314,
 318,
 320,
 323,
 327,
 328,
 330,
 332,
 334,
 339,
 342,
 343,
 344,
 345,
 346,
 347,
 354,
 359,
 361,
 363,
 367,
 371,
 372,
 374,
 375,
 376,
 378,
 381,
 385,
 387,
 389,
 392,
 393,
 394,
 399,
 405,
 416,
 417,
 429,
 432,
 433,
 435,
 436,
 437,
 439,
 442,
 445,
 450,
 452,
 453,
 454,
 455,
 457,
 463,
 467,
 468,
 470,
 473,
 474,
 477,
 478,
 486,
 487,
 490,
 495,
 496,
 497,
 498,
 500,
 503,
 504,
 505,
 506,
 521,
 523,

In [64]:
#8. Input user watched movie_list

input_item_watched_userid=list(df[df['movie_id']==input_item]['user_id'].values)

In [65]:
input_item_watched_userid

[286, 276, 94, 184, 1, 551, 297]

In [66]:
#9. Create a list which should have recom movieid to the input user

recom=[]
for per_id in unique_userid_similar_item:
    if(per_id in input_item_watched_userid):
        pass
    else:
        recom.append(per_id)

In [67]:
recom

[6,
 7,
 521,
 523,
 524,
 13,
 528,
 18,
 532,
 533,
 536,
 26,
 27,
 541,
 542,
 32,
 545,
 38,
 42,
 43,
 554,
 557,
 560,
 49,
 561,
 50,
 566,
 567,
 56,
 58,
 59,
 60,
 63,
 64,
 577,
 65,
 578,
 69,
 582,
 70,
 72,
 73,
 586,
 76,
 77,
 79,
 592,
 593,
 83,
 595,
 85,
 89,
 92,
 95,
 99,
 102,
 104,
 106,
 618,
 620,
 109,
 110,
 624,
 113,
 115,
 627,
 116,
 633,
 635,
 637,
 128,
 130,
 643,
 642,
 135,
 647,
 648,
 650,
 139,
 653,
 654,
 655,
 145,
 659,
 150,
 662,
 664,
 152,
 156,
 669,
 682,
 174,
 176,
 178,
 690,
 693,
 183,
 697,
 186,
 699,
 188,
 189,
 193,
 194,
 195,
 711,
 712,
 201,
 717,
 719,
 721,
 212,
 214,
 215,
 727,
 730,
 221,
 222,
 224,
 736,
 741,
 232,
 234,
 749,
 243,
 244,
 246,
 758,
 762,
 764,
 766,
 256,
 770,
 262,
 774,
 778,
 267,
 268,
 269,
 270,
 271,
 782,
 790,
 279,
 280,
 795,
 796,
 287,
 291,
 293,
 294,
 295,
 296,
 299,
 301,
 815,
 303,
 305,
 819,
 308,
 311,
 823,
 313,
 314,
 828,
 318,
 320,
 834,
 323,
 327,
 328,
 330,
 8

In [68]:
len(recom)

267

In [69]:
sorted(recom)

[6,
 7,
 13,
 18,
 26,
 27,
 32,
 38,
 42,
 43,
 49,
 50,
 56,
 58,
 59,
 60,
 63,
 64,
 65,
 69,
 70,
 72,
 73,
 76,
 77,
 79,
 83,
 85,
 89,
 92,
 95,
 99,
 102,
 104,
 106,
 109,
 110,
 113,
 115,
 116,
 128,
 130,
 135,
 139,
 145,
 150,
 152,
 156,
 174,
 176,
 178,
 183,
 186,
 188,
 189,
 193,
 194,
 195,
 201,
 212,
 214,
 215,
 221,
 222,
 224,
 232,
 234,
 243,
 244,
 246,
 256,
 262,
 267,
 268,
 269,
 270,
 271,
 279,
 280,
 287,
 291,
 293,
 294,
 295,
 296,
 299,
 301,
 303,
 305,
 308,
 311,
 313,
 314,
 318,
 320,
 323,
 327,
 328,
 330,
 332,
 334,
 339,
 342,
 343,
 344,
 345,
 346,
 347,
 354,
 359,
 361,
 363,
 367,
 371,
 372,
 374,
 375,
 376,
 378,
 381,
 385,
 387,
 389,
 392,
 393,
 394,
 399,
 405,
 416,
 417,
 429,
 432,
 433,
 435,
 436,
 437,
 439,
 442,
 445,
 450,
 452,
 453,
 454,
 455,
 457,
 463,
 467,
 468,
 470,
 473,
 474,
 477,
 478,
 486,
 487,
 490,
 495,
 496,
 497,
 498,
 500,
 503,
 504,
 505,
 506,
 521,
 523,
 524,
 528,
 532,
 533,
 536,
 5

In [71]:
# Cross Checking 
sorted(recom)

[6,
 7,
 13,
 18,
 26,
 27,
 32,
 38,
 42,
 43,
 49,
 50,
 56,
 58,
 59,
 60,
 63,
 64,
 65,
 69,
 70,
 72,
 73,
 76,
 77,
 79,
 83,
 85,
 89,
 92,
 95,
 99,
 102,
 104,
 106,
 109,
 110,
 113,
 115,
 116,
 128,
 130,
 135,
 139,
 145,
 150,
 152,
 156,
 174,
 176,
 178,
 183,
 186,
 188,
 189,
 193,
 194,
 195,
 201,
 212,
 214,
 215,
 221,
 222,
 224,
 232,
 234,
 243,
 244,
 246,
 256,
 262,
 267,
 268,
 269,
 270,
 271,
 279,
 280,
 287,
 291,
 293,
 294,
 295,
 296,
 299,
 301,
 303,
 305,
 308,
 311,
 313,
 314,
 318,
 320,
 323,
 327,
 328,
 330,
 332,
 334,
 339,
 342,
 343,
 344,
 345,
 346,
 347,
 354,
 359,
 361,
 363,
 367,
 371,
 372,
 374,
 375,
 376,
 378,
 381,
 385,
 387,
 389,
 392,
 393,
 394,
 399,
 405,
 416,
 417,
 429,
 432,
 433,
 435,
 436,
 437,
 439,
 442,
 445,
 450,
 452,
 453,
 454,
 455,
 457,
 463,
 467,
 468,
 470,
 473,
 474,
 477,
 478,
 486,
 487,
 490,
 495,
 496,
 497,
 498,
 500,
 503,
 504,
 505,
 506,
 521,
 523,
 524,
 528,
 532,
 533,
 536,
 5

In [72]:
# Cross Checking
sorted(unique_userid_similar_item)

[1,
 6,
 7,
 13,
 18,
 26,
 27,
 32,
 38,
 42,
 43,
 49,
 50,
 56,
 58,
 59,
 60,
 63,
 64,
 65,
 69,
 70,
 72,
 73,
 76,
 77,
 79,
 83,
 85,
 89,
 92,
 94,
 95,
 99,
 102,
 104,
 106,
 109,
 110,
 113,
 115,
 116,
 128,
 130,
 135,
 139,
 145,
 150,
 152,
 156,
 174,
 176,
 178,
 183,
 184,
 186,
 188,
 189,
 193,
 194,
 195,
 201,
 212,
 214,
 215,
 221,
 222,
 224,
 232,
 234,
 243,
 244,
 246,
 256,
 262,
 267,
 268,
 269,
 270,
 271,
 276,
 279,
 280,
 286,
 287,
 291,
 293,
 294,
 295,
 296,
 297,
 299,
 301,
 303,
 305,
 308,
 311,
 313,
 314,
 318,
 320,
 323,
 327,
 328,
 330,
 332,
 334,
 339,
 342,
 343,
 344,
 345,
 346,
 347,
 354,
 359,
 361,
 363,
 367,
 371,
 372,
 374,
 375,
 376,
 378,
 381,
 385,
 387,
 389,
 392,
 393,
 394,
 399,
 405,
 416,
 417,
 429,
 432,
 433,
 435,
 436,
 437,
 439,
 442,
 445,
 450,
 452,
 453,
 454,
 455,
 457,
 463,
 467,
 468,
 470,
 473,
 474,
 477,
 478,
 486,
 487,
 490,
 495,
 496,
 497,
 498,
 500,
 503,
 504,
 505,
 506,
 521,
 523,

In [76]:
# Checking the common movie list
list(set(unique_userid_similar_item)&set(input_item_watched_userid))

[1, 551, 297, 276, 184, 94, 286]

In [77]:
# Checking the common movie list
list(set(recom)&set(input_item_watched_userid))

[]

In [78]:
item_pred=pd.DataFrame(item_prediction)

In [79]:
item_pred

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.446278,0.475473,0.505938,0.443633,0.512667,0.547939,0.446243,0.463059,0.474916,0.515821,...,0.580579,0.576202,0.582478,0.582478,0.575717,0.588155,0.588155,0.588155,0.573107,0.566696
2,0.108544,0.132957,0.125589,0.124932,0.131178,0.129005,0.110883,0.122223,0.109599,0.121525,...,0.135490,0.136546,0.134829,0.134829,0.134108,0.134458,0.134458,0.134458,0.136576,0.137111
3,0.085685,0.091690,0.087643,0.089966,0.089658,0.089985,0.083492,0.089725,0.085188,0.088331,...,0.089770,0.090506,0.086261,0.086261,0.089201,0.084659,0.084659,0.084659,0.089768,0.090845
4,0.053693,0.059604,0.058114,0.058364,0.059356,0.061472,0.053374,0.058615,0.055905,0.060601,...,0.061349,0.061686,0.061195,0.061195,0.060693,0.057937,0.057937,0.057937,0.061673,0.062281
5,0.224739,0.229171,0.263280,0.226387,0.259973,0.296529,0.232710,0.237109,0.258581,0.275076,...,0.297628,0.295990,0.299922,0.299922,0.298188,0.302051,0.302051,0.302051,0.293373,0.294309
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.092574,0.113870,0.110211,0.112040,0.112768,0.123140,0.098578,0.110839,0.098858,0.118579,...,0.123829,0.124430,0.120776,0.120776,0.121360,0.125056,0.125056,0.125056,0.123470,0.124327
940,0.164358,0.184894,0.196502,0.164884,0.195860,0.209652,0.162840,0.165606,0.171761,0.194536,...,0.217536,0.215515,0.219136,0.219136,0.216173,0.218583,0.218583,0.218583,0.216582,0.216819
941,0.032300,0.045024,0.042924,0.043223,0.047493,0.051077,0.032761,0.042646,0.039399,0.047421,...,0.052762,0.053042,0.052692,0.052692,0.051514,0.053028,0.053028,0.053028,0.051910,0.052280
942,0.157779,0.174095,0.189000,0.163514,0.186140,0.194151,0.164910,0.156970,0.167038,0.181295,...,0.197537,0.194479,0.198479,0.198479,0.197969,0.199793,0.199793,0.199793,0.197394,0.200031


In [80]:
item_pred_Trans=item_pred.T

In [81]:
item_pred_Trans

user_id,1,2,3,4,5,6,7,8,9,10,...,934,935,936,937,938,939,940,941,942,943
0,0.446278,0.108544,0.085685,0.053693,0.224739,0.358069,0.782713,0.090678,0.044465,0.366060,...,0.300762,0.058182,0.268047,0.062958,0.148765,0.092574,0.164358,0.032300,0.157779,0.247672
1,0.475473,0.132957,0.091690,0.059604,0.229171,0.403467,0.807765,0.093242,0.050556,0.402074,...,0.315097,0.077715,0.312001,0.078982,0.186952,0.113870,0.184894,0.045024,0.174095,0.244892
2,0.505938,0.125589,0.087643,0.058114,0.263280,0.422446,0.879616,0.113380,0.051605,0.423936,...,0.351531,0.076278,0.279381,0.074514,0.179351,0.110211,0.196502,0.042924,0.189000,0.282630
3,0.443633,0.124932,0.089966,0.058364,0.226387,0.358540,0.749868,0.088327,0.047089,0.351163,...,0.286341,0.076032,0.301951,0.072409,0.183795,0.112040,0.164884,0.043223,0.163514,0.241440
4,0.512667,0.131178,0.089658,0.059356,0.259973,0.424767,0.856625,0.111532,0.052202,0.420332,...,0.347137,0.078739,0.308205,0.077618,0.188391,0.112768,0.195860,0.047493,0.186140,0.279338
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,0.588155,0.134458,0.084659,0.057937,0.302051,0.458549,0.957402,0.133863,0.055114,0.463085,...,0.385295,0.091365,0.316109,0.080072,0.210365,0.125056,0.218583,0.053028,0.199793,0.344104
1678,0.588155,0.134458,0.084659,0.057937,0.302051,0.458549,0.957402,0.133863,0.055114,0.463085,...,0.385295,0.091365,0.316109,0.080072,0.210365,0.125056,0.218583,0.053028,0.199793,0.344104
1679,0.588155,0.134458,0.084659,0.057937,0.302051,0.458549,0.957402,0.133863,0.055114,0.463085,...,0.385295,0.091365,0.316109,0.080072,0.210365,0.125056,0.218583,0.053028,0.199793,0.344104
1680,0.573107,0.136576,0.089768,0.061673,0.293373,0.450434,0.935085,0.128403,0.055457,0.452134,...,0.377542,0.089950,0.316951,0.080491,0.207785,0.123470,0.216582,0.051910,0.197394,0.330514


In [82]:
item_pred_Trans[34]

0       0.048955
1       0.051866
2       0.049287
3       0.051942
4       0.050116
          ...   
1677    0.045356
1678    0.045356
1679    0.045356
1680    0.048563
1681    0.048910
Name: 34, Length: 1682, dtype: float64

In [83]:
gg=item_pred_Trans[34]

In [84]:
gg

0       0.048955
1       0.051866
2       0.049287
3       0.051942
4       0.050116
          ...   
1677    0.045356
1678    0.045356
1679    0.045356
1680    0.048563
1681    0.048910
Name: 34, Length: 1682, dtype: float64

In [85]:
g=pd.DataFrame(gg)

In [86]:
g

Unnamed: 0,34
0,0.048955
1,0.051866
2,0.049287
3,0.051942
4,0.050116
...,...
1677,0.045356
1678,0.045356
1679,0.045356
1680,0.048563


In [87]:
s=g.T

In [88]:
s

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
34,0.048955,0.051866,0.049287,0.051942,0.050116,0.047591,0.048194,0.050822,0.046844,0.048091,...,0.048089,0.048768,0.047787,0.047787,0.048311,0.045356,0.045356,0.045356,0.048563,0.04891


In [89]:
s[187].values >=0.4

array([False])

In [91]:
# From recomd list select hightest rated film which would like by the user. Based on User prediction

highest_Rated=[]
input_item_pre=pd.DataFrame(item_pred[input_item])
input_item_pred=input_item_pre.T
for re in recom:
    value=input_item_pred[re].values
    if(value>=1):
        highest_Rated.append(re)


In [92]:
highest_Rated

[13, 655, 416, 450]

In [93]:
len(highest_Rated)

4

In [94]:
# Checking the common movie list
list(set(recom)&set(input_item_watched_userid))

[]

In [95]:
def itembased(input_item,item_similarity,item_predictions,similar_item_count,thres):
    #Convert the user_sim table into DataFrame
    item_sim_table=pd.DataFrame(item_similarity)
    #Find similarity item for 78 using cosine table
    similar_input_item= item_sim_table[input_item].sort_values(ascending=True).head(similar_item_count).index
    #Convert input to list
    similar_item_input=list(similar_input_item) 
    #Using similar_item_input,can select movie id from df table
    similar_item_userid_list=[]
    for sim_item in similar_item_input:
        sim=list(df[df['movie_id']==sim_item]['user_id'])
        similar_item_userid_list.append(sim)
    #Converting as a whole list
    import itertools
    similar_item_userid_single_list=list(itertools.chain.from_iterable(similar_item_userid_list))
    #Unique movieid from the list
    Unique_userid_similar_item=set(similar_item_userid_single_list)
    #Input movies watched by users
    input_item_watched_userid=list(df[df['movie_id']==input_item]['user_id'].values)
    #Create a list which should have recom movieid to the input user
    recom=[]
    for per_id in Unique_userid_similar_item:
        if(per_id in input_item_watched_userid):
            pass
        else:
            recom.append(per_id)
    #From recommendation list selecting only hightest rated(predicted) value
    highest_Rated=[]
    user_pred=pd.DataFrame(item_prediction)
    input_item_pre=pd.DataFrame(user_pred[input_item])
    input_item_pred=input_item_pre.T
    for re in recom:
        value=input_item_pred[re].values
        if(value>=thres):
            highest_Rated.append(re)
    highest_Rated
    return highest_Rated
    

In [101]:
Recommended_user=itembased(5,item_similarity,item_pred,5,0.8)

In [102]:
Recommended_user

[59, 279, 416, 450]

In [103]:
len(Recommended_user)

4