# Import library

In [1]:
import pandas as pd
import numpy as np

from scipy import sparse
from lightfm import LightFM # ใช้ทำ MF

  "LightFM was compiled without OpenMP support. "


# Import data

In [2]:
df = pd.read_csv('./dataset/ratings.csv')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [3]:
movie_name = pd.read_csv('./dataset/movies.csv')
movie_name.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


# Preparation

In [4]:
print('user:', df['userId'].nunique())
print('movie:', df['movieId'].nunique())
print('len rating:', len(df))

user: 610
movie: 9724
len rating: 100836


In [5]:
user = df['userId'].unique()
user

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
       157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
       170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 18

In [6]:
movie = df['movieId'].unique()
movie

array([     1,      3,      6, ..., 160836, 163937, 163981], dtype=int64)

# Interaction matrix

In [7]:
user_col = 'userId'
item_col = 'movieId'
rating_col = 'rating'
interactions = df.groupby([user_col, item_col])[rating_col]
interactions

<pandas.core.groupby.generic.SeriesGroupBy object at 0x000002102EEDF348>

In [8]:
interactions = df.groupby([user_col, item_col])[rating_col].sum()
interactions

userId  movieId
1       1          4.0
        3          4.0
        6          4.0
        47         5.0
        50         5.0
                  ... 
610     166534     4.0
        168248     5.0
        168250     5.0
        168252     5.0
        170875     3.0
Name: rating, Length: 100836, dtype: float64

In [9]:
interactions = df.groupby([user_col, item_col])[rating_col].sum().unstack()
interactions

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


In [10]:
interactions = df.groupby([user_col, item_col])[rating_col].sum().unstack().reset_index()
interactions

movieId,userId,1,2,3,4,5,6,7,8,9,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
0,1,4.0,,4.0,,,4.0,,,,...,,,,,,,,,,
1,2,,,,,,,,,,...,,,,,,,,,,
2,3,,,,,,,,,,...,,,,,,,,,,
3,4,,,,,,,,,,...,,,,,,,,,,
4,5,4.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,606,2.5,,,,,,2.5,,,...,,,,,,,,,,
606,607,4.0,,,,,,,,,...,,,,,,,,,,
607,608,2.5,2.0,2.0,,,,,,,...,,,,,,,,,,
608,609,3.0,,,,,,,,,...,,,,,,,,,,


In [11]:
interactions = df.groupby([user_col, item_col])[rating_col].sum().unstack().reset_index().fillna(0)
interactions

movieId,userId,1,2,3,4,5,6,7,8,9,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
0,1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
606,607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
interactions = df.groupby([user_col, item_col])[rating_col].sum().unstack().reset_index().fillna(0).set_index(user_col)
interactions

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# userId == 1
interactions.iloc[:1, :]

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Matrix factorization

#### Function to run matrix-factorization algorithm
- Required Input
    - interactions = dataset create by create_interaction_matrix
    - n_components = number of embeddings you want to create to define Item and user
    - loss = loss function
    - epoch = number of epochs
    - n_jobs = number of cores used for execution 

- Expected Output
    - Model - Trained model

In [14]:
interactions = interactions
n_components=30
loss='warp'
k=15
epoch=30
n_jobs = 4

In [15]:
interactions.values

array([[4. , 0. , 4. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [2.5, 2. , 2. , ..., 0. , 0. , 0. ],
       [3. , 0. , 0. , ..., 0. , 0. , 0. ],
       [5. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [16]:
# Compressed Sparse Row matrix
x = sparse.csr_matrix(interactions.values)
x

<610x9724 sparse matrix of type '<class 'numpy.float64'>'
	with 100836 stored elements in Compressed Sparse Row format>

In [17]:
model = LightFM(no_components= n_components, loss=loss, k=k) #ออกแบบโมเดล
model

<lightfm.lightfm.LightFM at 0x2102ecf8a48>

In [18]:
model.fit(x, epochs=epoch, num_threads = n_jobs) #train model

<lightfm.lightfm.LightFM at 0x2102ecf8a48>

# Predict

### user_dict

In [19]:
user_id = list(interactions.index)
user_id

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185

In [20]:
user_dict = {}

In [21]:
counter = 1
for i in user_id:
    user_dict[counter] = i
    counter += 1

In [22]:
user_dict

{1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 11: 11,
 12: 12,
 13: 13,
 14: 14,
 15: 15,
 16: 16,
 17: 17,
 18: 18,
 19: 19,
 20: 20,
 21: 21,
 22: 22,
 23: 23,
 24: 24,
 25: 25,
 26: 26,
 27: 27,
 28: 28,
 29: 29,
 30: 30,
 31: 31,
 32: 32,
 33: 33,
 34: 34,
 35: 35,
 36: 36,
 37: 37,
 38: 38,
 39: 39,
 40: 40,
 41: 41,
 42: 42,
 43: 43,
 44: 44,
 45: 45,
 46: 46,
 47: 47,
 48: 48,
 49: 49,
 50: 50,
 51: 51,
 52: 52,
 53: 53,
 54: 54,
 55: 55,
 56: 56,
 57: 57,
 58: 58,
 59: 59,
 60: 60,
 61: 61,
 62: 62,
 63: 63,
 64: 64,
 65: 65,
 66: 66,
 67: 67,
 68: 68,
 69: 69,
 70: 70,
 71: 71,
 72: 72,
 73: 73,
 74: 74,
 75: 75,
 76: 76,
 77: 77,
 78: 78,
 79: 79,
 80: 80,
 81: 81,
 82: 82,
 83: 83,
 84: 84,
 85: 85,
 86: 86,
 87: 87,
 88: 88,
 89: 89,
 90: 90,
 91: 91,
 92: 92,
 93: 93,
 94: 94,
 95: 95,
 96: 96,
 97: 97,
 98: 98,
 99: 99,
 100: 100,
 101: 101,
 102: 102,
 103: 103,
 104: 104,
 105: 105,
 106: 106,
 107: 107,
 108: 108,
 109: 109,
 110: 110,
 111: 

### movie_dict

In [23]:
movie_name.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [24]:
movie_name.loc[0,'movieId']

1

In [25]:
movie_name.loc[0,'title']

'Toy Story (1995)'

In [26]:
movie_name['title'].loc[0]

'Toy Story (1995)'

In [27]:
movie_name.shape[0]

9742

In [28]:
movie_dict = {}

In [29]:
for i in range(movie_name.shape[0]):
    movie_dict[movie_name['movieId'].loc[i]] = movie_name['title'].loc[i]

In [30]:
movie_dict

{1: 'Toy Story (1995)',
 2: 'Jumanji (1995)',
 3: 'Grumpier Old Men (1995)',
 4: 'Waiting to Exhale (1995)',
 5: 'Father of the Bride Part II (1995)',
 6: 'Heat (1995)',
 7: 'Sabrina (1995)',
 8: 'Tom and Huck (1995)',
 9: 'Sudden Death (1995)',
 10: 'GoldenEye (1995)',
 11: 'American President, The (1995)',
 12: 'Dracula: Dead and Loving It (1995)',
 13: 'Balto (1995)',
 14: 'Nixon (1995)',
 15: 'Cutthroat Island (1995)',
 16: 'Casino (1995)',
 17: 'Sense and Sensibility (1995)',
 18: 'Four Rooms (1995)',
 19: 'Ace Ventura: When Nature Calls (1995)',
 20: 'Money Train (1995)',
 21: 'Get Shorty (1995)',
 22: 'Copycat (1995)',
 23: 'Assassins (1995)',
 24: 'Powder (1995)',
 25: 'Leaving Las Vegas (1995)',
 26: 'Othello (1995)',
 27: 'Now and Then (1995)',
 28: 'Persuasion (1995)',
 29: 'City of Lost Children, The (Cité des enfants perdus, La) (1995)',
 30: 'Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)',
 31: 'Dangerous Minds (1995)',
 32: 'Twelve Monkeys (a.k.a. 12 Monkeys) (199

---

In [31]:
interactions.loc[user_id,:]

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Predict

In [32]:
model = model
interactions = interactions
user_id = 1
user_dict = user_dict
movie_dict = movie_dict
threshold = 0
nrec_items = 10
show = True

In [33]:
n_users, n_items = interactions.shape

In [34]:
user_X = user_dict[user_id]

In [35]:
np.arange(n_items)

array([   0,    1,    2, ..., 9721, 9722, 9723])

In [36]:
scores = pd.Series(model.predict(user_X,np.arange(n_items)))
scores

0       0.142519
1      -1.348160
2      -2.606803
3      -3.865181
4      -1.620985
          ...   
9719   -2.491760
9720   -2.088889
9721   -2.161210
9722   -2.316696
9723   -1.756734
Length: 9724, dtype: float32

In [37]:
scores.index = interactions.columns

In [38]:
scores = list(pd.Series(scores.sort_values(ascending=False).index))
scores

[58559,
 79132,
 74458,
 72998,
 109487,
 59315,
 2959,
 91529,
 99114,
 80463,
 106782,
 68157,
 91658,
 68954,
 122904,
 318,
 60069,
 69122,
 48516,
 122882,
 5952,
 134130,
 112852,
 7153,
 3949,
 4993,
 112556,
 116797,
 70286,
 4226,
 89745,
 4878,
 73017,
 89492,
 109374,
 48780,
 2571,
 59369,
 63082,
 115713,
 6874,
 54286,
 858,
 356,
 134853,
 593,
 112552,
 3578,
 55820,
 56367,
 2858,
 111759,
 69481,
 84152,
 51662,
 71535,
 81845,
 33794,
 112183,
 50872,
 64614,
 6377,
 60684,
 51255,
 5989,
 48774,
 4886,
 96079,
 88125,
 44191,
 296,
 81591,
 77561,
 68358,
 115149,
 85414,
 78499,
 122886,
 50,
 48394,
 4963,
 91542,
 139385,
 35836,
 55247,
 68237,
 91500,
 72011,
 87232,
 7361,
 76093,
 2329,
 69844,
 49530,
 67255,
 8665,
 79091,
 4995,
 96610,
 94959,
 61323,
 7438,
 260,
 59784,
 1704,
 106489,
 81932,
 6539,
 55765,
 92259,
 72641,
 4306,
 79702,
 6016,
 80489,
 105504,
 110102,
 88129,
 64839,
 76251,
 57669,
 4011,
 5445,
 46578,
 106920,
 112290,
 4262,
 119

In [39]:
known_items = list(pd.Series(interactions.loc[user_id,:] \
                             [interactions.loc[user_id,:] > threshold].index) \
                             .sort_values(ascending=False))
known_items

[5060,
 4006,
 3809,
 3793,
 3744,
 3740,
 3729,
 3703,
 3702,
 3671,
 3639,
 3617,
 3578,
 3527,
 3489,
 3479,
 3450,
 3448,
 3441,
 3440,
 3439,
 3386,
 3273,
 3253,
 3247,
 3243,
 3176,
 3168,
 3147,
 3062,
 3053,
 3052,
 3034,
 3033,
 2997,
 2993,
 2991,
 2987,
 2985,
 2959,
 2949,
 2948,
 2947,
 2944,
 2916,
 2899,
 2872,
 2858,
 2826,
 2797,
 2761,
 2716,
 2700,
 2692,
 2657,
 2654,
 2648,
 2644,
 2641,
 2640,
 2628,
 2617,
 2616,
 2596,
 2580,
 2571,
 2542,
 2529,
 2528,
 2502,
 2492,
 2478,
 2470,
 2459,
 2450,
 2427,
 2414,
 2406,
 2395,
 2389,
 2387,
 2366,
 2353,
 2338,
 2329,
 2291,
 2273,
 2268,
 2253,
 2193,
 2174,
 2161,
 2143,
 2141,
 2139,
 2137,
 2116,
 2115,
 2105,
 2099,
 2096,
 2094,
 2093,
 2090,
 2078,
 2058,
 2054,
 2048,
 2046,
 2033,
 2028,
 2018,
 2012,
 2005,
 2000,
 1967,
 1954,
 1927,
 1920,
 1805,
 1804,
 1793,
 1777,
 1732,
 1676,
 1644,
 1625,
 1620,
 1617,
 1587,
 1580,
 1573,
 1552,
 1517,
 1500,
 1473,
 1445,
 1408,
 1396,
 1377,
 1348,
 1298,
 1291,

In [40]:
scores = [x for x in scores if x not in known_items]
scores

[58559,
 79132,
 74458,
 72998,
 109487,
 59315,
 91529,
 99114,
 80463,
 106782,
 68157,
 91658,
 68954,
 122904,
 318,
 60069,
 69122,
 48516,
 122882,
 5952,
 134130,
 112852,
 7153,
 3949,
 4993,
 112556,
 116797,
 70286,
 4226,
 89745,
 4878,
 73017,
 89492,
 109374,
 48780,
 59369,
 63082,
 115713,
 6874,
 54286,
 858,
 134853,
 112552,
 55820,
 56367,
 111759,
 69481,
 84152,
 51662,
 71535,
 81845,
 33794,
 112183,
 50872,
 64614,
 6377,
 60684,
 51255,
 5989,
 48774,
 4886,
 96079,
 88125,
 44191,
 81591,
 77561,
 68358,
 115149,
 85414,
 78499,
 122886,
 48394,
 4963,
 91542,
 139385,
 35836,
 55247,
 68237,
 91500,
 72011,
 87232,
 7361,
 76093,
 69844,
 49530,
 67255,
 8665,
 79091,
 4995,
 96610,
 94959,
 61323,
 7438,
 59784,
 1704,
 106489,
 81932,
 6539,
 55765,
 92259,
 72641,
 4306,
 79702,
 6016,
 80489,
 105504,
 110102,
 88129,
 64839,
 76251,
 57669,
 4011,
 5445,
 46578,
 106920,
 112290,
 4262,
 138036,
 49272,
 119145,
 81834,
 4022,
 48385,
 97304,
 51540,
 11

In [41]:
return_score_list = scores[0:nrec_items]
return_score_list

[58559, 79132, 74458, 72998, 109487, 59315, 91529, 99114, 80463, 106782]

In [42]:
known_items = list(pd.Series(known_items).apply(lambda x: movie_dict[x]))
known_items

['M*A*S*H (a.k.a. MASH) (1970)',
 'Transformers: The Movie (1986)',
 'What About Bob? (1991)',
 'X-Men (2000)',
 'Shaft (2000)',
 'Big Trouble in Little China (1986)',
 'Shaft (1971)',
 'Road Warrior, The (Mad Max 2) (1981)',
 'Mad Max (1979)',
 'Blazing Saddles (1974)',
 'Man with the Golden Gun, The (1974)',
 'Road Trip (2000)',
 'Gladiator (2000)',
 'Predator (1987)',
 'Hook (1991)',
 'Ladyhawke (1985)',
 'Grumpy Old Men (1993)',
 'Good Morning, Vietnam (1987)',
 'Red Dawn (1984)',
 'Teenage Mutant Ninja Turtles III (1993)',
 'Teenage Mutant Ninja Turtles II: The Secret of the Ooze (1991)',
 'JFK (1991)',
 'Scream 3 (2000)',
 "Wayne's World (1992)",
 'Sister Act (1992)',
 'Encino Man (1992)',
 'Talented Mr. Ripley, The (1999)',
 'Easy Rider (1969)',
 'Green Mile, The (1999)',
 'Longest Day, The (1962)',
 'Messenger: The Story of Joan of Arc, The (1999)',
 'Dogma (1999)',
 'Robin Hood (1973)',
 'Spaceballs (1987)',
 'Being John Malkovich (1999)',
 'Thunderball (1965)',
 'Live and Let

In [43]:
scores = list(pd.Series(return_score_list).apply(lambda x: movie_dict[x]))
scores

['Dark Knight, The (2008)',
 'Inception (2010)',
 'Shutter Island (2010)',
 'Avatar (2009)',
 'Interstellar (2014)',
 'Iron Man (2008)',
 'Dark Knight Rises, The (2012)',
 'Django Unchained (2012)',
 'Social Network, The (2010)',
 'Wolf of Wall Street, The (2013)']