In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
from scipy import linalg
from scipy.sparse import csr_matrix
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate, train_test_split
from collections import defaultdict

In [2]:
ratings = pd.read_csv("../data/ratings_small.csv")

In [3]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


In [125]:
ratings.iloc[-1:]['userId']

100003    671
Name: userId, dtype: int64

In [97]:
movies_metadata = pd.read_csv("../data/movies_metadata.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [98]:
movies_metadata.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [99]:
movies_metadata.head(3)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0


In [100]:
literal_eval(movies_metadata['genres'].loc[0])

[{'id': 16, 'name': 'Animation'},
 {'id': 35, 'name': 'Comedy'},
 {'id': 10751, 'name': 'Family'}]

In [101]:
def extract_genres(x):
    x = literal_eval(x)
    if isinstance(x, list):
        genre_names = [i['name'] for i in x]
        if len(genre_names) > 3:
            genre_names= genre_names[:3]
        return genre_names
    return []

In [102]:
movies_metadata['genres']= movies_metadata['genres'].apply(extract_genres)

In [103]:
movies_metadata['genres']

0         [Animation, Comedy, Family]
1        [Adventure, Fantasy, Family]
2                   [Romance, Comedy]
3            [Comedy, Drama, Romance]
4                            [Comedy]
                     ...             
45461                 [Drama, Family]
45462                         [Drama]
45463       [Action, Drama, Thriller]
45464                              []
45465                              []
Name: genres, Length: 45466, dtype: object

In [104]:
movies = movies_metadata.drop(columns=['adult', 'belongs_to_collection', 'budget', 'homepage',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'video',
       'vote_average', 'vote_count'])

In [105]:
movies.head(2)

Unnamed: 0,genres,id,title
0,"[Animation, Comedy, Family]",862,Toy Story
1,"[Adventure, Fantasy, Family]",8844,Jumanji


In [106]:
movies = movies.rename(columns={'id':'movieId'})

In [107]:
movies.head(3)

Unnamed: 0,genres,movieId,title
0,"[Animation, Comedy, Family]",862,Toy Story
1,"[Adventure, Fantasy, Family]",8844,Jumanji
2,"[Romance, Comedy]",15602,Grumpier Old Men


In [16]:
movies.shape

(45466, 3)

In [17]:
ratings.head(2)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179


In [18]:
ratings.shape

(100004, 4)

In [18]:
ratings_matrix = np.ndarray(
    shape=(np.max(ratings.movieId.values), np.max(ratings.userId.values)),
    dtype=np.uint8)
ratings_matrix[ratings.movieId.values-1, ratings.userId.values-1] = ratings.rating.values

In [19]:
ratings_matrix

array([[0, 0, 0, ..., 0, 4, 5],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [20]:
ratings_matrix.shape

(163949, 671)

In [21]:
normalised_mat = ratings_matrix - np.asarray([(np.mean(ratings_matrix, 1))]).T
normalised_mat

array([[-1.38897168, -1.38897168, -1.38897168, ..., -1.38897168,
         2.61102832,  3.61102832],
       [-0.5290611 , -0.5290611 , -0.5290611 , ..., -0.5290611 ,
        -0.5290611 , -0.5290611 ],
       [-0.27123696, -0.27123696, -0.27123696, ..., -0.27123696,
        -0.27123696, -0.27123696],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.00745156, -0.00745156, -0.00745156, ..., -0.00745156,
        -0.00745156, -0.00745156]])

In [22]:
normalised_mat.shape

(163949, 671)

In [23]:
A = normalised_mat.T / np.sqrt(ratings_matrix.shape[0] - 1)

In [24]:
U, S, V = linalg.svd(A, full_matrices=False)

__Now I have to calculate cosine similarity between the values and then create a function to extract the movies from the movie dataframe__

__Below I will try another approach__

In [108]:
movies.head(2)

Unnamed: 0,genres,movieId,title
0,"[Animation, Comedy, Family]",862,Toy Story
1,"[Adventure, Fantasy, Family]",8844,Jumanji


In [165]:
movies.loc[movies['movieId'] == '627']

Unnamed: 0,genres,movieId,title
758,"[Drama, Crime]",627,Trainspotting


In [39]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [20]:
movies_ = movies

In [21]:
movies_ = movies[["movieId", "title", "genres"]]
movies_.head(2)

Unnamed: 0,movieId,title,genres
0,862,Toy Story,"[Animation, Comedy, Family]"
1,8844,Jumanji,"[Adventure, Fantasy, Family]"


In [22]:
movies_['genres'] = ["".join(string) for string in movies_['genres']]
movies_.head(2)

Unnamed: 0,movieId,title,genres
0,862,Toy Story,AnimationComedyFamily
1,8844,Jumanji,AdventureFantasyFamily


In [40]:
ratings = ratings.drop(columns=["timestamp"])
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [54]:
ratings.sort_values('movieId').head()

Unnamed: 0,userId,movieId,rating
9713,68,1,4.0
35933,261,1,1.5
52631,383,1,5.0
35983,262,1,2.5
12038,77,1,4.0


In [55]:
movies_.sort_values('movieId').head()

Unnamed: 0,movieId,title,genres
2429,100,"Lock, Stock and Two Smoking Barrels",ComedyCrime
13609,10000,La estrategia del caracol,ComedyDrama
4435,10001,Young Einstein,ComedyScience Fiction
17451,100010,Flight Command,DramaWar
36946,100017,Hounded,Drama


In [68]:
movies_.drop(movies_.index[19730],inplace=True)
movies_.drop(movies_.index[29502],inplace=True)
movies_.drop(movies_.index[35585],inplace=True)

In [69]:
movies_.head(3)

Unnamed: 0,movieId,title,genres
0,862,Toy Story,AnimationComedyFamily
1,8844,Jumanji,AdventureFantasyFamily
2,15602,Grumpier Old Men,RomanceComedy


In [71]:
movies_.movieId = movies_.movieId.astype(np.int64)

In [74]:
ratings_df = pd.merge(ratings,movies_, on='movieId')
ratings_df

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1371,2.5,Rocky III,Drama
1,4,1371,4.0,Rocky III,Drama
2,7,1371,3.0,Rocky III,Drama
3,19,1371,4.0,Rocky III,Drama
4,21,1371,3.0,Rocky III,Drama
...,...,...,...,...,...
44989,652,129009,4.0,Love Is a Ball,ComedyRomance
44990,653,2103,3.0,Solaris,DramaScience FictionMystery
44991,659,167,4.0,K-PAX,DramaScience Fiction
44992,659,563,3.0,Starship Troopers,AdventureActionThriller


In [94]:
ratings_df.drop_duplicates()

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1371,2.5,Rocky III,Drama
1,4,1371,4.0,Rocky III,Drama
2,7,1371,3.0,Rocky III,Drama
3,19,1371,4.0,Rocky III,Drama
4,21,1371,3.0,Rocky III,Drama
...,...,...,...,...,...
44989,652,129009,4.0,Love Is a Ball,ComedyRomance
44990,653,2103,3.0,Solaris,DramaScience FictionMystery
44991,659,167,4.0,K-PAX,DramaScience Fiction
44992,659,563,3.0,Starship Troopers,AdventureActionThriller


In [77]:
matrix= ratings_df.pivot_table(index='userId', columns='title', values='rating').fillna(0)
matrix.head()

title,!Women Art Revolution,'Gator Bait,'Twas the Night Before Christmas,...And God Created Woman,00 Schneider - Jagd auf Nihil Baxter,10 Items or Less,10 Things I Hate About You,"10,000 BC",11'09''01 - September 11,12 Angry Men,...,Zodiac,Zombie Flesh Eaters,Zombie Holocaust,Zozo,eXistenZ,xXx,¡Three Amigos!,À nos amours,Ödipussi,Şaban Oğlu Şaban
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


__The code below correctly implements SVD prediction__

In [81]:
movies_

Unnamed: 0,movieId,title,genres
0,862,Toy Story,AnimationComedyFamily
1,8844,Jumanji,AdventureFantasyFamily
2,15602,Grumpier Old Men,RomanceComedy
3,31357,Waiting to Exhale,ComedyDramaRomance
4,11862,Father of the Bride Part II,Comedy
...,...,...,...
45461,439050,Subdue,DramaFamily
45462,111109,Century of Birthing,Drama
45463,67758,Betrayal,ActionDramaThriller
45464,227506,Satan Triumphant,


In [82]:
ratings

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
...,...,...,...
99999,671,6268,2.5
100000,671,6269,4.0
100001,671,6365,4.0
100002,671,6385,2.5


In [9]:
import requests
from pymongo import MongoClient
from dotenv import load_dotenv
import os

In [10]:
load_dotenv()
username = os.getenv("USER")
password = os.getenv("PASS")
url = f"mongodb+srv://{username}:{password}@datacluster.2umgq.mongodb.net/myFirstDatabase?retryWrites=true&w=majority"
client = MongoClient(url)
db = client["movies"]
ratings = db.ratings_small

In [114]:
ratings_new = db.ratings_new

In [148]:
list(ratings_new.find({}))

[{'_id': ObjectId('61942378c2297cfa7f238e67'),
  'userId': 930,
  'movieId': 597,
  'rating': 1.0},
 {'_id': ObjectId('61942378c2297cfa7f238e68'),
  'userId': 930,
  'movieId': 8844,
  'rating': 5.0},
 {'_id': ObjectId('61942378c2297cfa7f238e69'),
  'userId': 930,
  'movieId': 6957,
  'rating': 1.0},
 {'_id': ObjectId('61942379c2297cfa7f238e6a'),
  'userId': 930,
  'movieId': 13,
  'rating': 5.0},
 {'_id': ObjectId('61942379c2297cfa7f238e6b'),
  'userId': 930,
  'movieId': 771,
  'rating': 5.0},
 {'_id': ObjectId('6194278cc2297cfa7f238e6c'),
  'userId': 852,
  'movieId': 278,
  'rating': 5.0},
 {'_id': ObjectId('6194278cc2297cfa7f238e6d'),
  'userId': 852,
  'movieId': 862,
  'rating': 4.0},
 {'_id': ObjectId('6194278cc2297cfa7f238e6e'),
  'userId': 852,
  'movieId': 8844,
  'rating': 2.0},
 {'_id': ObjectId('6194278cc2297cfa7f238e6f'),
  'userId': 852,
  'movieId': 6957,
  'rating': 5.0},
 {'_id': ObjectId('6194278cc2297cfa7f238e70'),
  'userId': 852,
  'movieId': 597,
  'rating': 3.0

In [13]:
list(ratings.find({}))

[{'_id': ObjectId('618be4749a7acca5a0165b3d'),
  'userId': 1,
  'movieId': 31,
  'rating': 2.5,
  'timestamp': 1260759144},
 {'_id': ObjectId('618be4749a7acca5a0165b3e'),
  'userId': 1,
  'movieId': 1029,
  'rating': 3.0,
  'timestamp': 1260759179},
 {'_id': ObjectId('618be4749a7acca5a0165b3f'),
  'userId': 1,
  'movieId': 1061,
  'rating': 3.0,
  'timestamp': 1260759182},
 {'_id': ObjectId('618be4749a7acca5a0165b40'),
  'userId': 1,
  'movieId': 1129,
  'rating': 2.0,
  'timestamp': 1260759185},
 {'_id': ObjectId('618be4749a7acca5a0165b41'),
  'userId': 1,
  'movieId': 1172,
  'rating': 4.0,
  'timestamp': 1260759205},
 {'_id': ObjectId('618be4749a7acca5a0165b42'),
  'userId': 1,
  'movieId': 1263,
  'rating': 2.0,
  'timestamp': 1260759151},
 {'_id': ObjectId('618be4749a7acca5a0165b43'),
  'userId': 1,
  'movieId': 1287,
  'rating': 2.0,
  'timestamp': 1260759187},
 {'_id': ObjectId('618be4749a7acca5a0165b44'),
  'userId': 1,
  'movieId': 1293,
  'rating': 2.0,
  'timestamp': 1260759

In [18]:
df = pd.DataFrame(list(ratings.find()))
df

Unnamed: 0,_id,userId,movieId,rating,timestamp
0,618be4749a7acca5a0165b3d,1,31,2.5,1260759144
1,618be4749a7acca5a0165b3e,1,1029,3.0,1260759179
2,618be4749a7acca5a0165b3f,1,1061,3.0,1260759182
3,618be4749a7acca5a0165b40,1,1129,2.0,1260759185
4,618be4749a7acca5a0165b41,1,1172,4.0,1260759205
...,...,...,...,...,...
99999,618be4749a7acca5a017e1dc,671,6268,2.5,1065579370
100000,618be4749a7acca5a017e1dd,671,6269,4.0,1065149201
100001,618be4749a7acca5a017e1de,671,6365,4.0,1070940363
100002,618be4749a7acca5a017e1df,671,6385,2.5,1070979663


In [149]:
df_new = pd.DataFrame(list(ratings_new.find()))
df_new = df_new.drop(columns=['_id'])
df_new

Unnamed: 0,userId,movieId,rating
0,930,597,1.0
1,930,8844,5.0
2,930,6957,1.0
3,930,13,5.0
4,930,771,5.0
5,852,278,5.0
6,852,862,4.0
7,852,8844,2.0
8,852,6957,5.0
9,852,597,3.0


In [140]:
df = df.drop(columns=['_id', 'timestamp'])
df.head(5)

KeyError: "['_id' 'timestamp'] not found in axis"

In [147]:
df.loc[df['userId'] == 669]

Unnamed: 0,userId,movieId,rating
99821,669,223,4.0
99822,669,260,5.0
99823,669,381,3.0
99824,669,480,3.0
99825,669,785,4.0
99826,669,913,5.0
99827,669,968,4.0
99828,669,1135,3.0
99829,669,1210,3.0
99830,669,1304,5.0


In [150]:
df_user = df.append(df_new)
df_user.reset_index().drop(columns=['index'])

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
...,...,...,...
100024,852,864,3.0
100025,852,433,3.0
100026,852,155,5.0
100027,852,10674,4.0


In [151]:
reader = Reader(rating_scale=(1, 5))

In [152]:
data = Dataset.load_from_df(df_user, reader)

In [153]:
trainset, testset = train_test_split(data, test_size = .25)

In [154]:
model = SVD(n_factors=100)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fe093503580>

In [155]:
model.qi.shape

(8222, 100)

In [125]:
## iid = 862 "Toy Story"
model.predict(20, 862)

Prediction(uid=20, iid=862, r_ui=None, est=3.1007957386444747, details={'was_impossible': False})

In [156]:
predictions = model.test(testset)

In [191]:
predictions[0:5]

[Prediction(uid=230, iid=81591, r_ui=3.5, est=4.527792045455145, details={'was_impossible': False}),
 Prediction(uid=214, iid=1275, r_ui=4.0, est=3.444970808148971, details={'was_impossible': False}),
 Prediction(uid=388, iid=3046, r_ui=2.0, est=3.475750063897901, details={'was_impossible': False}),
 Prediction(uid=389, iid=236, r_ui=3.0, est=3.41911650988372, details={'was_impossible': False}),
 Prediction(uid=457, iid=2726, r_ui=3.0, est=2.9869892439998877, details={'was_impossible': False})]

In [193]:
len(predictions)

25008

In [213]:
uid_ = []
iid_ = []
est_ = []
for uid, iid, r_ui, est, details in predictions:
    uid_.append(uid)
    iid_.append(iid)
    est_.append(est)
preds_ = pd.DataFrame({
    'userId' : uid_, 
    'movieId' : iid_,
    'rating_prediction' : est_},
columns=['userId', 'movieId', 'rating_prediction'])


In [216]:
preds_.sort_values('userId')

Unnamed: 0,userId,movieId,rating_prediction
14762,1,1263,3.319078
11289,1,3671,2.999939
9956,1,2193,2.637955
2500,1,1061,2.733859
22967,2,537,3.317155
...,...,...,...
12625,671,4022,3.596558
13891,852,864,3.587318
10734,852,627,3.297607
24497,852,8844,3.738908


In [157]:
def get_top_n(predictions, n=5):
    """Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendation to output for each user. Default
            is 10.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n
        

In [158]:
top_n = get_top_n(predictions)

In [159]:
userId = []
movieId = []
for uid, user_ratings in top_n.items():
    userId.append(uid)
    movieId.append([iid for (iid, _) in user_ratings])

In [160]:
predictions_df = pd.DataFrame({
    'userId': userId,
    'movieId': movieId
},
    columns = ['userId', 'movieId']
)
predictions_df

Unnamed: 0,userId,movieId
0,230,"[2329, 608, 44665, 593, 2502]"
1,214,"[1221, 1089, 2076, 1225, 1267]"
2,388,"[912, 1219, 593, 2959, 1210]"
3,389,"[161, 151, 62, 380, 2]"
4,457,"[858, 318, 4235, 2959, 5952]"
...,...,...
666,55,"[260, 32, 1, 1073, 100]"
667,445,"[318, 5291, 80463]"
668,399,"[194, 300, 588, 480]"
669,172,"[509, 480, 539, 586]"


In [161]:
predictions_df = predictions_df.sort_values('userId')[:1000]
predictions_df

Unnamed: 0,userId,movieId
508,1,"[1263, 3671, 1061, 2193]"
510,2,"[47, 50, 527, 589, 319]"
486,3,"[7361, 1197, 778, 1210, 5669]"
210,4,"[858, 1089, 1136, 2020, 1079]"
373,5,"[1221, 1968, 5816, 40819, 1682]"
...,...,...
598,668,"[1221, 1233, 1213, 3000]"
391,669,"[1953, 2599, 785, 3174, 3863]"
275,670,"[318, 2571, 32, 457, 36]"
160,671,"[50, 2324, 1196, 1259, 4993]"


In [185]:
df.loc[df['userId'] == 1]

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0
5,1,1263,2.0
6,1,1287,2.0
7,1,1293,2.0
8,1,1339,3.5
9,1,1343,2.0


In [166]:
predictions_df.iloc[-1:]

Unnamed: 0,userId,movieId
640,852,"[8844, 278, 864, 627]"


In [167]:
model.predict(852, 8844), model.predict(852, 278), model.predict(852, 864), model.predict(852,627 )

(Prediction(uid=852, iid=8844, r_ui=None, est=3.7389083413588877, details={'was_impossible': False}),
 Prediction(uid=852, iid=278, r_ui=None, est=3.7287025940442087, details={'was_impossible': False}),
 Prediction(uid=852, iid=864, r_ui=None, est=3.587317952975382, details={'was_impossible': False}),
 Prediction(uid=852, iid=627, r_ui=None, est=3.2976065202896345, details={'was_impossible': False}))

In [184]:
model.predict(852, 380)

Prediction(uid=852, iid=380, r_ui=None, est=3.7370081853140125, details={'was_impossible': False})

In [183]:
movies.loc[movies['movieId'] == '380']

Unnamed: 0,genres,movieId,title
1852,[Drama],380,Rain Man


In [112]:
model.predict(464, 260), model.predict(464, 222), model.predict(464, 610), model.predict(464, 349),  model.predict(464, 588),  model.predict(464, 262),  model.predict(464, 163),  model.predict(464, 165),  model.predict(464, 380),  model.predict(464, 592)

(Prediction(uid=464, iid=260, r_ui=None, est=4.597617132709338, details={'was_impossible': False}),
 Prediction(uid=464, iid=222, r_ui=None, est=4.372819379091483, details={'was_impossible': False}),
 Prediction(uid=464, iid=610, r_ui=None, est=4.343814906578933, details={'was_impossible': False}),
 Prediction(uid=464, iid=349, r_ui=None, est=4.326619332052402, details={'was_impossible': False}),
 Prediction(uid=464, iid=588, r_ui=None, est=4.282097971337215, details={'was_impossible': False}),
 Prediction(uid=464, iid=262, r_ui=None, est=4.271235120745371, details={'was_impossible': False}),
 Prediction(uid=464, iid=163, r_ui=None, est=4.191532241220789, details={'was_impossible': False}),
 Prediction(uid=464, iid=165, r_ui=None, est=4.08221836698955, details={'was_impossible': False}),
 Prediction(uid=464, iid=380, r_ui=None, est=4.062065259147693, details={'was_impossible': False}),
 Prediction(uid=464, iid=592, r_ui=None, est=4.0542182594177385, details={'was_impossible': False}))