In [65]:
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, accuracy, SVD, NMF
from surprise.model_selection import train_test_split
import surprise.prediction_algorithms.knns as knns
import surprise.prediction_algorithms.matrix_factorization

In [None]:
# Read the four CSV tables (ratings, movies, tags, links) into DataFrames.
# map() is consumed in order by the tuple unpacking, so each name is bound
# to the file listed at the same position.
ratings, movies, tags, links = map(
    pd.read_csv,
    (
        "ml-100k/ratings.csv",
        'ml-100k/movies.csv',
        'ml-100k/tags.csv',
        'ml-100k/links.csv',
    ),
)


In [15]:
# Preview the first rows of the ratings table to check its columns.
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Build the Surprise dataset from the (user, item, rating) columns.
# The rating scale is derived from the data instead of being hard-coded:
# Surprise clips predicted ratings to this scale, so a lower bound of 1 would
# distort predictions if the file contains ratings below 1 (MovieLens CSV
# releases use half-star ratings starting at 0.5 -- verify against the data).
reader = Reader(
    rating_scale=(
        float(ratings['rating'].min()),
        float(ratings['rating'].max()),
    )
)
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

In [None]:
# Hold out 25% of the ratings for testing (fixed seed for a repeatable split),
# then fit a KNNBasic model with Pearson similarity on the training part.
trainset, testset = train_test_split(
    data,
    random_state=1234,
    test_size=0.25,
)
algo = knns.KNNBasic(
    sim_options={"name": "pearson"},
)
algo.fit(trainset)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7927842843b0>

In [None]:
# Split the Surprise Dataset built with Dataset.load_from_df, not the raw
# pandas DataFrame: Surprise's train_test_split expects a Dataset object.
trainset, testset = train_test_split(data, test_size=0.25, random_state=1234)

In [26]:
# KNNBasic with Pearson similarity: fit on the training set and score the
# held-out test set. fit() returns the estimator itself, so the two steps
# can be chained.
algo = knns.KNNBasic(
    sim_options={"name": "pearson"},
)
predictions = algo.fit(trainset).test(testset)

Computing the pearson similarity matrix...
Done computing similarity matrix.


In [None]:

# Item-based variant: 'user_based': False makes KNNBasic compute Pearson
# similarities between items instead of between users.
knn_prod = knns.KNNBasic(
    sim_options={
        "name": "pearson",
        'user_based': False,
    },
)
knn_prod_pred = knn_prod.fit(trainset).test(testset)

Computing the pearson similarity matrix...
Done computing similarity matrix.


In [None]:
# Fit SVD on the training set and score the held-out test set.
# SVD initialises its factors randomly; the seed makes re-runs reproducible.
svd = SVD(random_state=1234)
svd.fit(trainset)
svd_predictions = svd.test(testset)

# User id of every test prediction, collected directly rather than via
# append() calls inside a throwaway list comprehension.
uuid = [pred.uid for pred in svd_predictions]

# Average number of test predictions per distinct user.
len(uuid) / len(set(uuid))

41.32622950819672

In [66]:
# Fit NMF on the training set and score the held-out test set.
# The predictions must come from the NMF model itself; calling svd.test()
# here would only re-score the SVD model under the NMF name.
# NMF initialises its factors randomly; the seed makes re-runs reproducible.
nmf = NMF(random_state=1234)
nmf.fit(trainset)
nmf_predictions = nmf.test(testset)

In [47]:
# Title Series (one per prediction) for the first five SVD predictions,
# looked up by matching each prediction's item id against movies['movieId'].
[
    movies.loc[movies['movieId'] == pred.iid, 'title']
    for pred in svd_predictions[0:5]
]

[9433    Rogue One: A Star Wars Story (2016)
 Name: title, dtype: object,
 922    Godfather: Part II, The (1974)
 Name: title, dtype: object,
 1445    Breakfast Club, The (1985)
 Name: title, dtype: object,
 123    Apollo 13 (1995)
 Name: title, dtype: object,
 1883    Office Space (1999)
 Name: title, dtype: object]

In [60]:
def get_titles(prediction, n, movies):
    """Return the movie titles for the first ``n`` entries of ``prediction``.

    Entries are taken in the order they appear in ``prediction``; they are
    not ranked by estimated rating.

    Parameters
    ----------
    prediction : sequence
        Sliceable sequence of objects exposing an ``iid`` attribute.
    n : int
        Number of leading entries to look up.
    movies : pandas.DataFrame
        Table with ``movieId`` and ``title`` columns.

    Returns
    -------
    list
        One title per looked-up entry, in input order.

    Raises
    ------
    ValueError
        If an ``iid`` does not match exactly one row of ``movies``
        (raised by ``Series.item()``).
    """
    titles = []
    for entry in prediction[0:n]:
        matching_titles = movies.loc[movies['movieId'] == entry.iid, 'title']
        # .item() insists on exactly one matching row.
        titles.append(matching_titles.item())
    return titles

In [97]:
import itertools

# Predict a rating for every (test user, movie) pair with the KNN model.
# The test set holds one row per rating, so the same user id appears many
# times; de-duplicating first avoids recomputing identical predictions.
test_user_ids = sorted({row[0] for row in testset})
all_pair_predictions = [
    algo.predict(uid, iid)
    for uid, iid in itertools.product(test_user_ids, movies['movieId'])
]

# Show only the count: the full list is far too large to display.
len(all_pair_predictions)

KeyboardInterrupt: 

In [75]:
# Titles of the first 30 entries of `predictions`, in list order
# (get_titles does not rank by estimated rating).
get_titles(predictions, 30, movies)

['Rogue One: A Star Wars Story (2016)',
 'Godfather: Part II, The (1974)',
 'Breakfast Club, The (1985)',
 'Apollo 13 (1995)',
 'Office Space (1999)',
 'Logan (2017)',
 'Tucker & Dale vs Evil (2010)',
 'Easy Rider (1969)',
 'Purge, The (2013)',
 'Guardians of the Galaxy (2014)',
 'Dreamers, The (2003)',
 'Congo (1995)',
 'Talented Mr. Ripley, The (1999)',
 'Scanner Darkly, A (2006)',
 'Get Shorty (1995)',
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)',
 'Dogtown and Z-Boyz (2001)',
 'Fight Club (1999)',
 'Hard Candy (2005)',
 'Bruce Almighty (2003)',
 'Chamber, The (1996)',
 'American President, The (1995)',
 'Mission: Impossible (1996)',
 'Forrest Gump (1994)',
 'Taxi 4 (2007)',
 'Snowpiercer (2013)',
 'Truman Show, The (1998)',
 'Desperately Seeking Susan (1985)',
 'Twister (1996)',
 'Beauty and the Beast (1991)']

In [76]:
# Titles of the first 30 entries of `svd_predictions`, in list order
# (get_titles does not rank by estimated rating).
get_titles(svd_predictions, 30, movies)

['Rogue One: A Star Wars Story (2016)',
 'Godfather: Part II, The (1974)',
 'Breakfast Club, The (1985)',
 'Apollo 13 (1995)',
 'Office Space (1999)',
 'Logan (2017)',
 'Tucker & Dale vs Evil (2010)',
 'Easy Rider (1969)',
 'Purge, The (2013)',
 'Guardians of the Galaxy (2014)',
 'Dreamers, The (2003)',
 'Congo (1995)',
 'Talented Mr. Ripley, The (1999)',
 'Scanner Darkly, A (2006)',
 'Get Shorty (1995)',
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)',
 'Dogtown and Z-Boyz (2001)',
 'Fight Club (1999)',
 'Hard Candy (2005)',
 'Bruce Almighty (2003)',
 'Chamber, The (1996)',
 'American President, The (1995)',
 'Mission: Impossible (1996)',
 'Forrest Gump (1994)',
 'Taxi 4 (2007)',
 'Snowpiercer (2013)',
 'Truman Show, The (1998)',
 'Desperately Seeking Susan (1985)',
 'Twister (1996)',
 'Beauty and the Beast (1991)']

In [61]:
# Titles of the first 10 entries of `knn_prod_pred`, in list order
# (get_titles does not rank by estimated rating).
get_titles(knn_prod_pred, 10, movies)

['Rogue One: A Star Wars Story (2016)',
 'Godfather: Part II, The (1974)',
 'Breakfast Club, The (1985)',
 'Apollo 13 (1995)',
 'Office Space (1999)',
 'Logan (2017)',
 'Tucker & Dale vs Evil (2010)',
 'Easy Rider (1969)',
 'Purge, The (2013)',
 'Guardians of the Galaxy (2014)']

In [67]:
# Titles of the first 10 entries of `nmf_predictions`, in list order
# (get_titles does not rank by estimated rating).
get_titles(nmf_predictions, 10, movies)

['Rogue One: A Star Wars Story (2016)',
 'Godfather: Part II, The (1974)',
 'Breakfast Club, The (1985)',
 'Apollo 13 (1995)',
 'Office Space (1999)',
 'Logan (2017)',
 'Tucker & Dale vs Evil (2010)',
 'Easy Rider (1969)',
 'Purge, The (2013)',
 'Guardians of the Galaxy (2014)']