In [1]:
import pandas as pd
from surprise import Dataset
from surprise import Reader

# Read the wide review table from disk.
reviews = pd.read_csv('data/user_reviews.csv')

# Everything from the third column onwards is treated as a movie title
# (the first two columns are presumably the row index and 'User').
movies = set(reviews.columns[2:])

# Columns that hold ratings: all but the leading index column and 'User'.
rating_columns = [column for column in reviews.columns[1:] if column != 'User']

# Drop the leading index column, reshape to long format with columns
# [User, movie, rating], and keep only the rows whose rating is not 0.
df = (
    reviews
    .drop(columns=reviews.columns[0])
    .melt(id_vars=['User'], value_vars=rating_columns, var_name='movie', value_name='rating')
    .loc[lambda long_form: long_form['rating'] != 0]
)

In [28]:
from surprise import KNNWithMeans
from surprise import KNNBasic

# Wrap the long-format ratings in a Surprise dataset; the reader declares a
# 1-5 rating scale. The model is trained on every available rating.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[["User", "movie", "rating"]], reader)
training_set = data.build_full_trainset()

# User-based KNN with mean-squared-difference ("msd") similarity and a
# min_support of 3 (see the Surprise similarity configuration docs).
sim_options = dict(name="msd", user_based=True, min_support=3)
alg = KNNBasic(sim_options=sim_options)

alg.fit(training_set)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f9210861a00>

In [29]:
def predict_ratings(user):
    """Estimate a rating for every movie that `user` has not rated yet.

    Uses the notebook-level `movies` set, the fitted `alg` model and the
    `get_movies` helper.

    Args:
        user: User identifier, as found in the 'User' column of `df`.

    Returns:
        list: `(movie, estimated_rating)` tuples, ordered by movie title.
    """
    unrated_movies = movies - get_movies(user)

    # A set of strings iterates in a different order on every interpreter
    # start (hash randomisation). Walking the titles in sorted order keeps the
    # returned list reproducible across kernel restarts.
    return [
        (movie, alg.predict(user, movie).est)
        for movie in sorted(unrated_movies)
    ]

def get_movies(user):
    """Return the set of movies for which `df` holds a row for `user`."""
    is_user_row = df['User'] == user
    return set(df.loc[is_user_row, 'movie'])

def get_top_n_recs(user, n):
    """Return the titles of the `n` movies with the highest predicted rating.

    Args:
        user: User identifier passed on to `predict_ratings`.
        n: Maximum number of titles to return. Zero or a negative number
            yields an empty list.

    Returns:
        list: Movie titles ordered by estimated rating (highest first); movies
        with equal estimates are ordered by title.
    """
    # A negative n would otherwise slice off the *end* of the ranking instead
    # of limiting its length.
    if n is not None and n <= 0:
        return []

    # Breaking ties on the title makes the ranking independent of the order in
    # which the predictions arrive, so repeated runs give the same result.
    ranked = sorted(predict_ratings(user), key=lambda pair: (-pair[1], pair[0]))

    return [movie for movie, _ in ranked[:n]]


In [30]:
# Users to produce sample recommendations for.
users = ['Vincent', 'Edgar', 'Addilyn', 'Marlee', 'Javier']

# Print the five best-rated unseen movies for each of them.
for user in users:
    recommendations = get_top_n_recs(user, 5)
    rec = ", ".join(recommendations)
    print(f'Recommendations for {user}: {rec}')

Reccomendations for Vincent: Maximum Risk, My Best Friend's Wedding, Robin and Marian, Dream with the Fishes, Four Christmases
Reccomendations for Edgar: Street Kings, Ghost World, The Juror, Stealth, As It Is in Heaven
Reccomendations for Addilyn: Lords of Dogtown, Underworld: Rise of the Lycans, Modern Problems, Desert Blue, Bandits
Reccomendations for Marlee: It's a Wonderful Afterlife, The Queen, Hits, In the Company of Men, Mad City
Reccomendations for Javier: Lovesick, Broken City, Life of Pi, Lords of London, Last Action Hero


In [31]:
from surprise import KNNWithMeans
from surprise import KNNBasic
from surprise import Dataset
from surprise.model_selection import GridSearchCV

from surprise.model_selection import KFold

# Candidate similarity settings for the user-based KNN model; every
# combination of the listed values is evaluated.
sim_options = {
    "name": ["msd", "cosine", "pearson", "pearson_baseline"],
    "min_support": [1, 2, 3],
    "user_based": [True],
}

param_grid = {"sim_options": sim_options}

# A plain `cv=3` shuffles the ratings with an unseeded RNG, so the best
# score/parameters change from run to run. A seeded 3-fold splitter keeps
# the same evaluation scheme but makes the results reproducible.
cv_folds = KFold(n_splits=3, random_state=0, shuffle=True)

gs = GridSearchCV(KNNBasic, param_grid, measures=["rmse", "mae"], cv=cv_folds)
gs.fit(data)

# Best cross-validated score and the parameters that achieved it, per measure.
print(gs.best_score["rmse"])
print(gs.best_params["rmse"])
print(gs.best_score["mae"])
print(gs.best_params["mae"])

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix.