# imports

In [1]:
import pandas as pd
import sqlite3
import numpy as np
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity
from  more_itertools import unique_everseen # deletes duplicates from list keeping their order

In [2]:
def get_data_from_db(database_directory, tablename):
    db = sqlite3.connect(database_directory)
    query = f"SELECT * FROM {tablename}"
    df = pd.read_sql(query, db)
    return df

def create_users_vs_movies_matrix(df):
    df = df.drop("timestamp", axis = 1)
    df = df.set_index(["userId", "movieId"])
    df = df.unstack()
    df = df.fillna(0)
    return df

# def get_filterd_movie_ids(users_vs_movies_matrix):
# Paul's function here

# Load data

In [3]:
d0 = get_data_from_db("data/movies.db", "ratings")

In [4]:
users_vs_movies_matrix = create_users_vs_movies_matrix(d0)

# Collaborative Filtering

In [13]:
# Functions for comparing users against each other using cosine similarity
def make_cosine_heatmap(df):
    labels = list(df.index.values) 
    cosine_similarities = cosine_similarity(df)
    results_df = pd.DataFrame(cosine_similarities, columns = labels, index = labels)
    return results_df

def get_similar_users(cosine_heatmap, userId):
    results_for_one_user = cosine_heatmap.loc[userId].sort_values(ascending = False) # get similar users
    similar_users = list(results_for_one_user.keys())[1:]
    return similar_users

using collaborative filtering

In [14]:
# create fake user input
a = sorted(set(d0["movieId"]))
b = [0]*len(a)
c = dict(zip(a,b))
c[260] = 5.0
c[1196] = 5.0
c[1210] = 5.0
c[2628] = 5.0
c[5378] = 5.0
c[33493] = 5.0
reshaped_user_input = list(c.values()) # get from Paul later
#reshaped_user_input

In [17]:
user_input_df_row

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
#create user row in dataframe
user_input_df_row = pd.DataFrame(reshaped_user_input, index = a).transpose()
user_input_df_row

# add that user row into existing df
users_vs_movies_matrix_complemented = users_vs_movies_matrix.copy()
users_vs_movies_matrix_complemented.loc[0] = reshaped_user_input

# determine ordered list of similar users via cosine similarity
cosine_heatmap = make_cosine_heatmap(users_vs_movies_matrix_complemented)
similar_users = get_similar_users(cosine_heatmap, 0)

# apply filters, e.g. genre etc on dataframe
filters = a  # e.g. [1,2,3,4,5,6,7,8,12,14,16]
filtered_uvmm_complemented =  users_vs_movies_matrix_complemented["rating"][filters]

# go through every similar user, starting from most similar one, check for conditions below and append movieId to list
recommended_movie_ids = []
for user in similar_users:
    checking_movies = ((filtered_uvmm_complemented.loc[0] == 0.0) & (filtered_uvmm_complemented.loc[user] >= 5.0))
    d = dict(checking_movies)
    recommended_movie_ids_from_user = list(filter(d.get, d)) #returning keys (movieids) from dict where value is True
    recommended_movie_ids += recommended_movie_ids_from_user
    recommended_movie_ids = list(unique_everseen(recommended_movie_ids)) # delete duplicates from list keeping their order 
    if len(recommended_movie_ids) >= 20:
        break
    
recommended_movie_ids

[296,
 2028,
 2329,
 32587,
 44191,
 48516,
 1198,
 2571,
 3578,
 3996,
 4993,
 5349,
 5816,
 5952,
 7153,
 8636,
 8961,
 33794,
 58559,
 110,
 589,
 858,
 1036,
 1200,
 1214,
 1221,
 1291]

# NMF

In [278]:
def get_NMF_results_matrix(trained_NMF_model, users_vs_movies_matrix):
    R = users_vs_movies_matrix
    user_ids = list(R.index.values) 
    movie_ids = list(R.columns.get_level_values(1).values) 
    P = model.components_  # movie feature
    Q = model.transform(R)  # user feature
    nR = np.dot(Q, P)
    results = pd.DataFrame(nR, columns = movie_ids, index = user_ids)
    return results

In [284]:
NMF_results_matrix = get_NMF_results_matrix(model, R)

In [168]:
NMF_results_matrix

In [5]:
# Train model
R = users_vs_movies_matrix
model = NMF(n_components=10, init='random', random_state=10)
model.fit(R)
P = model.components_
Q = model.transform(R)

using NMF

In [6]:
# create fake user input
a = sorted(set(d0["movieId"]))
b = [0]*len(a)
c = dict(zip(a,b))
c[260] = 5.0
c[1196] = 5.0
c[1210] = 5.0
c[2628] = 5.0
c[5378] = 5.0
c[33493] = 5.0
reshaped_user_input = list(c.values()) # get from Paul later
#len(reshaped_user_input)

In [293]:
#testing other user as userinput
#reshaped_user_input = list(users_vs_movies_matrix.loc[1])
#len(user_input)

In [12]:
# apply NMF for user
user_Q = model.transform([reshaped_user_input])
recommendations_user = np.dot(user_Q, P)

# creating list of NMF_score and movieIds
recommendations_user = list(recommendations_user[0])
recommendations_with_movie_id = dict(zip(a, recommendations_user))

# applying filters
filters = a # [1,2,3,4,5,6,7,8,12,14,16]
recommendations_with_movie_id_filtered = list([[x,recommendations_with_movie_id[x]] for x in filters])

#putting results in dataframe
results = pd.DataFrame(recommendations_with_movie_id_filtered, columns = ["movieId", "NMF_score"])
results = results.sort_values(by = "NMF_score", ascending = False)
#output = list(results["movieId"])
results.head(10)
#output

Unnamed: 0,movieId,NMF_score
897,1196,0.376323
224,260,0.363021
910,1210,0.350568
1938,2571,0.317368
899,1198,0.306459
938,1240,0.293078
989,1291,0.28945
1502,2028,0.287778
507,589,0.273941
968,1270,0.268413


In [None]:
# we need to remove results from the list that were rated by user in the beginning.