In [1]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import defaultdict
import tabulate
import csv
import pickle
from numpy import genfromtxt
from recsysNN_utils import *

In [2]:
x_train_user = pd.read_csv("csv/x_train_user.csv")
x_train_item = pd.read_csv("csv/x_train_item.csv")
y_train = pd.read_csv("csv/y_train.csv")

with open('umr_dict.pickle', 'rb') as f:
    # Load the dictionary from the file using pickle
    umr_dict = pickle.load(f)

print(umr_dict)

{1: {'movies': {1: 4.0, 2115: 5.0, 2161: 5.0, 2174: 4.0, 2193: 4.0, 2268: 4.0, 2273: 4.0, 2291: 5.0, 2329: 5.0, 2353: 5.0, 2395: 5.0, 2406: 4.0, 2427: 5.0, 2470: 5.0, 2502: 5.0, 2529: 5.0, 2105: 4.0, 2542: 5.0, 2094: 5.0, 2054: 4.0, 1552: 4.0, 1573: 5.0, 1580: 3.0, 1617: 5.0, 1625: 5.0, 1676: 3.0, 1732: 5.0, 1777: 4.0, 1954: 5.0, 1967: 4.0, 2000: 4.0, 2005: 5.0, 2012: 4.0, 2018: 5.0, 2028: 4.0, 2078: 5.0, 2571: 5.0, 2580: 5.0, 2617: 2.0, 3052: 5.0, 3147: 5.0, 3176: 1.0, 3247: 3.0, 3253: 5.0, 3448: 5.0, 3489: 4.0, 3527: 4.0, 3578: 5.0, 3617: 4.0, 3671: 5.0, 3702: 5.0, 3703: 5.0, 3793: 5.0, 5060: 5.0, 3034: 5.0, 3033: 5.0, 2997: 4.0, 2987: 5.0, 2628: 4.0, 2640: 4.0, 2641: 5.0, 2657: 3.0, 2692: 5.0, 2700: 5.0, 2716: 5.0, 1517: 5.0, 2761: 5.0, 2858: 5.0, 2916: 4.0, 2947: 5.0, 2948: 5.0, 2949: 5.0, 2959: 5.0, 2985: 4.0, 2797: 4.0, 1500: 4.0, 3809: 4.0, 1396: 3.0, 367: 4.0, 441: 4.0, 457: 5.0, 480: 4.0, 500: 3.0, 527: 5.0, 543: 4.0, 552: 4.0, 553: 5.0, 590: 4.0, 592: 4.0, 593: 4.0, 596: 5.0,

In [3]:
movie_dict = defaultdict(dict)
count = 0
with open('csv/movie_list.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for line in reader:
            if count == 0:
                count += 1  #skip header
                #print(line) print
            else:
                count += 1
                movie_id = int(line[0])
                movie_dict[movie_id]["title"] = line[1]
                movie_dict[movie_id]["genres"] = line[2]

In [4]:
print(f'train_user shape:{x_train_user.shape}')
print(f'train_item shape:{x_train_item.shape}')
print(f'y_train shape:{y_train.shape}')

train_user shape:(55658, 17)
train_item shape:(55658, 17)
y_train shape:(55658, 1)


In [5]:
# scale training data
unscaled_train_item = x_train_item
unscaled_train_user = x_train_user
y_train_unscaled    = y_train

scalerItem = StandardScaler()#scalerItem = StandardScaler()
scalerItem.fit(x_train_item)#scalerItem.fit(item_train)
x_train_item = scalerItem.transform(x_train_item)#item_train = scalerItem.transform(item_train)

scalerUser = StandardScaler()#scalerUser = StandardScaler()
scalerUser.fit(x_train_user)#scalerUser.fit(user_train)
x_train_user = scalerUser.transform(x_train_user)#user_train = scalerUser.transform(user_train)

scalerTarget = MinMaxScaler((-1, 1))#scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_train.values.reshape(-1, 1))#scalerTarget.fit(y_train.reshape(-1, 1))
y_train = scalerTarget.transform(y_train.values.reshape(-1, 1))#y_train = scalerTarget.transform(y_train.reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

print(np.allclose(unscaled_train_item, scalerItem.inverse_transform(x_train_item)))
print(np.allclose(unscaled_train_user, scalerUser.inverse_transform(x_train_user)))

True
True


In [6]:
x_train_item, item_test = train_test_split(x_train_item, train_size=0.80, shuffle=True, random_state=1)
x_train_user, user_test = train_test_split(x_train_user, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"movie/item training data shape: {x_train_item.shape}")
print(f"movie/item test data shape: {item_test.shape}")
print(f"movie/user training data shape: {x_train_user.shape}")
print(f"movie/user test data shape: {user_test.shape}")

movie/item training data shape: (44526, 17)
movie/item test data shape: (11132, 17)
movie/user training data shape: (44526, 17)
movie/user test data shape: (11132, 17)


In [7]:
num_user_features = x_train_user.shape[1] - 3
num_item_features = x_train_item.shape[1] - 1
uvs = 3  # user genre vector start
ivs = 3  # item genre vector start
u_s = 3  # start of columns to use in training, user
i_s = 1  # start of columns to use in training, items

In [8]:
with open('deneme_model.pickle', 'rb') as f:
    model = pickle.load(f)

In [9]:
model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)



0.15956147015094757

In [10]:
new_user_id = 5000
new_rating_ave = 5.0
new_action = 1.0
new_adventure = 3.0
new_animation = 5.0
new_childrens = 4.0
new_comedy = 5.0
new_crime = 1.0
new_documentary = 2.0
new_drama = 4.5
new_fantasy = 3.0
new_horror = 1.0
new_mystery = 3.0
new_romance = 4.0
new_scifi = 2.0
new_thriller = 1.0
new_rating_count = 500
#user id,rating count,rating ave,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,***film-noir**Horror,**imax**,***musical***Mystery,Romance,Sci-Fi,Thriller***war***,***western***

user_vec = np.array([[new_user_id, new_rating_count, new_rating_ave,
                      new_action, new_adventure, new_animation, new_childrens,
                      new_comedy, new_crime, new_documentary,
                      new_drama, new_fantasy, new_horror, new_mystery,
                      new_romance, new_scifi, new_thriller]])

In [11]:
item_vecs = genfromtxt('./csv/item_vecs.csv', delimiter=',')

In [12]:
# generate and replicate the user vector to match the number movies in the data set.
user_vecs = gen_user_vecs(user_vec,len(item_vecs))

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display

print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 20)





y_p,movie id,rating ave,title,genres
4.1,88125,3.9,Harry Potter and the Deathly Hallows: Part 2 (2011),Action|Adventure|Drama|Fantasy|Mystery|IMAX
4.1,29,4.0,"City of Lost Children, The (CitÃ© des enfants perdus, La) (1995)",Adventure|Drama|Fantasy|Mystery|Sci-Fi
4.1,2890,3.7,Three Kings (1999),Action|Adventure|Comedy|Drama|War
4.1,648,3.5,Mission: Impossible (1996),Action|Adventure|Mystery|Thriller
4.0,904,4.3,Rear Window (1954),Mystery|Thriller
4.0,2762,3.9,"Sixth Sense, The (1999)",Drama|Horror|Mystery
4.0,31696,3.5,Constantine (2005),Action|Fantasy|Horror|Thriller
4.0,2288,3.9,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller
4.0,1320,3.1,AlienÂ³ (a.k.a. Alien 3) (1992),Action|Horror|Sci-Fi|Thriller
4.0,1387,4.0,Jaws (1975),Action|Horror


In [13]:
uid = 1 
# form a set of user vectors. This is the same vector, transformed and repeated.
user_vecs, y_vecs = get_user_vecs(uid, unscaled_train_user.to_numpy(), item_vecs, umr_dict)

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display
sorted_user  = user_vecs[sorted_index]
sorted_y     = y_vecs[sorted_index]

#print sorted predictions for movies rated by the user
print_existing_user(sorted_ypu, sorted_y.reshape(-1,1), sorted_user, sorted_items, ivs, uvs, movie_dict, maxcount = 50)





y_p,y,user,user genre ave,movie rating ave,movie id,title,genres
4.4,5.0,1,"[4.3,4.4,4.2]",3.8,2948,From Russia with Love (1963),Action|Adventure|Thriller
4.4,5.0,1,"[4.3,4.4,4.2]",3.8,2947,Goldfinger (1964),Action|Adventure|Thriller
4.4,5.0,1,"[4.3,4.4,4.2]",3.7,2949,Dr. No (1962),Action|Adventure|Thriller
4.4,5.0,1,"[4.3,4.4,4.3]",3.5,1517,Austin Powers: International Man of Mystery (1997),Action|Adventure|Comedy
4.4,2.0,1,"[4.3,4.4,4.3,4.5,3.0,4.2]",3.1,2617,"Mummy, The (1999)",Action|Adventure|Comedy|Fantasy|Horror|Thriller
4.4,3.0,1,"[4.3,4.3,3.0,4.2]",3.5,70,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller
4.4,3.0,1,"[4.3,3.0,4.3]",3.3,2657,"Rocky Horror Picture Show, The (1975)",Comedy|Horror|Musical|Sci-Fi
4.4,5.0,1,"[4.3,4.3]",3.8,1220,"Blues Brothers, The (1980)",Action|Comedy|Musical
4.4,4.0,1,"[3.0,4.3]",4.0,1214,Alien (1979),Horror|Sci-Fi
4.4,5.0,1,"[4.4,4.8,4.8,4.5]",3.4,1032,Alice in Wonderland (1951),Adventure|Animation|Children|Fantasy|Musical
