In [1]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import defaultdict
import tabulate
import csv
import pickle
from numpy import genfromtxt
from recsysNN_utils import *

In [2]:
x_train_user = pd.read_csv("csv/x_train_user.csv")
x_train_item = pd.read_csv("csv/x_train_item.csv")
y_train = pd.read_csv("csv/y_train.csv")

with open('umr_dict.pickle', 'rb') as f:
    # Load the dictionary from the file using pickle
    umr_dict = pickle.load(f)

print(umr_dict)

{1: {'movies': {1: 4.0, 2273: 4.0, 2291: 5.0, 2329: 5.0, 2353: 5.0, 2366: 4.0, 2395: 5.0, 2406: 4.0, 2427: 5.0, 2268: 4.0, 2470: 5.0, 2502: 5.0, 2528: 3.0, 2529: 5.0, 2542: 5.0, 2571: 5.0, 2580: 5.0, 2616: 4.0, 2617: 2.0, 2478: 4.0, 2193: 4.0, 2174: 4.0, 2161: 5.0, 1732: 5.0, 1777: 4.0, 1805: 4.0, 1954: 5.0, 1967: 4.0, 2000: 4.0, 2005: 5.0, 2012: 4.0, 2018: 5.0, 2028: 4.0, 2046: 4.0, 2054: 4.0, 2058: 5.0, 2078: 5.0, 2094: 5.0, 2096: 4.0, 2105: 4.0, 2115: 5.0, 2137: 5.0, 2628: 4.0, 2640: 4.0, 2641: 5.0, 2657: 3.0, 3168: 4.0, 3176: 1.0, 3247: 3.0, 3253: 5.0, 3273: 5.0, 3386: 5.0, 3448: 5.0, 3450: 5.0, 3489: 4.0, 3527: 4.0, 3578: 5.0, 3617: 4.0, 3671: 5.0, 3702: 5.0, 3703: 5.0, 3740: 4.0, 3793: 5.0, 3809: 4.0, 5060: 5.0, 3147: 5.0, 1676: 3.0, 3052: 5.0, 3033: 5.0, 2692: 5.0, 2700: 5.0, 2716: 5.0, 2761: 5.0, 2797: 4.0, 2826: 4.0, 2858: 5.0, 2872: 5.0, 2916: 4.0, 2944: 5.0, 2947: 5.0, 2948: 5.0, 2949: 5.0, 2959: 5.0, 2985: 4.0, 2987: 5.0, 2991: 5.0, 2993: 5.0, 2997: 4.0, 3034: 5.0, 1644: 3.

In [3]:
movie_dict = defaultdict(dict)
count = 0
with open('csv/movie_list.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for line in reader:
            if count == 0:
                count += 1  #skip header
                #print(line) print
            else:
                count += 1
                movie_id = int(line[0])
                movie_dict[movie_id]["title"] = line[1]
                movie_dict[movie_id]["genres"] = line[2]

In [4]:
print(f'train_user shape:{x_train_user.shape}')
print(f'train_item shape:{x_train_item.shape}')
print(f'y_train shape:{y_train.shape}')

train_user shape:(66657, 22)
train_item shape:(66657, 22)
y_train shape:(66657, 1)


In [5]:
# scale training data
unscaled_train_item = x_train_item
unscaled_train_user = x_train_user
y_train_unscaled    = y_train

scalerItem = StandardScaler()#scalerItem = StandardScaler()
scalerItem.fit(x_train_item)#scalerItem.fit(item_train)
x_train_item = scalerItem.transform(x_train_item)#item_train = scalerItem.transform(item_train)

scalerUser = StandardScaler()#scalerUser = StandardScaler()
scalerUser.fit(x_train_user)#scalerUser.fit(user_train)
x_train_user = scalerUser.transform(x_train_user)#user_train = scalerUser.transform(user_train)

scalerTarget = MinMaxScaler((-1, 1))#scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_train.values.reshape(-1, 1))#scalerTarget.fit(y_train.reshape(-1, 1))
y_train = scalerTarget.transform(y_train.values.reshape(-1, 1))#y_train = scalerTarget.transform(y_train.reshape(-1, 1))
#ynorm_test = scalerTarget.transform(y_test.reshape(-1, 1))

print(np.allclose(unscaled_train_item, scalerItem.inverse_transform(x_train_item)))
print(np.allclose(unscaled_train_user, scalerUser.inverse_transform(x_train_user)))

True
True


In [6]:
x_train_item, item_test = train_test_split(x_train_item, train_size=0.80, shuffle=True, random_state=1)
x_train_user, user_test = train_test_split(x_train_user, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"movie/item training data shape: {x_train_item.shape}")
print(f"movie/item test data shape: {item_test.shape}")
print(f"movie/user training data shape: {x_train_user.shape}")
print(f"movie/user test data shape: {user_test.shape}")

movie/item training data shape: (53325, 22)
movie/item test data shape: (13332, 22)
movie/user training data shape: (53325, 22)
movie/user test data shape: (13332, 22)


In [7]:
num_user_features = x_train_user.shape[1] - 3
num_item_features = x_train_item.shape[1] - 1
uvs = 3  # user genre vector start
ivs = 3  # item genre vector start
u_s = 3  # start of columns to use in training, user
i_s = 1  # start of columns to use in training, items

In [8]:
with open('nn_model.pickle', 'rb') as f:
    # Load the dictionary from the file using pickle
    model = pickle.load(f)

In [9]:
model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)



0.167226642370224

In [10]:
new_user_id = 5000
new_rating_ave = 4.0
new_action = 5.0
new_adventure = 3.0
new_animation = 0.0
new_childrens = 0.0
new_comedy = 0.0
new_crime = 0.0
new_documentary = 0.0
new_drama = 0.0
new_fantasy = 5.0
new_filmnoir = 0.0 ##
new_horror = 4.0
new_imax = 0.0 ##
new_musical = 0.0 ##
new_mystery = 0.0
new_romance = 0.0
new_scifi = 0.0
new_thriller = 5.0
new_western = 0.0 ##
new_war = 0.0 ##
new_rating_count = 7
#user id,rating count,rating ave,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,***film-noir**Horror,**imax**,***musical***Mystery,Romance,Sci-Fi,Thriller***war***,***western***

user_vec = np.array([[new_user_id, new_rating_count, new_rating_ave,
                      new_action, new_adventure, new_animation, new_childrens,
                      new_comedy, new_crime, new_documentary,
                      new_drama, new_fantasy, new_filmnoir, new_horror, new_imax, new_musical, new_mystery,
                      new_romance, new_scifi, new_thriller, new_western, new_war]])

In [11]:
item_vecs = genfromtxt('./csv/item_vecs.csv', delimiter=',')

In [12]:
# generate and replicate the user vector to match the number movies in the data set.
user_vecs = gen_user_vecs(user_vec,len(item_vecs))

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display

print_pred_movies(sorted_ypu, sorted_items, movie_dict, maxcount = 10)





y_p,movie id,rating ave,title,genres
2.5,102125,3.6,Iron Man 3 (2013),Action|Sci-Fi|Thriller|IMAX
2.5,3536,3.5,Keeping the Faith (2000),Comedy|Drama|Romance
2.4,1644,2.1,I Know What You Did Last Summer (1997),Horror|Mystery|Thriller
2.3,5449,2.9,Mr. Deeds (2002),Comedy|Romance
2.3,52,3.5,Mighty Aphrodite (1995),Comedy|Drama|Romance
2.2,4069,2.9,"Wedding Planner, The (2001)",Comedy|Romance
2.2,3704,3.3,Mad Max Beyond Thunderdome (1985),Action|Adventure|Sci-Fi
2.2,1375,3.3,Star Trek III: The Search for Spock (1984),Action|Adventure|Sci-Fi
2.1,2105,3.3,Tron (1982),Action|Adventure|Sci-Fi
2.0,34437,3.5,Broken Flowers (2005),Comedy|Drama


In [13]:
uid = 1 
# form a set of user vectors. This is the same vector, transformed and repeated.
user_vecs, y_vecs = get_user_vecs(uid, unscaled_train_user.to_numpy(), item_vecs, umr_dict)

# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# make a prediction
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]  #using unscaled vectors for display
sorted_user  = user_vecs[sorted_index]
sorted_y     = y_vecs[sorted_index]

#print sorted predictions for movies rated by the user
print_existing_user(sorted_ypu, sorted_y.reshape(-1,1), sorted_user, sorted_items, ivs, uvs, movie_dict, maxcount = 50)





y_p,y,user,user genre ave,movie rating ave,movie id,title,genres
4.8,3.0,1,"[4.3,4.5,4.2]",3.2,2528,Logan's Run (1976),Action|Adventure|Sci-Fi
4.7,5.0,1,"[4.3,4.5,4.5]",3.7,1275,Highlander (1986),Action|Adventure|Fantasy
4.7,5.0,1,"[4.3,4.5,4.5]",3.6,2115,Indiana Jones and the Temple of Doom (1984),Action|Adventure|Fantasy
4.7,5.0,1,"[4.3,4.5]",3.5,2395,Rushmore (1998),Comedy|Drama
4.7,5.0,1,"[4.3,4.5,4.5]",3.3,2427,"Thin Red Line, The (1998)",Action|Drama|War
4.7,5.0,1,[4.5],3.8,1954,Rocky (1976),Drama
4.7,5.0,1,"[4.5,4.8,4.5,4.8]",3.9,919,"Wizard of Oz, The (1939)",Adventure|Children|Fantasy|Musical
4.7,3.0,1,"[4.3,4.4]",3.0,1377,Batman Returns (1992),Action|Crime
4.7,5.0,1,"[4.3,4.5,4.3,4.5]",3.5,151,Rob Roy (1995),Action|Drama|Romance|War
4.6,5.0,1,"[4.5,4.3,4.5]",3.6,3052,Dogma (1999),Adventure|Comedy|Fantasy
