In [1]:
# !apt-get install python-dev libopenblas-dev
# !git clone --recursive https://github.com/ibayer/fastFM.git
# import os
# os.chdir("./fastFM/")
# !pip install -r ./requirements.txt
# !make
# !PYTHON=python3 make
# !pip install .

In [2]:
import os
# Make the process working directory explicit so the relative pickle file
# names opened in later cells resolve against `path`.
path = "./"
os.chdir(path)

In [3]:
import gzip
import numpy as np
import random
import scipy
from collections import defaultdict
from fastFM import als
from scipy.spatial import distance
import csv
import dateutil.parser
from datetime import timedelta
import pickle

In [4]:
def MSE(predictions, labels):
    """Return the mean squared error between two equally ordered sequences.

    Pairs are formed with ``zip``, so extra elements of the longer sequence
    are ignored. Raises ``ZeroDivisionError`` when no pairs are produced.
    """
    squared_errors = []
    for predicted, actual in zip(predictions, labels):
        squared_errors.append((predicted - actual) ** 2)
    return sum(squared_errors) / len(squared_errors)

In [None]:
# Load the three pre-built review splits (train / validation / test).
# NOTE(review): pickle.load can execute arbitrary code; only open files
# that were produced by a trusted pipeline.
_split_files = (
    "Sampled_by_review_count_train_state.pkl",
    "Sampled_by_review_count_val_state.pkl",
    "Sampled_by_review_count_test_state.pkl",
)
_loaded_splits = []
for _split_file in _split_files:
    with open(_split_file, "rb") as fp:
        _loaded_splits.append(pickle.load(fp))

train_dicts, val_dicts, test_dicts = _loaded_splits

In [19]:
# Every review from all three splits; used below to build the user/place
# vocabularies and the per-user / per-place statistics.
all_dicts = train_dicts + val_dicts + test_dicts

In [6]:
# Train and validation reviews concatenated; the price-aware model below
# builds one matrix from this list and re-splits it 80/20 by row position.
train_val_dicts = train_dicts + val_dicts

In [None]:
# Place records; later cells read their 'gPlusPlaceId' and 'price' keys.
# NOTE(review): pickle.load can execute arbitrary code - trusted files only.
with open("Price_filtered_places.pkl","rb") as fp:
    filtered_places = pickle.load(fp)

In [10]:
# Lookup table: gPlusPlaceId -> full place record. It is a defaultdict, so
# indexing an unknown id with [] yields (and stores) an empty dict.
places_meta_data = defaultdict(dict)
places_meta_data.update(
    (place_record['gPlusPlaceId'], place_record) for place_record in filtered_places
)

In [11]:
# Peek at one review record to see which fields are available.
# NOTE(review): the rendered output contains a reviewer name and id - clear it before sharing.
train_dicts[0]

{'rating': 4.0,
 'reviewerName': 'william spindler',
 'reviewText': 'Best War Wanton soup in Red Bluff',
 'categories': ['Asian Restaurant', 'Chinese Restaurant'],
 'gPlusPlaceId': '106591714648856494903',
 'unixReviewTime': 1394669496,
 'reviewTime': 'Mar 12, 2014',
 'gPlusUserId': '100000032416892623125',
 'gps': [40.179159, -122.236162],
 'parsed_time': 0.9989914778876909,
 'state': 'CA'}

In [20]:
# Vocabularies: every user and place seen in any split, and every price
# value present in the place records.
users = {review['gPlusUserId'] for review in all_dicts}
places = {review['gPlusPlaceId'] for review in all_dicts}
prices = {place_record['price'] for place_record in filtered_places}

nUsers, nPlaces, nPrices = len(users), len(places), len(prices)

# Column offsets for the one-hot encodings. Indices follow set iteration
# order, so they are only meaningful within the current kernel session.
user_di = dict(zip(users, range(nUsers)))
place_di = dict(zip(places, range(nPlaces)))
price_di = dict(zip(prices, range(nPrices)))

In [None]:
# Design matrix for the price-aware FM: one row per train+val review with
# three one-hot groups laid out as [users | places | price levels].
X = scipy.sparse.lil_matrix((len(train_val_dicts), nUsers + nPlaces + nPrices))

for i, review in enumerate(train_val_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    # .get() instead of [] so that unknown places do not get an empty dict
    # inserted into the defaultdict as a side effect of the membership test.
    place_meta = places_meta_data.get(place_id)
    if not place_meta or user_id not in user_di:
        # Row is left all-zero; keep_non_empty_rows removes it afterwards.
        continue
    X[i, user_di[user_id]] = 1  # One-hot encoding of user
    X[i, nUsers + place_di[place_id]] = 1  # One-hot encoding of item
    X[i, nUsers + nPlaces + price_di[place_meta['price']]] = 1  # One-hot encoding of price

y = np.array([d['rating'] for d in train_val_dicts])
# Show only the shapes: printing X itself would list every stored entry.
print(X.shape, y.shape)

In [495]:
def keep_non_empty_rows(X, y):
    """Drop rows of ``X`` that store no non-zero entry, and the matching labels.

    ``X`` is converted to CSR first; ``y`` must support boolean-mask indexing
    (e.g. a NumPy array) and be aligned row-for-row with ``X``.
    Returns the filtered ``(X, y)`` pair.
    """
    csr = X.tocsr()
    entries_per_row = csr.getnnz(axis=1)
    has_entries = entries_per_row > 0
    return csr[has_entries], y[has_entries]

# Remove reviews that received no features (no price metadata for the place).
# NOTE: rebinds X and y in place, so this cell changes its own inputs.
X, y = keep_non_empty_rows(X, y)

In [497]:
# Positional 80/20 split of the filtered train+val matrix: the first 80% of
# rows are used for fitting, the remainder for validation.
n_rows = X.shape[0]
split = int(0.8 * n_rows)
X_train, X_val = X[:split], X[split:]
y_train, y_val = y[:split], y[split:]

In [None]:
# Test design matrix for the price-aware FM, same column layout as X:
# [users | places | price levels].
X_test = scipy.sparse.lil_matrix((len(test_dicts), nUsers + nPlaces + nPrices))

for i, review in enumerate(test_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    # Look the metadata up by the TEST review's own place id. The earlier
    # version indexed train_val_dicts[i] here, which attached the price of an
    # unrelated place (or raised IndexError when the test set was longer).
    # .get() also avoids inserting empty dicts into the defaultdict.
    place_meta = places_meta_data.get(place_id)

    if user_id in user_di:
        X_test[i, user_di[user_id]] = 1  # One-hot encoding of user
    # Item and price are only encoded together, and only when the place has metadata.
    if place_id in place_di and place_meta:
        X_test[i, nUsers + place_di[place_id]] = 1  # One-hot encoding of item
        X_test[i, nUsers + nPlaces + price_di[place_meta['price']]] = 1  # One-hot encoding of price

y_test = np.array([d['rating'] for d in test_dicts])

In [None]:
# Drop test rows that ended up with no features; the bare X_test on the last
# line displays the resulting sparse-matrix summary.
X_test, y_test = keep_non_empty_rows(X_test, y_test)
X_test

In [500]:
# Price-aware factorization machine fitted with fastFM's ALS solver.
# Hyperparameters are passed exactly as written; see the fastFM docs for
# their precise meaning.
fm = als.FMRegression(n_iter=500, init_stdev=0.1, rank=3, l2_reg_w=20, l2_reg_V=50)
fm.fit(X_train, y_train)

In [None]:
# Report training and validation MSE for `fm`, then score the test matrix.
y_pred_train = fm.predict(X_train)
train_mse = MSE(y_pred_train, y_train)
print(train_mse)

y_pred_val = fm.predict(X_val)
val_mse = MSE(y_pred_val, y_val)
print(val_mse)

y_test_pred = fm.predict(X_test)

In [21]:
# Latest review timestamp, ignoring records whose timestamp is missing/falsy.
time_li = [review['unixReviewTime'] for review in all_dicts if review['unixReviewTime']]
max_time = max(time_li)

# Vocabularies over all splits (users, places) and over the place records (prices).
users = {review['gPlusUserId'] for review in all_dicts}
places = {review['gPlusPlaceId'] for review in all_dicts}
prices = {place_record['price'] for place_record in filtered_places}

nUsers, nPlaces, nPrices = len(users), len(places), len(prices)

# One-hot column offsets; indices follow set iteration order.
user_di = dict(zip(users, range(nUsers)))
place_di = dict(zip(places, range(nPlaces)))
price_di = dict(zip(prices, range(nPrices)))

In [24]:
# Per-user / per-place rating statistics over ALL splits.
# NOTE(review): all_dicts includes validation and test reviews, so
# globalAverage, userAverage, usersPerPlace and placesPerUser contain
# held-out information - confirm that is intended wherever they are used.
mod_train_dicts = []
allRatings = []
ratingDict = {}                    # (user, place) -> integer rating
userRatings = defaultdict(list)    # user  -> [(rating, parsed_time), ...]
placeRatings = defaultdict(list)   # place -> [(rating, parsed_time), ...]
usersPerPlace = defaultdict(set)   # Maps an item to the users who rated it
placesPerUser = defaultdict(set)   # Maps a user to the items that they rated

for d in all_dicts:
    user = d['gPlusUserId']
    place = d['gPlusPlaceId']
    usersPerPlace[place].add(user)
    placesPerUser[user].add(place)

    # Ratings are normalised to int in place on the shared record.
    r = int(d['rating'])
    d['rating'] = r
    ratingDict[(user, place)] = r

    # Timestamp as a fraction of the latest one. max_time was computed while
    # skipping missing timestamps, so guard the division the same way instead
    # of raising TypeError on a None value.
    review_time = d['unixReviewTime']
    d['parsed_time'] = review_time / max_time if review_time is not None else None

    allRatings.append(r)
    userRatings[user].append((r, d['parsed_time']))
    placeRatings[place].append((r, d['parsed_time']))
    mod_train_dicts.append(d)

globalAverage = sum(allRatings) / len(allRatings)
userAverage = {
    u: sum(rating for rating, _ in rated) / len(rated)
    for u, rated in userRatings.items()
}

In [None]:
# Number of distinct places each user reviewed, and the mean of those counts.
review_count_per_user = [len(visited) for visited in placesPerUser.values()]
avg_review_count = sum(review_count_per_user) / len(review_count_per_user)
avg_review_count

In [54]:
# Per-user review counts, largest first (copy so the original list keeps its order).
sorted_review_count = list(review_count_per_user)
sorted_review_count.sort(reverse=True)

In [None]:
# Sampling scheme: keep every user with at least two reviewed places, plus a
# random subset of the users who reviewed exactly one place.
SINGLE_REVIEW_SAMPLE_SIZE = 40000
SAMPLING_SEED = 0
# Local RNG so the draw is reproducible without touching the global random state.
_sampling_rng = random.Random(SAMPLING_SEED)

users_with_non1_rcount = [user for user in placesPerUser if len(placesPerUser[user]) >= 2]
users_with_1_rcount = [user for user in placesPerUser if len(placesPerUser[user]) == 1]

# Sample WITHOUT replacement: repeated random.choice calls return duplicates,
# which the set below collapses, so fewer users than requested were kept.
# min() also covers a pool that is smaller than the sample size (or empty).
users_with_1_rcount_subset = _sampling_rng.sample(
    users_with_1_rcount,
    min(SINGLE_REVIEW_SAMPLE_SIZE, len(users_with_1_rcount)),
)

# Kept for compatibility: after this line users_with_non1_rcount also
# contains the sampled single-review users.
users_with_non1_rcount.extend(users_with_1_rcount_subset)
user_list_sampled = set(users_with_non1_rcount)
sampled_train_dict = [d for d in train_dicts if d['gPlusUserId'] in user_list_sampled]

print(len(user_list_sampled))
print(len(sampled_train_dict))

In [511]:
# Constant baseline: predict the global mean rating for every validation row.
# NOTE(review): globalAverage is computed over all_dicts, which includes the
# validation and test ratings. y_val is whatever the most recently executed
# cell defined.
y_val_glob_avg = [globalAverage]*len(y_val)

In [None]:
# Validation MSE of the global-average baseline (displayed as the cell output).
MSE(y_val,y_val_glob_avg)

Vanilla FM

In [None]:
# Vanilla FM training matrix: one row per training review, columns are
# [users | places], each row has exactly one 1 in each group.
X_train = scipy.sparse.lil_matrix((len(train_dicts), nUsers + nPlaces))

for row, review in enumerate(train_dicts):
    user_col = user_di[review['gPlusUserId']]
    place_col = nUsers + place_di[review['gPlusPlaceId']]
    X_train[row, user_col] = 1   # One-hot encoding of user
    X_train[row, place_col] = 1  # One-hot encoding of item

y_train = np.array([review['rating'] for review in train_dicts])

In [None]:
# Vanilla FM validation matrix, columns [users | places]. A user or place
# missing from the vocabulary simply leaves its group all-zero.
X_val = scipy.sparse.lil_matrix((len(val_dicts), nUsers + nPlaces))

for row, review in enumerate(val_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    if user_id in user_di:
        X_val[row, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_val[row, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_val = np.array([review['rating'] for review in val_dicts])

In [None]:
# Vanilla FM test matrix, columns [users | places]. A user or place missing
# from the vocabulary simply leaves its group all-zero.
X_test = scipy.sparse.lil_matrix((len(test_dicts), nUsers + nPlaces))

for row, review in enumerate(test_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    if user_id in user_di:
        X_test[row, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_test[row, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_test = np.array([review['rating'] for review in test_dicts])

In [None]:
# User/place-only model. rank=0 is passed, so presumably no pairwise
# factors are learned and this is a regularised linear model - confirm
# against the fastFM docs.
fm = als.FMRegression(init_stdev=0.1, rank=0, l2_reg_w=20)
fm.fit(X_train, y_train)

In [None]:
# Report training and validation MSE for `fm`, then score the test matrix.
y_pred_train = fm.predict(X_train)
train_mse = MSE(y_pred_train, y_train)
print(train_mse)

y_pred_val = fm.predict(X_val)
val_mse = MSE(y_pred_val, y_val)
print(val_mse)

y_test_pred = fm.predict(X_test)

FastFM with state feature

In [None]:
# Two parallel, index-aligned tables: state_list[k] is a state code and
# state_coords[k] its coordinate pair (presumably latitude, longitude of the
# state's approximate centre - confirm source). The final "" entry stands for
# reviews without a state and maps to (0.0, 0.0). Later cells use
# state_list.index(code), which raises ValueError for a code not listed here.
state_list = ["AL","AK","AZ","AR","CA","CO","CT","DE","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY",""]
state_coords = [(32.806671,-86.79113),	(61.370716,-152.404419),	(33.729759,-111.431221),	(34.969704,-92.373123),	(36.116203,-119.681564),	(39.059811,-105.311104),	(41.597782,-72.755371),	(39.318523,-75.507141),	(27.766279,-81.686783),	(33.040619,-83.643074),	(21.094318,-157.498337),	(44.240459,-114.478828),	(40.349457,-88.986137),	(39.849426,-86.258278),	(42.011539,-93.210526),	(38.5266,-96.726486),	(37.66814,-84.670067),	(31.169546,-91.867805),	(44.693947,-69.381927),	(39.063946,-76.802101),	(42.230171,-71.530106),	(43.326618,-84.536095),	(45.694454,-93.900192),	(32.741646,-89.678696),	(38.456085,-92.288368),	(46.921925,-110.454353),	(41.12537,-98.268082),	(38.313515,-117.055374),	(43.452492,-71.563896),	(40.298904,-74.521011),	(34.840515,-106.248482),	(42.165726,-74.948051),	(35.630066,-79.806419),	(47.528912,-99.784012),	(40.388783,-82.764915),	(35.565342,-96.928917),	(44.572021,-122.070938),	(40.590752,-77.209755),	(41.680893,-71.51178),	(33.856892,-80.945007),	(44.299782,-99.438828),	(35.747845,-86.692345),	(31.054487,-97.563461),	(40.150032,-111.862434),	(44.045876,-72.710686),	(37.769337,-78.169968),	(47.400902,-121.490494),	(38.491226,-80.954453),	(44.268543,-89.616508),	(42.755966,-107.30249), (0.0,0.0)]

In [None]:
# Width of the state one-hot group. Derived from state_list (50 states plus
# the "" placeholder = 51) so the matrix width cannot drift from the table.
nStates = len(state_list)

# Training matrix with columns [users | places | states].
X_train = scipy.sparse.lil_matrix((len(train_dicts), nUsers + nPlaces + nStates))

for i, review in enumerate(train_dicts):
    user = user_di[review['gPlusUserId']]
    item = place_di[review['gPlusPlaceId']]
    # Raises ValueError if the review carries a state code missing from state_list.
    state = state_list.index(review['state'])
    X_train[i, user] = 1  # One-hot encoding of user
    X_train[i, nUsers + item] = 1  # One-hot encoding of item
    X_train[i, nUsers + nPlaces + state] = 1  # One-hot encoding of state

y_train = np.array([d['rating'] for d in train_dicts])

In [520]:
# Inspect a second record (ratings are ints after the statistics cell ran).
# NOTE(review): the rendered output contains reviewer name and review text.
train_dicts[1]

{'rating': 5,
 'reviewerName': 'william spindler',
 'reviewText': "This is a review that is long overdo. I've been enjoying the great pizza at this restaurant for over a year now and it's hands down the best in town. I saw a review on here saying the pizza was greasy, and I'll bet it was for a peperoni pizza. I don't know how you could put so many slices of peperoni on, where you can't even see the cheese, without a little pepperoni grease escaping. These guys at Firehouse make the pizzas that everyone else does in their commercials, you can watch as they pile the toppings a mile high on every pie. And when was the last time you saw someone slicing bell peppers and tomatoes by hand for your pizza. I honestly have a hard time understanding how these guys stay in business, but I'm glad they do. And the five dollar slice and soda lunch is probably the best value in town. Thanks guys, keep it up.",
 'categories': ['European Restaurant',
  'Italian Restaurant',
  'Pizza Restaurant'],
 'gPlu

In [None]:
# Validation matrix with columns [users | places | states].
X_val = scipy.sparse.lil_matrix((len(val_dicts), nUsers + nPlaces + nStates))

for i, review in enumerate(val_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    # State comes from the validation review itself. A stray second lookup
    # through train_dicts[i] (validation index into the training list) was
    # removed: its result was never used and it could raise IndexError.
    state = state_list.index(review['state'])
    X_val[i, nUsers + nPlaces + state] = 1  # One-hot encoding of state
    if user_id in user_di:
        X_val[i, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_val[i, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_val = np.array([d['rating'] for d in val_dicts])

In [None]:
# Test matrix with columns [users | places | states]. The state column is
# always set; user/place columns only when the id is in the vocabulary.
X_test = scipy.sparse.lil_matrix((len(test_dicts), nUsers + nPlaces + nStates))

for row, review in enumerate(test_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    state_col = nUsers + nPlaces + state_list.index(review['state'])
    X_test[row, state_col] = 1
    if user_id in user_di:
        X_test[row, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_test[row, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_test = np.array([review['rating'] for review in test_dicts])

In [None]:
# Model for the [users | places | states] features; same hyperparameters as
# the user/place-only model so the two validation errors are comparable.
fm1 = als.FMRegression(init_stdev=0.1, rank=0, l2_reg_w=20)
fm1.fit(X_train, y_train)

In [None]:
# Report training and validation MSE for `fm1`, then score the test matrix.
y_pred_train = fm1.predict(X_train)
train_mse = MSE(y_pred_train, y_train)
print(train_mse)

y_pred_val = fm1.predict(X_val)
val_mse = MSE(y_pred_val, y_val)
print(val_mse)

y_test_pred = fm1.predict(X_test)

FISM with just places

In [30]:
# FISM-style training matrix with columns [history places | target place].
# The first group spreads weight 1/(|history|-1) over the user's OTHER
# places; the second group is a one-hot of the place being rated.
# NOTE(review): placesPerUser is built from all splits, so a history also
# contains the user's validation/test places.
X_train_fism = scipy.sparse.lil_matrix((len(train_dicts), nPlaces*2))

for row, review in enumerate(train_dicts):
    target_place = review['gPlusPlaceId']
    target_col = place_di[target_place]
    visited = placesPerUser[review['gPlusUserId']]
    for other_place in visited:
        if other_place != target_place:
            X_train_fism[row, place_di[other_place]] = 1.0 / (len(visited)-1)
    X_train_fism[row, nPlaces + target_col] = 1

y_train_fism = np.array([review['rating'] for review in train_dicts])

In [26]:
# FISM-style validation matrix with columns [history places | target place].
# History weights are 1/(|history|-1) over the user's other places.
X_val_fism = scipy.sparse.lil_matrix((len(val_dicts), nPlaces*2))

for row, review in enumerate(val_dicts):
    target_place = review['gPlusPlaceId']
    target_col = place_di[target_place]
    visited = placesPerUser[review['gPlusUserId']]
    for other_place in visited:
        if other_place != target_place:
            X_val_fism[row, place_di[other_place]] = 1.0 / (len(visited)-1)
    X_val_fism[row, nPlaces + target_col] = 1

y_val_fism = np.array([review['rating'] for review in val_dicts])

In [28]:
# FISM-style test matrix with columns [history places | target place].
# History weights are 1/(|history|-1) over the user's other places.
X_test_fism = scipy.sparse.lil_matrix((len(test_dicts), nPlaces*2))

for row, review in enumerate(test_dicts):
    target_place = review['gPlusPlaceId']
    target_col = place_di[target_place]
    visited = placesPerUser[review['gPlusUserId']]
    for other_place in visited:
        if other_place != target_place:
            X_test_fism[row, place_di[other_place]] = 1.0 / (len(visited)-1)
    X_test_fism[row, nPlaces + target_col] = 1

y_test_fism = np.array([review['rating'] for review in test_dicts])

In [None]:
# Fit the FISM-style model, report train/validation MSE and score the test matrix.
fm_fism = als.FMRegression(init_stdev=0.1, rank=5, l2_reg_w=20, l2_reg_V=10)
fm_fism.fit(X_train_fism, y_train_fism)

y_pred_train = fm_fism.predict(X_train_fism)
train_mse = MSE(y_pred_train, y_train_fism)
print(train_mse)

y_pred_val = fm_fism.predict(X_val_fism)
val_mse = MSE(y_pred_val, y_val_fism)
print(val_mse)

y_test_pred = fm_fism.predict(X_test_fism)

FastFM with state coordinates

In [None]:

# Training matrix with columns [users | places | coord[0] | coord[1]], where
# the two trailing columns hold the raw state_coords pair of the review's state.
X_train = scipy.sparse.lil_matrix((len(train_dicts), nUsers + nPlaces+2))
first_coord_col = nUsers + nPlaces

for row, review in enumerate(train_dicts):
    user_col = user_di[review['gPlusUserId']]
    place_col = nUsers + place_di[review['gPlusPlaceId']]
    state_coord = state_coords[state_list.index(review['state'])]
    X_train[row, user_col] = 1   # One-hot encoding of user
    X_train[row, place_col] = 1  # One-hot encoding of item
    X_train[row, first_coord_col] = state_coord[0]
    X_train[row, first_coord_col + 1] = state_coord[1]

y_train = np.array([review['rating'] for review in train_dicts])

In [None]:
# Validation matrix with columns [users | places | coord[0] | coord[1]].
X_val = scipy.sparse.lil_matrix((len(val_dicts), nUsers + nPlaces + 2))

for i, review in enumerate(val_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    # Coordinates come from the validation review's own state. A leftover
    # lookup through train_dicts[i] was removed: it was never used and could
    # raise IndexError when the validation index exceeded the training list.
    cord = state_coords[state_list.index(review['state'])]
    X_val[i, nUsers + nPlaces + 0] = cord[0]
    X_val[i, nUsers + nPlaces + 1] = cord[1]
    if user_id in user_di:
        X_val[i, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_val[i, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_val = np.array([d['rating'] for d in val_dicts])

In [None]:
# Test matrix with columns [users | places | coord[0] | coord[1]]. The
# coordinate columns are always filled; user/place only when known.
X_test = scipy.sparse.lil_matrix((len(test_dicts), nUsers + nPlaces+2))
first_coord_col = nUsers + nPlaces

for row, review in enumerate(test_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    state_coord = state_coords[state_list.index(review['state'])]
    X_test[row, first_coord_col] = state_coord[0]
    X_test[row, first_coord_col + 1] = state_coord[1]
    if user_id in user_di:
        X_test[row, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_test[row, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_test = np.array([review['rating'] for review in test_dicts])

In [None]:
# Fit the state-coordinate model, report train/validation MSE and score the test matrix.
fm2 = als.FMRegression(init_stdev=0.1, rank=0, l2_reg_w=20)
fm2.fit(X_train, y_train)

y_pred_train = fm2.predict(X_train)
train_mse = MSE(y_pred_train, y_train)
print(train_mse)

y_pred_val = fm2.predict(X_val)
val_mse = MSE(y_pred_val, y_val)
print(val_mse)

y_test_pred = fm2.predict(X_test)

#Fast FM with popularity of place and state

In [42]:
# Number of TRAINING reviews per state code (despite the name, this counts
# review records, not distinct places).
placesPerState = defaultdict(int)

for review in train_dicts:
    state_code = review['state']
    placesPerState[state_code] = placesPerState[state_code] + 1

# Reviews without a state must not contribute a popularity signal.
placesPerState[""] = 0

In [None]:
# Training matrix with columns [users | places | place popularity | state popularity].
# Place popularity is the number of distinct users who rated the place
# (usersPerPlace); state popularity is the training review count of the
# review's state (placesPerState). Both are raw, unscaled counts.
X_train = scipy.sparse.lil_matrix((len(train_dicts), nUsers + nPlaces + 2))

for i, review in enumerate(train_dicts):
    place_id = review['gPlusPlaceId']
    user = user_di[review['gPlusUserId']]
    item = place_di[place_id]
    # The unused state-index / coordinate lookups copied from the coordinate
    # model were dropped; this model only needs the two counts below.
    X_train[i, user] = 1  # One-hot encoding of user
    X_train[i, nUsers + item] = 1  # One-hot encoding of item
    X_train[i, nUsers + nPlaces + 0] = len(usersPerPlace[place_id])
    X_train[i, nUsers + nPlaces + 1] = placesPerState[review['state']]

y_train = np.array([d['rating'] for d in train_dicts])

In [47]:
# Sanity check: column index assigned to the first training review's place.
place_di[train_dicts[0]['gPlusPlaceId']]

648303

In [None]:
# Validation matrix with columns [users | places | place popularity | state popularity].
X_val = scipy.sparse.lil_matrix((len(val_dicts), nUsers + nPlaces + 2))

for i, review in enumerate(val_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    # Removed: unused state/coordinate lookups, including one that indexed
    # train_dicts with the validation row index (possible IndexError).
    X_val[i, nUsers + nPlaces + 0] = len(usersPerPlace[place_id])
    X_val[i, nUsers + nPlaces + 1] = placesPerState[review['state']]
    if user_id in user_di:
        X_val[i, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_val[i, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_val = np.array([d['rating'] for d in val_dicts])

In [None]:
# Test matrix with columns [users | places | place popularity | state popularity].
X_test = scipy.sparse.lil_matrix((len(test_dicts), nUsers + nPlaces + 2))

for i, review in enumerate(test_dicts):
    user_id = review['gPlusUserId']
    place_id = review['gPlusPlaceId']
    # The unused state-index / coordinate lookups copied from the coordinate
    # model were dropped; only the two popularity counts are needed here.
    X_test[i, nUsers + nPlaces + 0] = len(usersPerPlace[place_id])
    X_test[i, nUsers + nPlaces + 1] = placesPerState[review['state']]
    if user_id in user_di:
        X_test[i, user_di[user_id]] = 1  # One-hot encoding of user
    if place_id in place_di:
        X_test[i, nUsers + place_di[place_id]] = 1  # One-hot encoding of item

y_test = np.array([d['rating'] for d in test_dicts])

In [None]:
# Fit the popularity model, report train/validation MSE and score the test matrix.
fm3 = als.FMRegression(init_stdev=0.1, rank=0, l2_reg_w=5)
fm3.fit(X_train, y_train)

y_pred_train = fm3.predict(X_train)
train_mse = MSE(y_pred_train, y_train)
print(train_mse)

y_pred_val = fm3.predict(X_val)
val_mse = MSE(y_pred_val, y_val)
print(val_mse)

y_test_pred = fm3.predict(X_test)

In [None]:
# Test-set MSE for whichever model most recently assigned y_test_pred / y_test.
print(MSE(y_test_pred, y_test))