# Keras MF experimentation

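This notebook experiments with building matrix factorization (MF) models in Keras on TensorFlow, and compares them against LensKit's bias, ALS, and FunkSVD implementations.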

In [1]:
from lkdemo.datasets import ml20m

  from pandas.core.index import CategoricalIndex, RangeIndex, Index, MultiIndex


ModuleNotFoundError: No module named 'binpickle'

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras as k

In [None]:
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender
from lenskit.algorithms.basic import Bias
from lenskit.algorithms.als import BiasedMF
from lenskit.algorithms.funksvd import FunkSVD
from lenskit import batch

In [None]:
# Ratings frame taken from the lkdemo `ml20m` dataset object; later cells read its
# 'user', 'item' and 'rating' columns.
ratings = ml20m.ratings

In [None]:
# Lookup indexes over the distinct user and item identifiers.
uidx = pd.Index(ratings['user'].unique())
iidx = pd.Index(ratings['item'].unique())

# Attach each rating's position in those indexes as 32-bit integer columns
# ('uno' for the user, 'ino' for the item).
for id_col, pos_col, lookup in (('user', 'uno', uidx), ('item', 'ino', iidx)):
    ratings[pos_col] = lookup.get_indexer(ratings[id_col]).astype('i4')

ratings.info()

In [2]:
# Number of distinct users; used as the user embedding's input dimension.
n_users = uidx.size
n_users

NameError: name 'uidx' is not defined

In [3]:
# Number of distinct items; used as the item embedding's input dimension.
n_items = iidx.size
n_items

NameError: name 'iidx' is not defined

In [9]:
# Take the first (and only) partition produced by user sampling: 1 partition of
# 10000 users, with the per-user test rows chosen by SampleN(5).
holdout = xf.SampleN(5)
partitions = xf.sample_users(ratings, 1, 10000, holdout)
train, test = next(partitions)

In [10]:
# Baseline 1: LensKit's Bias model with default settings, fit on the training split.
bias = Bias()
bias.fit(train)

<lenskit.algorithms.basic.Bias at 0x1d25be6cb48>

In [11]:
# Predict the held-out ratings with the bias model and report the RMSE.
bias_preds = batch.predict(bias, test)
bias_preds['error'] = bias_preds['rating'] - bias_preds['prediction']
np.sqrt((bias_preds['error'] ** 2).mean())

0.9121856079493474

In [12]:
# Baseline 2: LensKit's ALS BiasedMF, constructed with 25 (the same value used for
# `features` in the Keras model), fit on the training split.
als = BiasedMF(25)
als.fit(train)

<lenskit.algorithms.als.BiasedMF at 0x1d259a08ec8>

In [13]:
# Predict the held-out ratings with the ALS model and report the RMSE.
als_preds = batch.predict(als, test)
als_preds['error'] = als_preds['rating'] - als_preds['prediction']
np.sqrt((als_preds['error'] ** 2).mean())

0.8633453905488548

In [14]:
# Baseline 3: LensKit's FunkSVD, constructed with 25 (the same value used for
# `features` in the Keras model), fit on the training split.
funk = FunkSVD(25)
funk.fit(train)

<lenskit.algorithms.funksvd.FunkSVD at 0x1d209f241c8>

In [29]:
# Predict the held-out ratings with the FunkSVD model and report the RMSE.
funk_preds = batch.predict(funk, test)
funk_preds['error'] = funk_preds['rating'] - funk_preds['prediction']
np.sqrt((funk_preds['error'] ** 2).mean())

0.8421827063075318

In [16]:
# Normalize the training ratings by removing, in order, the global mean, each item's
# mean residual, and each user's mean residual.  The remaining 'nrating' column is the
# target the Keras MF model is trained on; gbias / ibias / ubias are added back at
# prediction time.
gbias = train['rating'].mean()

# Residuals are computed from the training split only, so this cell has no dependence
# on the full, un-split ratings frame.
ntrs = train.assign(nrating=train['rating'] - gbias)

# Item offsets: mean residual per item after removing the global mean.
ibias = ntrs.groupby('item')['nrating'].mean().rename('i_bias')
ntrs = ntrs.join(ibias, on='item')
ntrs['nrating'] -= ntrs['i_bias'].fillna(0)

# User offsets: mean residual per user after removing global and item effects.
ubias = ntrs.groupby('user')['nrating'].mean().rename('u_bias')
ntrs = ntrs.join(ubias, on='user')
ntrs['nrating'] -= ntrs['u_bias']
ntrs.head()

Unnamed: 0,user,item,rating,timestamp,uno,ino,nrating,i_bias,u_bias
0,1,2,3.5,1112486027,0,0,0.174737,-0.313792,0.113771
1,1,29,3.5,1112484676,0,1,-0.565218,0.426163,0.113771
2,1,32,3.5,1112484819,0,2,-0.511909,0.372853,0.113771
3,1,47,3.5,1112484727,0,3,-0.667377,0.528322,0.113771
4,1,50,3.5,1112484580,0,4,-0.948236,0.809181,0.113771


## Regularized MF for explicit ratings

We're going to build up regularized MF for explicit ratings, based on [this tutorial](https://towardsdatascience.com/building-a-book-recommendation-system-using-keras-1fba34180699) and [this example code](https://github.com/chinchi-hsu/KerasCollaborativeFiltering):

In [17]:
# Output dimension shared by the user and item embedding layers below.
features = 25

In [18]:
# Set the Keras backend's default float type to float64 before any layers are created.
k.backend.set_floatx('float64')

First, the user layers:

In [19]:
# User side: an int32 id input, an embedding lookup with an L2 activity penalty on
# its output, and a Flatten applied to the embedding output.
u_input = k.Input(name='user', shape=(1,), dtype='int32')
u_reg = k.regularizers.l2(0.02)
u_embed_layer = k.layers.Embedding(
    input_dim=n_users,
    output_dim=features,
    input_length=1,
    embeddings_initializer='random_normal',
    activity_regularizer=u_reg,
    name='user-embed',
)
u_embed = u_embed_layer(u_input)
u_flat = k.layers.Flatten(name='user-vector')(u_embed)

And the item layers:

In [20]:
# Item side, mirroring the user side: an int32 id input, an embedding lookup with an
# L2 activity penalty on its output, and a Flatten applied to the embedding output.
i_input = k.Input(name='item', shape=(1,), dtype='int32')
i_reg = k.regularizers.l2(0.02)
i_embed_layer = k.layers.Embedding(
    input_dim=n_items,
    output_dim=features,
    input_length=1,
    embeddings_initializer='random_normal',
    activity_regularizer=i_reg,
    name='item-embed',
)
i_embed = i_embed_layer(i_input)
i_flat = k.layers.Flatten(name='item-vector')(i_embed)

And put it together:

In [21]:
# The predicted (normalized) rating is the dot product of the user and item vectors.
score_layer = k.layers.Dot(axes=1, name='score')
prod = score_layer([u_flat, i_flat])
model = k.Model(inputs=[u_input, i_input], outputs=prod, name='classic-mf')
model.summary()

Model: "classic-mf"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
user-embed (Embedding)          (None, 1, 25)        3462325     user[0][0]                       
__________________________________________________________________________________________________
item-embed (Embedding)          (None, 1, 25)        668600      item[0][0]                       
_________________________________________________________________________________________

In [22]:
# Optimize squared error with Adam; also track mean absolute error during training.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae'],
)

In [23]:
# Train on (user position, item position) -> normalized rating for 5 epochs,
# using batches of 16384 ratings.
mfit = model.fit(
    x=[ntrs['uno'], ntrs['ino']],
    y=ntrs['nrating'],
    batch_size=16 * 1024,
    epochs=5,
)

Train on 19950263 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# Score the test pairs with the trained model and attach the raw output as 'pred'.
# These scores are on the normalized scale; the biases have not been added back yet.
raw_scores = model.predict([test['uno'], test['ino']])
preds = test.assign(pred=raw_scores)
preds.head()

Unnamed: 0,user,item,rating,timestamp,uno,ino,pred
1478,12,260,4.0,859063825,11,9,-0.098802
1479,12,344,4.0,859063995,11,646,0.225896
1480,12,356,4.0,859064001,11,369,-0.530226
1469,12,17,3.0,859063719,11,387,-0.072415
1497,12,784,4.0,859063825,11,704,0.353181


In [25]:
# Distribution of the raw model scores, before the biases are added back.
preds.pred.describe()

count    50000.000000
mean         0.054615
std          0.282989
min         -1.787960
25%         -0.112313
50%          0.054814
75%          0.221758
max          1.786055
Name: pred, dtype: float64

In [26]:
# Add the baseline (global mean + user offset + item offset) back onto the model's
# residual predictions, and keep the baseline alone as 'bpred' for comparison.
# A user or item with no training ratings has no learned offset, so its offset is
# treated as 0 instead of letting NaN propagate into the predictions; otherwise pandas
# would silently drop those rows when the RMSE is averaged.
preds = preds.join(ubias, on='user').join(ibias, on='item')
baseline = gbias + preds['u_bias'].fillna(0) + preds['i_bias'].fillna(0)
preds['pred'] += baseline
preds['bpred'] = baseline
preds.head()

Unnamed: 0,user,item,rating,timestamp,uno,ino,pred,u_bias,i_bias,bpred
1478,12,260,4.0,859063825,11,9,4.09004,-0.001641,0.665199,4.188842
1479,12,344,4.0,859063995,11,646,3.206802,-0.001641,-0.542737,2.980906
1480,12,356,4.0,859064001,11,369,3.496787,-0.001641,0.50337,4.027013
1469,12,17,3.0,859063719,11,387,3.893961,-0.001641,0.442733,3.966376
1497,12,784,4.0,859063825,11,704,3.054327,-0.001641,-0.822498,2.701145


In [27]:
# RMSE of the Keras MF predictions (with the biases added back) on the test ratings.
preds['error'] = preds['rating'] - preds['pred']
np.sqrt((preds['error'] ** 2).mean())

0.8547722337625112

In [28]:
# RMSE of the hand-computed bias-only baseline on the same test ratings.
preds['berror'] = preds['rating'] - preds['bpred']
np.sqrt((preds['berror'] ** 2).mean())

0.9121911716149231