# Keras MF experimentation

This notebook experiments with building matrix factorization (MF) models in Keras on TensorFlow.

In [1]:
from lkdemo.datasets import ml20m

  from pandas.core.index import CategoricalIndex, RangeIndex, Index, MultiIndex


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras as k

In [3]:
from lenskit import crossfold as xf

In [4]:
# Ratings frame from the lkdemo `ml20m` dataset (presumably MovieLens 20M — confirm).
# NOTE(review): a later cell adds columns to this object in place; whether
# `ml20m.ratings` returns a fresh or a cached frame is not visible here.
ratings = ml20m.ratings

In [5]:
# Build lookup indexes over the distinct raw user and item IDs.
uidx = pd.Index(pd.unique(ratings['user']))
iidx = pd.Index(pd.unique(ratings['item']))
# Translate each raw ID into its position in the matching index and keep the
# positions as 32-bit integers; these become the embedding row numbers.
ratings['uno'] = uidx.get_indexer(ratings['user']).astype(np.int32)
ratings['ino'] = iidx.get_indexer(ratings['item']).astype(np.int32)
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000263 entries, 0 to 20000262
Data columns (total 6 columns):
 #   Column     Dtype  
---  ------     -----  
 0   user       int32  
 1   item       int32  
 2   rating     float64
 3   timestamp  int32  
 4   uno        int32  
 5   ino        int32  
dtypes: float64(1), int32(5)
memory usage: 534.1 MB


In [6]:
# Number of distinct users and items; used as `input_dim` of the embedding layers.
n_users, n_items = len(uidx), len(iidx)

In [7]:
# Take the first (and only requested) partition from LensKit's user sampler.
# Positional arguments: 1 partition, 10000 users, SampleN(5) as the per-user
# test selection — TODO confirm meanings against the LensKit crossfold docs.
# NOTE(review): no RNG seed is passed here, so the split is presumably not
# reproducible across kernel restarts — confirm.
splits = xf.sample_users(ratings, 1, 10000, xf.SampleN(5))
train, test = next(splits)

In [8]:
# Remove global, item, and user biases from the training ratings so the MF
# model only has to fit the residual (`nrating`).

# Global bias: mean rating over the training rows.
gbias = train['rating'].mean()
# Center using the training frame's own column. The original subtracted from
# `ratings['rating']` (the full data set, including held-out rows) and relied on
# index alignment inside `assign` to discard the extra rows.
ntrs = train.assign(nrating=train['rating'] - gbias)

# Item bias: mean centered rating per item, then subtract it from each row.
ibias = ntrs.groupby('item')['nrating'].mean().rename('i_bias')
ntrs = ntrs.join(ibias, on='item')
ntrs['nrating'] -= ntrs['i_bias'].fillna(0)

# User bias: mean of what is left per user, then subtract it as well.
ubias = ntrs.groupby('user')['nrating'].mean().rename('u_bias')
ntrs = ntrs.join(ubias, on='user')
ntrs['nrating'] -= ntrs['u_bias']
ntrs.head()

Unnamed: 0,user,item,rating,timestamp,uno,ino,nrating,i_bias,u_bias
0,1,2,3.5,1112486027,0,0,0.175738,-0.314702,0.113696
1,1,29,3.5,1112484676,0,1,-0.567022,0.428058,0.113696
2,1,32,3.5,1112484819,0,2,-0.511906,0.372942,0.113696
3,1,47,3.5,1112484727,0,3,-0.667228,0.528264,0.113696
4,1,50,3.5,1112484580,0,4,-0.948052,0.809088,0.113696


## Regularized MF for explicit ratings

We're going to build up regularized MF for explicit ratings, based on [this tutorial](https://towardsdatascience.com/building-a-book-recommendation-system-using-keras-1fba34180699) and [this example code](https://github.com/chinchi-hsu/KerasCollaborativeFiltering):

In [9]:
# Latent dimensionality: passed as `output_dim` to both the user and item embeddings.
features = 25

First, the user layers:

In [10]:
# User branch: one integer user position per example goes through an
# L2-regularized embedding lookup, and Flatten then removes the length-1 axis.
u_input = k.Input(name='user', shape=(1,), dtype='int32')
u_reg = k.regularizers.l2(0.02)
u_embed = k.layers.Embedding(
    n_users,
    features,
    embeddings_initializer='random_normal',
    embeddings_regularizer=u_reg,
    name='user-embed',
)(u_input)
u_flat = k.layers.Flatten(name='user-vector')(u_embed)

And the item layers:

In [11]:
# Item branch, mirroring the user branch: integer item position -> L2-regularized
# embedding lookup -> Flatten to remove the length-1 axis.
i_input = k.Input(name='item', shape=(1,), dtype='int32')
i_reg = k.regularizers.l2(0.02)
i_embed = k.layers.Embedding(
    n_items,
    features,
    embeddings_initializer='random_normal',
    embeddings_regularizer=i_reg,
    name='item-embed',
)(i_input)
i_flat = k.layers.Flatten(name='item-vector')(i_embed)

And put it together:

In [12]:
# The predicted (bias-removed) score is the dot product of the user and item vectors.
prod = k.layers.Dot(axes=1, name='score')([u_flat, i_flat])
# Wrap both inputs and the score output into a single Keras model.
model = k.Model(inputs=[u_input, i_input], outputs=prod, name='classic-mf')
model.summary()

Model: "classic-mf"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
item (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
user-embed (Embedding)          (None, 1, 25)        3462325     user[0][0]                       
__________________________________________________________________________________________________
item-embed (Embedding)          (None, 1, 25)        668600      item[0][0]                       
_________________________________________________________________________________________

In [13]:
# Adam optimizer on squared error; mean absolute error is tracked as an extra metric.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [14]:
# Fit on (user position, item position) -> bias-removed rating.
model.fit(
    x=[ntrs['uno'], ntrs['ino']],
    y=ntrs['nrating'],
    batch_size=8192,
    epochs=5,
)

Train on 19950263 samples
Epoch 1/5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1f33f97be08>

In [20]:
# Score the held-out (user, item) pairs with the trained model.
# The raw network output gets its own name so `preds` always refers to the
# prediction DataFrame (the original bound it to an array first, then a frame).
scores = model.predict([test['uno'], test['ino']])
# The Dot layer emits one value per row with shape (n, 1); flatten it so the new
# column is explicitly 1-D rather than relying on pandas to accept a 2-D array.
preds = test.assign(pred=scores.ravel())
# NOTE(review): the recorded output of this cell shows predictions around 1e-10,
# i.e. the factor model contributes essentially nothing beyond the biases.
# Possibly the L2 penalty (0.02) on the embeddings is too strong — confirm.
preds.head()

Unnamed: 0,user,item,rating,timestamp,uno,ino,pred
1814,15,141,3.0,840207366,14,391,-3.935818e-10
1813,15,50,3.0,840207617,14,4,-2.985101e-10
1831,15,380,3.0,840206642,14,373,4.647037e-10
1829,15,364,4.0,840207618,14,396,5.347735e-11
1826,15,344,3.0,840206704,14,646,-1.496031e-09


In [22]:
# Add the global, user, and item biases back so `pred` is on the original rating
# scale (the model was trained on bias-removed ratings).
preds = preds.join(ubias, on='user')
preds = preds.join(ibias, on='item')
preds['pred'] += gbias
# A user or item absent from the training data has no bias entry, so the join
# leaves NaN. Treat a missing bias as 0 (as the training cell does for `i_bias`)
# so the prediction stays finite instead of being silently skipped in the RMSE.
preds['pred'] += preds['u_bias'].fillna(0)
preds['pred'] += preds['i_bias'].fillna(0)
preds.head()

Unnamed: 0,user,item,rating,timestamp,uno,ino,pred,u_bias,i_bias
1814,15,141,3.0,840207366,14,391,3.032048,-0.478474,-0.014746
1813,15,50,3.0,840207617,14,4,3.855882,-0.478474,0.809088
1831,15,380,3.0,840206642,14,373,3.012413,-0.478474,-0.034381
1829,15,364,4.0,840207618,14,396,3.300074,-0.478474,0.25328
1826,15,344,3.0,840206704,14,646,2.504175,-0.478474,-0.542619


In [25]:
# Per-row prediction error, then root-mean-squared error over the test rows.
preds['error'] = preds['rating'].sub(preds['pred'])
np.sqrt((preds['error'] ** 2).mean())

0.9035127570475177