* **Reference model structure for SVD**

In [6]:
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import metrics
from tensorflow.keras.utils import to_categorical

In [7]:
# Two-branch network sketching a matrix-factorisation (SVD-style) recommender:
# each id enters as a one-hot vector and is projected to k=100 latent factors.

# One-hot user vector of width 944. The original note says the extra slot (+1)
# accommodates the one-hot encoding -- presumably ids are 1-based; TODO confirm.
user_in_layer = Input(shape=(944,))
# One-hot movie vector of width 1683.
movie_in_layer = Input(shape=(1683,))

# The user and movie one-hot vectors are fed as two separate inputs (the "top" and "bottom" halves of the conceptual input layer).

# Linear projection (no activation) to latent dimension k=100 for user_id; stands in for an Embedding layer.
# Dense keeps its default bias, hence 944*100 + 100 = 94,500 parameters in the summary below.
hid_layer_u = Dense(100)(user_in_layer)
# Linear projection to latent dimension k=100 for movie_id.
hid_layer_m= Dense(100)(movie_in_layer)

# Combine the two latent vectors with a dot product taken along axis 1.
merge_layer = keras.layers.dot([hid_layer_u, hid_layer_m], axes=1)



# A single sigmoid unit is applied on top of the dot product.
predictions = Dense(1, activation='sigmoid')(merge_layer)

model = Model(inputs=[user_in_layer, movie_in_layer], outputs= predictions)

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 944)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1683)]       0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 100)          94500       input_1[0][0]                    
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 100)          168400      input_2[0][0]                    
______________________________________________________________________________________________

* **Using `deepctr.DeepFM`**

In [8]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from deepctr.models import DeepFM
from deepctr.inputs import SparseFeat,get_fixlen_feature_names

In [9]:
# Load the MovieLens sample table; the path is relative to the notebook's working directory.
data = pd.read_csv('./movielens_sample.txt')
# Preview the first four rows.
data.head(4)

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip
0,3299,235,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119
1,3630,3256,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005
2,517,105,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408
3,785,2115,3,975430389,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,18,19,29307


In [10]:
# Id columns that are treated as sparse (categorical) features, in model-input order.
sparse_features = ["movie_id", "user_id"]
# Presumably the target column list; it is not used in the cells shown here -- TODO confirm.
y= ['rating']
# 1. Label-encode each sparse feature to integer class indices.
#    A fresh encoder is fitted per column; after the loop `lbe` holds only the last one (user_id).
for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
# NOTE: `data` is overwritten in place, so the raw ids are replaced by their encoded values from here on.
data.head(4)

Unnamed: 0,user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip
0,107,12,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119
1,123,169,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005
2,12,6,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408
3,21,112,3,975430389,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,18,19,29307


In [22]:
# 2. Count the unique values of each sparse field and declare one SparseFeat per field,
#    passing that count as SparseFeat's second argument.
fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique()) for feat in sparse_features]
# Both names below are aliases of the same list object, not copies.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns

In [14]:
# Hold out 20% of the rows for testing. The split is seeded so that
# Restart Kernel -> Run All reproduces the same train/test partition
# (1024 mirrors the default `seed` of the SVD() builder defined in this notebook).
train, test = train_test_split(data, test_size=0.2, random_state=1024)
# One value array per sparse feature, in `sparse_features` order (movie_id first, then user_id);
# no other column of `data` is passed to the model.
train_model_input = [train[name].values for name in sparse_features]
test_model_input = [test[name].values for name in sparse_features]

print('no. of columns included in train & test:', len(train_model_input))

no. of columns included in train & test: 2


**In the following DeepFM( ) model**
* Embedding size ~ num_factors(surprise SVD)= 100

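* The input layer receives the label-encoded (`LabelEncoder`) values of `user_id` and `movie_id`; the embedding layers `sparse_emb_movie_id` & `sparse_emb_user_id` then look these indices up, which is equivalent to one-hot encoding them and multiplying by a weight matrix.
* The `user_factors` (pu) & `item_factors` (qi) vectors are realised as embedding weights of shape (193,100) and (187,100) respectively, matching the 193 unique users and 187 unique movies.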

In [45]:
# sparse_features[0] is "movie_id"; this count is the size passed to its SparseFeat.
print('number of unique features in movie_id:', data[sparse_features[0]].nunique())

number of unique features in movie_id: 187


In [46]:
# sparse_features[1] is "user_id"; this count is the size passed to its SparseFeat.
print('number of unique features in user_id:', data[sparse_features[1]].nunique())

number of unique features in user_id: 193


In [24]:
# Alternative kept for reference: passing dnn_hidden_units=() is the variant meant to switch the DNN part off (DNN=False).
#model = DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=100,dnn_hidden_units=(), task='regression')
# Active call: dnn_hidden_units is NOT overridden here, so the library default applies and the
# DNN part is presumably still present -- TODO confirm against the installed deepctr version.
# NOTE: this rebinds `model`, replacing the Keras reference model built earlier in the notebook.
model = DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=100, task='regression')
# Adam optimiser; mean squared error is both the loss and the reported metric.
model.compile("adam", "mse", metrics=['mse'], )
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
movie_id (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_id (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
sparse_emb_movie_id (Embedding) (None, 1, 100)       18700       movie_id[0][0]                   
__________________________________________________________________________________________________
sparse_emb_user_id (Embedding)  (None, 1, 100)       19300       user_id[0][0]                    
____________________________________________________________________________________________

* The model summary above shows some additional layers that have no counterpart in the NN_SVD reference structure built earlier.

In [25]:
from deepctr.models import svd

In [27]:
??svd

In [36]:
from deepctr.inputs import input_from_feature_columns, get_linear_logit,build_input_features,combined_dnn_input
import tensorflow as tf
from deepctr.layers.utils import concat_fun
from deepctr.layers.interaction import FM
from deepctr.layers.core import PredictionLayer

In [42]:
def SVD(feature_columns, embedding_size=100,
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, init_std=0.0001, seed=1024, bi_dropout=0,
        dnn_dropout=0, act_func='sigmoid', task='binary'):
    """Build a Keras model that applies DeepCTR's FM layer to the feature embeddings.

    The model's output is the raw FM logit; no prediction layer, linear term
    or DNN is added on top of it.

    NOTE(review): with exactly two sparse fields the FM pairwise-interaction
    term should reduce to the inner product of the two embedding vectors
    (the SVD-style p_u . q_i) -- confirm against the installed deepctr FM layer.

    Args:
        feature_columns: DeepCTR feature columns describing the model inputs.
        embedding_size: Embedding size forwarded to ``input_from_feature_columns``.
        l2_reg_embedding: L2 regularisation strength forwarded for the embeddings.
        init_std: Initialisation standard deviation forwarded for the embeddings.
        seed: Random seed forwarded for the embedding initialisation.
        l2_reg_linear, l2_reg_dnn, bi_dropout, dnn_dropout, act_func, task:
            Accepted for signature compatibility but not used by this function.

    Returns:
        A ``tf.keras.models.Model`` taking the inputs created by
        ``build_input_features`` and producing the FM logit.
    """
    # One Keras Input per feature column, keyed by feature name.
    input_by_name = build_input_features(feature_columns)
    model_inputs = list(input_by_name.values())

    # Only the sparse embeddings are consumed; the dense-value list is discarded.
    embedding_list, _ = input_from_feature_columns(
        input_by_name, feature_columns,
        embedding_size, l2_reg_embedding, init_std, seed,
    )

    # Join the per-field embeddings along axis 1 and feed them to the FM layer.
    stacked_embeddings = concat_fun(embedding_list, axis=1)
    interaction_logit = FM()(stacked_embeddings)

    return tf.keras.models.Model(inputs=model_inputs, outputs=interaction_logit)

In [43]:
# Build the FM-only model over the same two sparse feature columns, with 100-dimensional embeddings.
model2 = SVD(linear_feature_columns, embedding_size=100)
model2.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
movie_id (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
user_id (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
sparse_emb_movie_id (Embedding) (None, 1, 100)       18700       movie_id[0][0]                   
__________________________________________________________________________________________________
sparse_emb_user_id (Embedding)  (None, 1, 100)       19300       user_id[0][0]                    
____________________________________________________________________________________________