In [None]:
# Colab-only step: attach Google Drive to the runtime's filesystem so files
# stored on Drive can be opened through ordinary paths.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!pip install deepctr
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import mean_absolute_error
from deepctr.models import *
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names



In [None]:
# Read the rating splits and the user/item metadata tables.
# Every file lives in the same shared-drive folder, so the folder is named once.
DATA_DIR = '/content/drive/Shareddrives/E-Commerce Project/ml-100k/ml-100k/'
RATING_COLUMNS = ['User_id', 'Item_id', 'rating', 'timestamp']

df_train_ratings = pd.read_csv(DATA_DIR + 'u1.base',
                    sep='\t',
                    engine='python',
                    encoding='latin-1',
                    names=RATING_COLUMNS)
df_test_ratings = pd.read_csv(DATA_DIR + 'u1.test',
                    sep='\t',
                    engine='python',
                    encoding='latin-1',
                    names=RATING_COLUMNS)
df_users_info = pd.read_csv(DATA_DIR + 'u.user',
                    delimiter='|',
                    names=['User_id', 'age', 'gender', 'occupation', 'zip_code'])
df_items_info = pd.read_csv(DATA_DIR + 'u.item',
                    delimiter='|',
                    encoding="ISO-8859-1",
                    names=['Item_id', 'movie_title', 'release_date', 'video_release_date',
                           'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation',
                           'Children', 'Comedy', 'Crime', 'Documentary', 'Drama',
                           'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',
                           'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'])

# Merge the metadata onto each rating row for more features.
# The join keys are spelled out instead of being inferred from shared column
# names, and validate='many_to_one' makes pandas raise if a user or item id is
# duplicated in a metadata table (which would silently multiply rating rows).
df_temp_merge = pd.merge(df_train_ratings, df_users_info,
                         on='User_id', validate='many_to_one')
df_train_merged = pd.merge(df_temp_merge, df_items_info,
                           on='Item_id', validate='many_to_one')

df_temp_merge = pd.merge(df_test_ratings, df_users_info,
                         on='User_id', validate='many_to_one')
df_test_merged = pd.merge(df_temp_merge, df_items_info,
                          on='Item_id', validate='many_to_one')

In [None]:
# Columns used by the first experiment: `target` is the column to predict and
# `sparse_features` are the categorical inputs (each gets its own embedding).
target = ['rating']
sparse_features = [
    'User_id',
    'Item_id',
    'timestamp',
    'gender',
]

In [None]:
 # 1. Label-encode the sparse features.
 # The encoder is fitted on the training split only, and that same mapping is
 # then applied to the test split, so a raw value gets the same integer code in
 # both frames. (Fitting a second encoder on the test split assigns unrelated
 # codes, which makes every test-time embedding lookup meaningless.)
 # Training codes run from 1..n; code 0 is reserved for test values that never
 # occur in the training split, so `train max + 1` still covers every code.
for feat in sparse_features:
    lbe = LabelEncoder()
    train_codes = lbe.fit_transform(df_train_merged[feat]) + 1
    # classes_ is the sorted list of distinct training values; position i maps to code i + 1.
    code_by_value = dict(zip(lbe.classes_, range(1, len(lbe.classes_) + 1)))
    df_train_merged[feat] = train_codes
    df_test_merged[feat] = (df_test_merged[feat]
                            .map(code_by_value)   # unseen values become NaN
                            .fillna(0)            # ...and are sent to the reserved code 0
                            .astype('int64'))

In [None]:
# 2. Build one embedding feature column per sparse field.
# vocabulary_size has to be strictly greater than every integer code the model
# is fed, so it is taken over BOTH splits: sizing it from the training split
# alone leaves any larger test code without an embedding row.
fixlen_feature_columns = [
    SparseFeat(feat,
               vocabulary_size=int(max(df_train_merged[feat].max(),
                                       df_test_merged[feat].max())) + 1,
               embedding_dim=4)
    for feat in sparse_features
]
# The linear and the DNN parts of the model share the same feature columns.
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [None]:
# 3. Generate the model inputs.
# The predefined base/test split that was loaded from disk is used as-is
# rather than drawing a new random split.
train, test = df_train_merged, df_test_merged

# The model takes a mapping of feature name -> column of values.
train_model_input = dict((column, train[column]) for column in feature_names)
test_model_input = dict((column, test[column]) for column in feature_names)


In [None]:
# 4. Define Model 1 (DeepFM regressor, Adam optimizer), then train, predict and evaluate.
model_1 = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model_1.compile("adam", "mse", metrics=['mse'])

# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. The rows are therefore permuted with a fixed seed first, so
# the 20% validation slice is a random sample and not simply whatever rows sit
# last in the frame (the merged frame keeps the row order of the rating file).
shuffle_order = np.random.RandomState(2020).permutation(len(train))
shuffled_input = {name: column.values[shuffle_order]
                  for name, column in train_model_input.items()}
shuffled_target = train[target].values[shuffle_order]

history = model_1.fit(shuffled_input, shuffled_target,
                      batch_size=256, epochs=30, verbose=2, validation_split=0.2)

# Evaluate on the held-out test split with mean absolute error.
pred_ans = model_1.predict(test_model_input, batch_size=256)
print("mae", round(mean_absolute_error(test[target].values, pred_ans), 4))

Epoch 1/30
250/250 - 3s - loss: 2.4152 - mse: 2.4150 - val_loss: 1.1231 - val_mse: 1.1228
Epoch 2/30
250/250 - 1s - loss: 0.6742 - mse: 0.6736 - val_loss: 1.1557 - val_mse: 1.1549
Epoch 3/30
250/250 - 1s - loss: 0.4934 - mse: 0.4924 - val_loss: 1.1982 - val_mse: 1.1971
Epoch 4/30
250/250 - 1s - loss: 0.4316 - mse: 0.4304 - val_loss: 1.2371 - val_mse: 1.2358
Epoch 5/30
250/250 - 1s - loss: 0.3987 - mse: 0.3973 - val_loss: 1.2706 - val_mse: 1.2691
Epoch 6/30
250/250 - 1s - loss: 0.3784 - mse: 0.3767 - val_loss: 1.2880 - val_mse: 1.2862
Epoch 7/30
250/250 - 1s - loss: 0.3642 - mse: 0.3623 - val_loss: 1.2661 - val_mse: 1.2641
Epoch 8/30
250/250 - 1s - loss: 0.3527 - mse: 0.3506 - val_loss: 1.3096 - val_mse: 1.3074
Epoch 9/30
250/250 - 1s - loss: 0.3370 - mse: 0.3347 - val_loss: 1.3120 - val_mse: 1.3095
Epoch 10/30
250/250 - 1s - loss: 0.3167 - mse: 0.3141 - val_loss: 1.2942 - val_mse: 1.2915
Epoch 11/30
250/250 - 1s - loss: 0.2957 - mse: 0.2929 - val_loss: 1.2998 - val_mse: 1.2969
Epoch 12

Model 2: the same DeepFM architecture, trained with the SGD optimizer instead of Adam

In [None]:
# Define Model 2 (DeepFM regressor, SGD optimizer), then train, predict and evaluate.
model_2 = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model_2.compile("sgd", "mse", metrics=['mse'])

# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. The rows are therefore permuted with a fixed seed first, so
# the 20% validation slice is a random sample and not simply whatever rows sit
# last in the frame (the merged frame keeps the row order of the rating file).
shuffle_order = np.random.RandomState(2020).permutation(len(train))
shuffled_input = {name: column.values[shuffle_order]
                  for name, column in train_model_input.items()}
shuffled_target = train[target].values[shuffle_order]

history = model_2.fit(shuffled_input, shuffled_target,
                      batch_size=256, epochs=30, verbose=2, validation_split=0.2)

# Evaluate on the held-out test split with mean absolute error.
pred_ans = model_2.predict(test_model_input, batch_size=256)
print("mae", round(mean_absolute_error(test[target].values, pred_ans), 4))

Epoch 1/30
250/250 - 2s - loss: 1.5471 - mse: 1.5471 - val_loss: 1.5331 - val_mse: 1.5331
Epoch 2/30
250/250 - 1s - loss: 1.1697 - mse: 1.1697 - val_loss: 1.5361 - val_mse: 1.5361
Epoch 3/30
250/250 - 1s - loss: 1.1574 - mse: 1.1574 - val_loss: 1.5470 - val_mse: 1.5470
Epoch 4/30
250/250 - 1s - loss: 1.1431 - mse: 1.1431 - val_loss: 1.4660 - val_mse: 1.4660
Epoch 5/30
250/250 - 1s - loss: 1.1249 - mse: 1.1249 - val_loss: 1.4714 - val_mse: 1.4714
Epoch 6/30
250/250 - 1s - loss: 1.1001 - mse: 1.1001 - val_loss: 1.4109 - val_mse: 1.4109
Epoch 7/30
250/250 - 1s - loss: 1.0671 - mse: 1.0671 - val_loss: 1.4016 - val_mse: 1.4016
Epoch 8/30
250/250 - 1s - loss: 1.0239 - mse: 1.0239 - val_loss: 1.3551 - val_mse: 1.3551
Epoch 9/30
250/250 - 1s - loss: 0.9743 - mse: 0.9743 - val_loss: 1.3171 - val_mse: 1.3170
Epoch 10/30
250/250 - 1s - loss: 0.9295 - mse: 0.9295 - val_loss: 1.2680 - val_mse: 1.2679
Epoch 11/30
250/250 - 1s - loss: 0.8979 - mse: 0.8979 - val_loss: 1.2475 - val_mse: 1.2474
Epoch 12

Change the features: use `age` and `occupation` in place of `timestamp` and `gender`, then retrain both models

In [None]:
# Second experiment: keep the user/item ids, but use 'age' and 'occupation'
# in place of 'timestamp' and 'gender'.
sparse_features = ['User_id', 'Item_id', 'age', 'occupation']
target = ['rating']

# Rebuild the merged frames from the raw tables before encoding. The earlier
# encoding cell overwrote columns of the merged frames in place (including
# 'User_id' and 'Item_id'), so encoding them a second time here would start
# from already-transformed values instead of the raw ids.
df_train_merged = pd.merge(pd.merge(df_train_ratings, df_users_info, on='User_id'),
                           df_items_info, on='Item_id')
df_test_merged = pd.merge(pd.merge(df_test_ratings, df_users_info, on='User_id'),
                          df_items_info, on='Item_id')

# 1. Label-encode the sparse features.
# The encoder is fitted on the training split only and the same mapping is
# applied to the test split, so a raw value gets the same integer code in both
# frames. Training codes run from 1..n; code 0 is reserved for test values that
# never occur in the training split.
for feat in sparse_features:
    lbe = LabelEncoder()
    train_codes = lbe.fit_transform(df_train_merged[feat]) + 1
    # classes_ is the sorted list of distinct training values; position i maps to code i + 1.
    code_by_value = dict(zip(lbe.classes_, range(1, len(lbe.classes_) + 1)))
    df_train_merged[feat] = train_codes
    df_test_merged[feat] = (df_test_merged[feat]
                            .map(code_by_value)   # unseen values become NaN
                            .fillna(0)            # ...and are sent to the reserved code 0
                            .astype('int64'))

# 2. Build one embedding feature column per sparse field. vocabulary_size must
# exceed every code in either split, so the maximum is taken over both frames.
fixlen_feature_columns = [
    SparseFeat(feat,
               vocabulary_size=int(max(df_train_merged[feat].max(),
                                       df_test_merged[feat].max())) + 1,
               embedding_dim=4)
    for feat in sparse_features
]
# The linear and the DNN parts of the model share the same feature columns.
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3. Generate the model inputs (mapping of feature name -> column of values).
# The predefined base/test split is used as-is rather than a new random split.
train = df_train_merged
test = df_test_merged
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

Model 1: DeepFM trained with the Adam optimizer on the new feature set

In [None]:
# 4. Define Model 1 (DeepFM regressor, Adam optimizer), then train, predict and evaluate.
model_1 = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model_1.compile("adam", "mse", metrics=['mse'])

# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. The rows are therefore permuted with a fixed seed first, so
# the 20% validation slice is a random sample and not simply whatever rows sit
# last in the frame (the merged frame keeps the row order of the rating file).
shuffle_order = np.random.RandomState(2020).permutation(len(train))
shuffled_input = {name: column.values[shuffle_order]
                  for name, column in train_model_input.items()}
shuffled_target = train[target].values[shuffle_order]

history = model_1.fit(shuffled_input, shuffled_target,
                      batch_size=256, epochs=30, verbose=2, validation_split=0.2)

# Evaluate on the held-out test split with mean absolute error.
pred_ans = model_1.predict(test_model_input, batch_size=256)
print("mae", round(mean_absolute_error(test[target].values, pred_ans), 4))

Epoch 1/30
250/250 - 2s - loss: 2.3226 - mse: 2.3226 - val_loss: 1.1703 - val_mse: 1.1702
Epoch 2/30
250/250 - 1s - loss: 0.8644 - mse: 0.8643 - val_loss: 1.1721 - val_mse: 1.1720
Epoch 3/30
250/250 - 1s - loss: 0.8559 - mse: 0.8558 - val_loss: 1.1697 - val_mse: 1.1696
Epoch 4/30
250/250 - 1s - loss: 0.8492 - mse: 0.8491 - val_loss: 1.1699 - val_mse: 1.1698
Epoch 5/30
250/250 - 1s - loss: 0.8437 - mse: 0.8436 - val_loss: 1.1732 - val_mse: 1.1731
Epoch 6/30
250/250 - 1s - loss: 0.8315 - mse: 0.8313 - val_loss: 1.1823 - val_mse: 1.1821
Epoch 7/30
250/250 - 1s - loss: 0.8221 - mse: 0.8219 - val_loss: 1.1965 - val_mse: 1.1963
Epoch 8/30
250/250 - 1s - loss: 0.8150 - mse: 0.8148 - val_loss: 1.2056 - val_mse: 1.2053
Epoch 9/30
250/250 - 1s - loss: 0.8119 - mse: 0.8117 - val_loss: 1.2130 - val_mse: 1.2128
Epoch 10/30
250/250 - 1s - loss: 0.8025 - mse: 0.8023 - val_loss: 1.2196 - val_mse: 1.2193
Epoch 11/30
250/250 - 1s - loss: 0.7961 - mse: 0.7958 - val_loss: 1.2302 - val_mse: 1.2299
Epoch 12

Model 2: DeepFM trained with the SGD optimizer on the new feature set

In [None]:
# Define Model 2 (DeepFM regressor, SGD optimizer), then train, predict and evaluate.
model_2 = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model_2.compile("sgd", "mse", metrics=['mse'])

# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. The rows are therefore permuted with a fixed seed first, so
# the 20% validation slice is a random sample and not simply whatever rows sit
# last in the frame (the merged frame keeps the row order of the rating file).
shuffle_order = np.random.RandomState(2020).permutation(len(train))
shuffled_input = {name: column.values[shuffle_order]
                  for name, column in train_model_input.items()}
shuffled_target = train[target].values[shuffle_order]

history = model_2.fit(shuffled_input, shuffled_target,
                      batch_size=256, epochs=30, verbose=2, validation_split=0.2)

# Evaluate on the held-out test split with mean absolute error.
pred_ans = model_2.predict(test_model_input, batch_size=256)
print("mae", round(mean_absolute_error(test[target].values, pred_ans), 4))

Epoch 1/30
250/250 - 2s - loss: 1.5743 - mse: 1.5743 - val_loss: 1.5313 - val_mse: 1.5313
Epoch 2/30
250/250 - 1s - loss: 1.1635 - mse: 1.1635 - val_loss: 1.5244 - val_mse: 1.5244
Epoch 3/30
250/250 - 1s - loss: 1.1489 - mse: 1.1489 - val_loss: 1.4836 - val_mse: 1.4836
Epoch 4/30
250/250 - 1s - loss: 1.1334 - mse: 1.1334 - val_loss: 1.4797 - val_mse: 1.4797
Epoch 5/30
250/250 - 1s - loss: 1.1160 - mse: 1.1160 - val_loss: 1.4702 - val_mse: 1.4702
Epoch 6/30
250/250 - 1s - loss: 1.0942 - mse: 1.0942 - val_loss: 1.4433 - val_mse: 1.4433
Epoch 7/30
250/250 - 1s - loss: 1.0642 - mse: 1.0642 - val_loss: 1.3958 - val_mse: 1.3958
Epoch 8/30
250/250 - 1s - loss: 1.0225 - mse: 1.0225 - val_loss: 1.3238 - val_mse: 1.3237
Epoch 9/30
250/250 - 1s - loss: 0.9726 - mse: 0.9725 - val_loss: 1.3222 - val_mse: 1.3222
Epoch 10/30
250/250 - 1s - loss: 0.9277 - mse: 0.9277 - val_loss: 1.2760 - val_mse: 1.2760
Epoch 11/30
250/250 - 1s - loss: 0.8970 - mse: 0.8970 - val_loss: 1.2373 - val_mse: 1.2373
Epoch 12