In [9]:
import pandas as pd
import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [10]:
from sklearn.metrics import log_loss, roc_auc_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names

In [11]:
data = pd.read_csv('movie_data.csv')
target = ['rating']

In [12]:
for str in data.columns:
    new_str = str.replace(' ', '_')
    data.rename(columns={str:new_str}, inplace = True)

In [13]:
sparse_features = data.columns

# 1.Label Encoding for sparse features,and do simple Transformation for dense features
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])
# 2.count #unique features for each sparse field
fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique()) for feat in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [14]:
# 3.generate input data for model
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

# 4.Define Model,train,predict and evaluate
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile("adam", "mse", metrics=['mse'], )

history = model.fit(train_model_input,train[target].values,batch_size=256,epochs=10,verbose=2,validation_split=0.2)
pred_ans = model.predict(test_model_input, batch_size=256)
print("test MSE", round(mean_squared_error(test[target].values, pred_ans), 4))

cuda ready...
Train on 28823 samples, validate on 7206 samples
Epoch 1/10
 - 6s - loss: 8.7422 - mean_squared_error: 8.7422 - val_loss: 0.5612 - val_mean_squared_error: 0.5611
Epoch 2/10
 - 1s - loss: 0.0927 - mean_squared_error: 0.0926 - val_loss: 0.0040 - val_mean_squared_error: 0.0039
Epoch 3/10
 - 1s - loss: 0.0011 - mean_squared_error: 0.0010 - val_loss: 4.8073e-04 - val_mean_squared_error: 3.9254e-04
Epoch 4/10
 - 1s - loss: 2.9077e-04 - mean_squared_error: 2.0261e-04 - val_loss: 2.7978e-04 - val_mean_squared_error: 1.9169e-04
Epoch 5/10
 - 1s - loss: 1.9957e-04 - mean_squared_error: 1.1159e-04 - val_loss: 2.1525e-04 - val_mean_squared_error: 1.2739e-04
Epoch 6/10
 - 1s - loss: 1.5693e-04 - mean_squared_error: 6.9181e-05 - val_loss: 1.8014e-04 - val_mean_squared_error: 9.2531e-05
Epoch 7/10
 - 1s - loss: 1.4080e-04 - mean_squared_error: 5.3373e-05 - val_loss: 1.7827e-04 - val_mean_squared_error: 9.1020e-05
Epoch 8/10
 - 1s - loss: 1.3994e-04 - mean_squared_error: 5.2874e-05 - val

In [15]:
# cai de yi