In [1]:
# Put the project checkout on sys.path; the `src.*` imports further down
# presumably rely on this to resolve.
# NOTE(review): hard-coded absolute path for one machine -- confirm whether it
# can be derived from the notebook location or an environment variable.
module_path = '/Users/kakao/study/RecModel'
import sys
sys.path.append(module_path)

import argparse

import pandas as pd
import numpy as np
import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader, random_split

from src.utils.custom_dataset import DeepFMDataset
from src.lit_model.lit_DeepFM import LitDeepFM
from src.model.DeepFM import DeepFM

In [2]:
def define_argparser():
    """Build the DeepFM experiment configuration from the declared defaults.

    The parser is evaluated against an empty argument list (``args=[]``)
    instead of ``sys.argv``, so inside a notebook kernel every option simply
    takes its default value.

    Returns:
        argparse.Namespace: with attributes ``project``, ``batch_size``,
        ``embed_dim``, ``deep_output_dim``, ``deep_n_layers``,
        ``deep_dropout``, ``epochs`` and ``cuda``. ``cuda`` is reset to 0
        whenever ``torch.cuda.is_available()`` reports no CUDA device.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--project", default="DeepFM")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--embed_dim",
        type=int,
        default=16,
        help="embedding dimensions (default: 16)",
    )
    parser.add_argument(
        "--deep_output_dim",
        type=int,
        default=16,
        help="output dimensions of deep component (default: 16)",
    )
    parser.add_argument(
        "--deep_n_layers",
        type=int,
        default=1,
        help="num of mlp hidden layers (default: 1)",
    )
    parser.add_argument(
        "--deep_dropout",
        type=float,
        default=0.2,
        help="dropout rate (default: 0.2)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=10,
        # The help text previously advertised "default: 3" while the actual
        # default is 10; keep the two in sync.
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument("--cuda", type=int, default=0, help="0 for cpu -1 for all gpu")
    config = parser.parse_args(args=[])  # in jupyter notebook

    # Fall back to CPU when no CUDA device is present. (A value that is
    # already 0 needs no special-casing: resetting it to 0 is a no-op.)
    if not torch.cuda.is_available():
        config.cuda = 0

    return config

# Build the run configuration once; the later cells read their
# hyperparameters (batch size, embedding size, epochs, ...) from it.
config = define_argparser()

In [3]:
# Display the resolved configuration namespace.
config

Namespace(project='DeepFM', batch_size=256, embed_dim=16, deep_output_dim=16, deep_n_layers=1, deep_dropout=0.2, epochs=10, cuda=0)

In [4]:
# Read the ratings CSV into a DataFrame and report its (rows, columns) shape.
data_path = "../data/kmrd_small/rates.csv"
data = pd.read_csv(data_path)

print(data.shape)

(140710, 4)


In [5]:
# Keep only the first N_SAMPLES rows of the ratings table for the experiment.
N_SAMPLES = 100_000

data = data.iloc[:N_SAMPLES]

print(f"data.shape={data.shape}")
data.head()


data.shape=(100000, 4)


Unnamed: 0,user,movie,rate,time
0,0,10003,7,1494128040
1,0,10004,7,1467529800
2,0,10018,9,1513344120
3,0,10021,9,1424497980
4,0,10022,7,1427627340


In [6]:
# Fraction of samples used for training; the remainder is the validation split.
train_ratio = 0.8
# Fixed seed so train/validation membership is identical on every run.
SPLIT_SEED = 42

DeepFM_dataset = DeepFMDataset(data)

# Without an explicit generator, random_split draws from the global RNG, so
# the split (and every metric computed on it) would differ between runs.
train_dataset, valid_dataset = random_split(
    DeepFM_dataset,
    [train_ratio, 1.0 - train_ratio],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [7]:
# Peek at one training sample: a pair of
# (array holding the user field index and the item field index, target).
train_dataset[0]

(array([7811,  101], dtype=int32), 0.0)

In [8]:
# Both splits are batched identically, using the batch size from `config`.
# NOTE(review): no `shuffle` argument is passed, so the training batches are
# not reshuffled between epochs -- confirm this is intended.
loader_kwargs = {"batch_size": config.batch_size}
train_loader = DataLoader(train_dataset, **loader_kwargs)
valid_loader = DataLoader(valid_dataset, **loader_kwargs)

In [9]:
# Plain PyTorch DeepFM network, sized from the dataset's field dimensions and
# the hyperparameters held in `config`.
deepfm_hparams = {
    "field_dims": DeepFM_dataset.field_dims,
    "embed_dim": config.embed_dim,
    "deep_output_dim": config.deep_output_dim,
    "deep_n_layers": config.deep_n_layers,
    "deep_dropout": config.deep_dropout,
}
DeepFM_torch_model = DeepFM(**deepfm_hparams)

In [10]:
# Wrap the torch model (together with the run config) in the project's
# LitDeepFM module, which is what gets handed to the Lightning trainer.
DeepFM_lit_model = LitDeepFM(DeepFM_torch_model, config)

In [11]:
# Stop training early when "validation/loss" has failed to improve (decrease)
# for 3 validation checks in a row.
early_stopping_callback = pl.callbacks.EarlyStopping(
    monitor="validation/loss", mode="min", patience=3
)

# Honour the `--cuda` option instead of always forcing the CPU:
#   config.cuda == 0 -> CPU (same behaviour as before);
#   anything else    -> GPU, with config.cuda forwarded as `devices`
#                       (-1 selects all GPUs).
# define_argparser() already resets config.cuda to 0 when CUDA is unavailable,
# so the GPU branch is only taken on machines that can actually use it.
device_kwargs = {"accelerator": "cpu"}
if config.cuda != 0:
    device_kwargs = {"accelerator": "gpu", "devices": config.cuda}

trainer = pl.Trainer(
    log_every_n_steps=10,
    max_epochs=config.epochs,
    deterministic=True,
    callbacks=[early_stopping_callback],
    **device_kwargs,
)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


In [12]:
# Train on train_loader and validate on valid_loader; the early-stopping
# callback attached to the trainer may end the run before config.epochs.
trainer.fit(model=DeepFM_lit_model, train_dataloaders=train_loader, val_dataloaders=valid_loader)


  | Name      | Type           | Params
---------------------------------------------
0 | model     | DeepFM         | 373 K 
1 | loss_fn   | BCELoss        | 0     
2 | train_acc | BinaryAccuracy | 0     
3 | valid_acc | BinaryAccuracy | 0     
4 | test_acc  | BinaryAccuracy | 0     
---------------------------------------------
373 K     Trainable params
0         Non-trainable params
373 K     Total params
1.494     Total estimated model params size (MB)


                                                                            

  rank_zero_warn(
  rank_zero_warn(


Epoch 3: 100%|██████████| 313/313 [00:02<00:00, 143.67it/s, v_num=6, train/loss=0.302, validation/loss=0.588, validation/acc=0.754, train/acc=0.835]


In [13]:
from sklearn.metrics import accuracy_score

In [14]:
# Score the validation split with the trained network and report accuracy.
#
# Switch to evaluation mode explicitly rather than relying on whatever mode
# training left the module in: eval() turns dropout off so the predictions are
# deterministic, and no_grad() avoids building autograd graphs while scoring.
DeepFM_torch_model.eval()

pred = []
true = []
with torch.no_grad():
    for x, y in valid_loader:
        result = DeepFM_torch_model(x)
        pred += result.tolist()
        true += y.tolist()

# Turn the model outputs (presumably probabilities, given the BCELoss used in
# training) into hard 0/1 labels with a 0.5 threshold.
pred = np.where(np.array(pred) > 0.5, 1, 0)

acc = accuracy_score(true, pred)

print(f"Acc = {round(acc, 4)}")

Acc = 0.7537
