- xDeepFM(xTreeme)

- DeepFMを拡張して，クロスネット(CIN: Compressed Interaction Network)を用いて，高階の特徴交差を自動的に学習するモデルのこと．

In [None]:
         Input
           |
   ┌───────┴────────┐
   │                │
 Embedding       Linear（wide）
   │                │
   └───────┬────────┘
           │
   ┌───────┴──────────────┐
   │                      │
 Deep part (MLP)     CIN（Cross）
   │                      │
   └────────┬─────────────┘
            │
         Concatenate
            │
          Output

- 下の三つのパートを統合したハイブリッドモデル．

| パート名       | 役割                                           |
| ---------- | -------------------------------------------- |
| **Linear** | 明示的な1次特徴（Wide部分と同じ）                          |
| **CIN**    | 高階特徴交差を自動で学習（Compressed Interaction Network） |
| **Deep**   | 埋め込みベクトルをMLPに通して非線形な関係を学習                    |

- 損失関数は，sigmoid関数を用いる．
  

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 0:all, 1:filter INFO, 2:filter WARNING, 3:only ERROR

# データ読み込み
cols = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', sep='\t', names=cols)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Leave-One-Out分割
df['rank'] = df.groupby('user_id')['timestamp'].rank(method='first', ascending=False)
train_df = df[df['rank'] > 1].copy()
test_df = df[df['rank'] == 1].copy()

# IDを数値に変換
user_enc = LabelEncoder()
item_enc = LabelEncoder()
train_df['user'] = user_enc.fit_transform(train_df['user_id'])
train_df['item'] = item_enc.fit_transform(train_df['item_id'])
test_df = test_df[test_df['user_id'].isin(user_enc.classes_)]
test_df = test_df[test_df['item_id'].isin(item_enc.classes_)]
test_df['user'] = user_enc.transform(test_df['user_id'])
test_df['item'] = item_enc.transform(test_df['item_id'])

# ラベル（rating >= 4を正例）
train_df['label'] = (train_df['rating'] >= 4).astype(int)
test_df['label'] = (test_df['rating'] >= 4).astype(int)

num_users = train_df['user'].nunique()
num_items = train_df['item'].nunique()

# Dataset定義
class FM_Dataset(Dataset):
    def __init__(self, df):
        self.users = torch.LongTensor(df['user'].values)
        self.items = torch.LongTensor(df['item'].values)
        self.labels = torch.FloatTensor(df['label'].values)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

train_loader = DataLoader(FM_Dataset(train_df), batch_size=128, shuffle=True)
test_loader = DataLoader(FM_Dataset(test_df), batch_size=256)

print("ok")

ok


In [10]:
import torch.nn as nn
import torch.nn.functional as F

class CIN(nn.Module):
    def __init__(self, input_dim, cin_layers):
        super(CIN, self).__init__()
        self.cin_layers = nn.ModuleList()
        self.field_nums = [input_dim]
        self.embedding_dim = None  # 初期化時には不明

        for layer_size in cin_layers:
            in_channels = self.field_nums[-1] * self.field_nums[0]  # 現在層 × 初期層
            self.cin_layers.append(
                nn.Conv1d(in_channels=in_channels, out_channels=layer_size, kernel_size=1)
            )
            self.field_nums.append(layer_size)

    def forward(self, x_0):
        # x_0: (B, F, D)
        if self.embedding_dim is None:
            self.embedding_dim = x_0.size(2)

        xs = []
        x = x_0
        for i, conv in enumerate(self.cin_layers):
            B, H_k, D = x.size()
            x_i = torch.einsum("bhd,bmd->bhm", x, x_0)  # (B, H_k, H_0)
            x_i = x_i.view(B, H_k * x_0.size(1), 1)     # (B, H_k * H_0, 1)
            x_i = conv(x_i)                             # (B, H_{k+1}, 1)
            x_i = F.relu(x_i)
            x_i = x_i.view(B, -1, self.embedding_dim)   # (B, H_{k+1}, D)
            xs.append(x_i)
            x = x_i

        x_stack = torch.cat(xs, dim=1)  # (B, sum(H_l), D)
        return torch.sum(x_stack, dim=2)  # (B, sum(H_l))



class xDeepFM(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=16, cin_layers=[16, 16], mlp_layers=[32, 16]):
        super(xDeepFM, self).__init__()
        self.user_embed = nn.Embedding(num_users, embedding_dim)
        self.item_embed = nn.Embedding(num_items, embedding_dim)

        self.linear_user = nn.Embedding(num_users, 1)
        self.linear_item = nn.Embedding(num_items, 1)

        self.cin = CIN(input_dim=2, cin_layers=cin_layers)

        input_dim = 2 * embedding_dim
        mlp = []
        for h in mlp_layers:
            mlp.append(nn.Linear(input_dim, h))
            mlp.append(nn.ReLU())
            input_dim = h
        self.mlp = nn.Sequential(*mlp)

        final_dim = 1 + 1 + cin_layers[-1] + mlp_layers[-1]
        self.output = nn.Linear(final_dim, 1)

    def forward(self, user, item):
        u_emb = self.user_embed(user)  # (B, D)
        i_emb = self.item_embed(item)  # (B, D)

        linear_part = self.linear_user(user) + self.linear_item(item)

        cin_input = torch.stack([u_emb, i_emb], dim=1)  # (B, F=2, D)
        cin_out = self.cin(cin_input)

        deep_input = torch.cat([u_emb, i_emb], dim=1)
        deep_out = self.mlp(deep_input)

        all_concat = torch.cat([linear_part, torch.sum(cin_out, dim=1, keepdim=True), deep_out], dim=1)
        out = torch.sigmoid(self.output(all_concat)).squeeze(1)
        return out

print("ok")

ok


In [11]:
# 学習・評価

from torch.optim import Adam
from sklearn.metrics import accuracy_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = xDeepFM(num_users, num_items).to(device)
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr=0.001)

# 学習ループ
for epoch in range(5):
    model.train()
    total_loss = 0
    for users, items, labels in train_loader:
        users, items, labels = users.to(device), items.to(device), labels.to(device)
        preds = model(users, items)
        loss = criterion(preds, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")


RuntimeError: Given groups=1, weight of size [16, 32, 1], expected input[128, 2, 1] to have 32 channels, but got 2 channels instead