In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from itertools import chain
from tqdm import tqdm
import pdb
import torchinfo

  from .autonotebook import tqdm as notebook_tqdm


In [234]:
class FM(nn.Module):
    def __init__(self, num_user, num_book, latent_dim, n_hidden_1, n_hidden_2, n_output_1, n_output_2):
        """
        latent_dim: 各个离散特征隐向量的维度
        input_shape: 这个最后离散特征embedding之后的拼接和dense拼接的总特征个数
        feature_1_user: 用户带bias的embedding vector 1 x latent_dim
        feature_1_book: 书带bias的embedding vector 1 x latent_dim
        feature_high_user: 用户高阶特征向量 1 x n_output_1
        feature_high_book: 书高阶特征向量 1 x n_output_1
        """
        super(FM, self).__init__()
        self.latent_dim = latent_dim
        # 定义三个矩阵， 一个是全局偏置，一个是一阶权重矩阵， 一个是二阶交叉矩阵，注意这里的参数由于是可学习参数，需要用nn.Parameter进行定义
        self.bias_user = nn.Parameter(torch.ones([1, latent_dim]))
        self.bias_book = nn.Parameter(torch.ones([1, latent_dim]))
        self.emb_user = nn.Embedding(num_user,latent_dim)
        self.emb_book = nn.Embedding(num_book,latent_dim)
        self.network_user = nn.Sequential(
            nn.Linear(latent_dim, n_hidden_1),
            nn.ReLU(),
            nn.Linear(n_hidden_1, n_output_1)
        )
        self.network_book = nn.Sequential(
            nn.Linear(latent_dim, n_hidden_1),
            nn.ReLU(),
            nn.Linear(n_hidden_1, n_output_1)
        )
        self.net_similatiry = nn.Sequential(
            nn.Linear(2*n_output_1 + 2*latent_dim, n_hidden_2),
            nn.ReLU(),
            nn.Linear(n_hidden_2, n_output_2)
        )
 
    def forward(self, inputs):
        feature_1_user = self.emb_user(inputs[0]) + self.bias_user
        feature_1_book = self.emb_book(inputs[0]) + self.bias_book

        feature_high_user = self.network_user(feature_1_user)
        feature_high_book = self.network_book(feature_1_book)

        feature_all = torch.cat([feature_1_user, feature_1_book, feature_high_user, feature_high_book],1)
        score = self.net_similatiry(feature_all)
        
        if n_output_2 == 1:
            print(score)
            return score
        else:
            score = F.softmax(score, dim=1)
            return score.argmax(dim=1, keepdim=False) + 0. # Change texsor to float type

    def params(self):
        params = [self.bias_user,
                  self.bias_book,
                  self.emb_user.parameters(),
                  self.emb_book.parameters(),
                  self.network_user.parameters(),
                  self.network_book.parameters(),
                  self.net_similatiry.parameters()]
        return filter(lambda p: p.requires_grad, chain(*params))


class Ratingdataset(Dataset):
    def __init__(self, data_path, user2idx, book2idx):
        df = pd.read_csv(data_path, header=None, index_col=None)
        df = df.drop_duplicates()
        # df = df.drop(df[df[2] == 0].index) # New
        self.user2idx = user2idx
        self.book2idx = book2idx
        self.data = df.values
        self._len = df.shape[0]

    def __getitem__(self, id_index):
        user_idx = self.user2idx[self.data[id_index,0]]
        book_idx = self.book2idx[self.data[id_index,1]]
        rate = self.data[id_index,2]
        return user_idx,book_idx,rate

    def __len__(self):
        return self._len

In [235]:
df_all = pd.read_csv("Ratings.csv",header=0,index_col=None)
df_all = df_all.drop_duplicates()
user_all = df_all.iloc[:,0].unique().tolist()
user2idx = {}
book_all = df_all.iloc[:,1].unique().tolist()
book2idx = {}
for i in range(len(user_all)):
    user2idx[user_all[i]] = i
for i in range(len(book_all)):
    book2idx[book_all[i]] = i

In [247]:
epoch = 10
batch_size = 1000
lr = 1e-4
latent_dim = 10
n_hidden_1 = 50
n_hidden_2 = 30
n_output_1 = 25
# n_output_2 = 1
n_output_2 = 11
alpha = 0.5

In [248]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = FM(len(user2idx), len(book2idx), latent_dim=latent_dim, 
        n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2, 
        n_output_1=n_output_1, n_output_2=n_output_2).to(device)


dataset_train = Ratingdataset("train_ratings.csv", user2idx, book2idx)
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

dataset_test = Ratingdataset("test_ratings.csv", user2idx, book2idx)
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)

In [249]:
torchinfo.summary(model)

Layer (type:depth-idx)                   Param #
FM                                       20
├─Embedding: 1-1                         955,130
├─Embedding: 1-2                         3,221,010
├─Sequential: 1-3                        --
│    └─Linear: 2-1                       550
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       1,275
├─Sequential: 1-4                        --
│    └─Linear: 2-4                       550
│    └─ReLU: 2-5                         --
│    └─Linear: 2-6                       1,275
├─Sequential: 1-5                        --
│    └─Linear: 2-7                       2,130
│    └─ReLU: 2-8                         --
│    └─Linear: 2-9                       341
Total params: 4,182,281
Trainable params: 4,182,281
Non-trainable params: 0

In [250]:
optimizer = optim.Adam(model.parameters(), lr=lr)

print("START TRAINING USING DEVICE {}".format(device))

for it in tqdm(range(0,epoch), disable=None):
    for index,item in enumerate(dataloader_train):
        inputs = [item[i].to(device) for i in range(2)]
        ground_truth = item[2].to(device)
        preds = model(inputs)
        l2_reg = torch.tensor(0.).to(device)
        for param in model.params():
            l2_reg += torch.norm(param)
        loss = torch.norm(preds - ground_truth, p=2) + alpha * l2_reg
        # loss = torch.SUM((ground_truth / 10) * np.log(preds) + (1 - ground_truth / 10) * np.log(1 - preds))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("loss={}".format(loss))

START TRAINING USING DEVICE cpu
loss=1440.9405517578125
loss=1311.1566162109375
loss=1196.114990234375
loss=1202.2314453125
loss=1116.832763671875
loss=965.256103515625
loss=882.12109375
loss=820.702392578125
loss=737.8413696289062
loss=676.1389770507812


In [251]:
RMSE = 0
model.eval()
with torch.no_grad():
    for index,item in enumerate(dataloader_test):
        inputs = [item[i].to(device) for i in range(2)]
        preds = model(inputs)
        ground_truth = item[2].to(device)
        RMSE += torch.sum(torch.square(preds - ground_truth))
        RMSE = RMSE/dataset_test.__len__()
    print(RMSE)

tensor(0.4929)


In [252]:
preds

tensor([9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9., 9.,
        9., 9., 9., 9., 9., 9., 9., 9., 

### 训练记录

使用Adam损失函数训练，RMSE=36.8019

使用Adam损失函数训练，使用Softmax作为激活函数,对1-10的分数预测，RMSE=0.1794， epoch=10, latent=50

使用Adam损失函数训练，使用Softmax作为激活函数，对0-10的分数预测，RMSE=0.1336， epoch=10, latent=50