In [30]:
#%%
import numpy as np
import math
from sklearn.model_selection import train_test_split
from data import *
from evaluation import *
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn,optim
from torch.autograd import Variable
from torchvision import datasets,transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

import pandas as pd
data_dir = 'datasets/ml-100k/ub.base'
N, M, data_list, _ = load_data(file_dir=data_dir)
print(' data length: %d \n user number: %d \n item number: %d' %(len(data_list),N,M))

#
# ### 分割数据集
#
#
train_list, test_list = train_test_split(data_list,test_size=0.2)
print ('train length: %d \n test length: %d' %(len(train_list),len(test_list)))
#
# ### 将 list 转换成 矩阵
train_mat = sequence2mat(sequence = train_list, N = N, M = M)
test_mat = sequence2mat(sequence = test_list, N = N, M = M)
print(train_mat.shape)

# 定义网络结构
class Net(nn.Module):
    def __init__(self,
                 users_num=None,  # 用户数
                 items_num=None,  # 商品数
                 embedding_size=100,  # 嵌入空间维度
                 hidden_sizes=[16, 8],  # 隐层节点数目
                 learning_rate=0.005,  # 学习率
                 lamda_regularizer=0.1,  # 正则项系数
                 batch_size=256  # batch大小
                 ):
        super(Net, self).__init__()
        self.users_num = users_num
        self.items_num = items_num
        self.embedding_size = embedding_size
        self.hidden_sizes = hidden_sizes
        self.learning_rate = learning_rate
        self.lamda_regularizer = lamda_regularizer
        self.batch_size = batch_size
        self.user_embedding=nn.Embedding(users_num,embedding_size)
        self.item_embedding=nn.Embedding(items_num,embedding_size)
        self.fc1 =nn.Linear(embedding_size*2, 64)
        self.relu1=nn.ReLU()
        self.fc2=nn.Linear(in_features=64,out_features=16,bias=True)
        self.relu2=nn.ReLU()
        self.prediction_layer=nn.Linear(in_features=16,out_features=1,bias=True)
    def forward(self, users_input,items_input):
        embed_users=self.user_embedding(users_input)
        embed_items=self.item_embedding(items_input)
        x=torch.concat([embed_items,embed_users],1)
        x=self.fc1(x)
        x=self.relu1(x)
        x=self.fc2(x)
        x=self.relu2(x)
        x=self.prediction_layer(x)

        return x
class PMFDATASET(Dataset):
    def __init__(self, u_id, i_id, rating):
        self.u_id = u_id
        self.i_id = i_id
        self.rating = rating

    def __getitem__(self, index):
        return self.u_id[index], self.i_id[index], self.rating[index]
    def __len__(self):
        return len(self.rating)

def training(model, trainData,predData, batch_size,num_epochs=20, learning_rate=0.005):
    train_dataset=PMFDATASET(trainData[:,0],trainData[:,1],predData)
    train_loader=DataLoader(train_dataset,batch_size=batch_size)
    #使用定义优化器
    optimizer=optim.Adam(model.parameters(),lr=learning_rate)
    #定义损失函数 交叉熵代价函数
    mess_loss=nn.MSELoss()
    train_ls, valid_ls = [], []
    rmse=[]
    for epoch in range(num_epochs):
        total_loss, total_len = 0.0, 0
        total_loss2=0.0
        for user_id,item_id,rating in train_loader:
            #rating = rating.to(torch.int64)
            y_pred=model(user_id,item_id)
            l=mess_loss(y_pred,rating)
            optimizer.zero_grad()  #梯度清零
            l.backward()
            optimizer.step()
            total_loss+=l.item()
            total_loss2+=l.item()
            total_len+=len(y_pred)

        train_ls.append(100*total_loss/total_len)
        rmse.append(math.sqrt(100*total_loss2/total_len))
        print('epoch %d, train mae %f, train rmse %f' % (epoch + 1, train_ls[-1],rmse[-1]))
    return train_ls
if __name__ == '__main__':
    data=pd.read_table("datasets/ml-100k/ub.base",engine='python',
                       names=['uid', 'iid', 'score','timestramp'])

    trianing_data=data.iloc[:,:2]
    pred_data=data.iloc[:,2]
    #转换为tensor
    trianing_data=torch.tensor(trianing_data.values,dtype=torch.int64)
    pred_data=torch.tensor(pred_data.values,dtype=torch.float32)
    model=Net(  users_num=N*2,  # 用户数
                 items_num=M*2)# 商品数)
    training(model=model,trainData=trianing_data,predData=pred_data,batch_size=256)



 data length: 100000 
 user number: 943 
 item number: 1682
train length: 80000 
 test length: 20000
(943, 1682)
epoch 1, train mae 0.725925, train rmse 0.852013
epoch 2, train mae 0.524327, train rmse 0.724104
epoch 3, train mae 0.513667, train rmse 0.716706
epoch 4, train mae 0.521281, train rmse 0.721998
epoch 5, train mae 0.536829, train rmse 0.732686
epoch 6, train mae 0.527519, train rmse 0.726305
epoch 7, train mae 0.511039, train rmse 0.714870
epoch 8, train mae 0.492299, train rmse 0.701640
epoch 9, train mae 0.491708, train rmse 0.701219
epoch 10, train mae 0.494730, train rmse 0.703371
epoch 11, train mae 0.488816, train rmse 0.699154
epoch 12, train mae 0.480723, train rmse 0.693342
epoch 13, train mae 0.481666, train rmse 0.694021
epoch 14, train mae 0.481765, train rmse 0.694093
epoch 15, train mae 0.477916, train rmse 0.691315
epoch 16, train mae 0.475588, train rmse 0.689629
epoch 17, train mae 0.473118, train rmse 0.687836
epoch 18, train mae 0.471778, train rmse 0.686