<a href="https://colab.research.google.com/github/stellaryoung/data_match/blob/master/03_GraphRec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pickle
import numpy as np
import torch
import torch.nn as nn

### Attention.py


In [None]:
import torch
import torch.nn as nn
from torch.nn import init
import numpy as np
import random
import torch.nn.functional as F


class Attention(nn.Module):
    """Attention scorer over a set of neighbor embeddings.

    Every neighbor row is concatenated with the target representation, pushed
    through a three-layer MLP, and the resulting scalars are normalised with a
    softmax taken across the neighbor axis (dim 0).
    """

    def __init__(self, embedding_dims):
        super().__init__()
        dim = embedding_dims
        self.embed_dim = dim
        # `bilinear` and `softmax` are not used by `forward`; they are still
        # created, in this order, so parameter names and the sequence of
        # random initialisations stay the same.
        self.bilinear = nn.Bilinear(dim, dim, 1)
        self.att1 = nn.Linear(dim * 2, dim)
        self.att2 = nn.Linear(dim, dim)
        self.att3 = nn.Linear(dim, 1)
        self.softmax = nn.Softmax(0)

    def forward(self, node1, u_rep, num_neighs):
        """Return one attention weight per row of ``node1``.

        Args:
            node1: 2-D tensor of neighbor embeddings, one row per neighbor.
            u_rep: representation of the target node; it is tiled
                ``num_neighs`` times so it can be paired with every neighbor.
            num_neighs: number of rows in ``node1``.

        Returns:
            A tensor with one column whose entries are softmax-normalised
            along dim 0.
        """
        tiled_target = u_rep.repeat(num_neighs, 1)
        hidden = torch.cat((node1, tiled_target), 1)
        # Two hidden layers share the same ReLU -> dropout pattern.
        for layer in (self.att1, self.att2):
            hidden = F.dropout(F.relu(layer(hidden)), training=self.training)
        logits = self.att3(hidden)
        return F.softmax(logits, dim=0)

### Social_Aggregator.py

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import random
from Attention import Attention
class Social_Aggregator(nn.Module):
    """
    Social Aggregator: attention-weighted pooling of the embeddings of each
    user's social neighbors.
    """

    def __init__(self, features, u2e, embed_dim, cuda="cpu"):
        super().__init__()

        # `features` is stored but not read by `forward`, which works on the
        # raw `u2e` embedding table instead.
        self.features = features
        self.device = cuda
        self.u2e = u2e
        self.embed_dim = embed_dim
        self.att = Attention(self.embed_dim)

    def forward(self, nodes, to_neighs):
        """Aggregate the neighbors of every node.

        Args:
            nodes: user ids; ``nodes[i]`` selects a row of ``u2e.weight``.
            to_neighs: for each position in ``nodes``, a collection of
                neighbor ids.

        Returns:
            A ``(len(nodes), embed_dim)`` float tensor on ``self.device``.
        """
        pooled = torch.empty(len(nodes), self.embed_dim, dtype=torch.float).to(self.device)
        for row, node in enumerate(nodes):
            neighbors = to_neighs[row]
            # Fast path: use the raw user embeddings of the neighbors. The
            # original author noted a slower alternative (item-space latent
            # factors from `self.features`) that is not used here.
            neighbor_emb = self.u2e.weight[list(neighbors)]
            target_emb = self.u2e.weight[node]

            weights = self.att(neighbor_emb, target_emb, len(neighbors))
            # Attention-weighted sum of the neighbor rows, as a single row.
            pooled[row] = torch.mm(neighbor_emb.t(), weights).t()
        return pooled

### Social_encoder.py

In [None]:
import torch
import torch.nn as nn
from torch.nn import init
import torch.nn.functional as F
# 注意：这里聚合的embedding是从用户-物品图中学习到user embedding!!!!!
# 
class Social_Encoder(nn.Module):
    """Final user representation built on the social graph.

    The user's own feature vector (produced by ``features``) is concatenated
    with the aggregated representation of the user's social neighbors and
    projected back to ``embed_dim`` by a linear layer followed by ReLU.

    Args:
        features: callable taking a CPU ``LongTensor`` of node ids and
            returning a tensor that, once transposed, has one row per node.
        embed_dim: size of the embedding vectors.
        social_adj_lists: mapping from user id to that user's neighbor ids.
        aggregator: object whose ``forward(nodes, to_neighs)`` returns one
            aggregated row per node.
        base_model: optional module stored as ``self.base_model``; when it is
            ``None`` the attribute is not created.
        cuda: device that the self features are moved to.
    """

    def __init__(self, features, embed_dim, social_adj_lists, aggregator, base_model=None, cuda="cpu"):
        super(Social_Encoder, self).__init__()

        self.features = features
        self.social_adj_lists = social_adj_lists
        self.aggregator = aggregator
        if base_model is not None:
            self.base_model = base_model
        self.embed_dim = embed_dim
        self.device = cuda
        self.linear1 = nn.Linear(2 * self.embed_dim, self.embed_dim)

    def forward(self, nodes):
        """Encode a batch of user ids.

        Args:
            nodes: tensor of user ids.

        Returns:
            Tensor with one row per node and ``embed_dim`` columns.
        """
        to_neighs = [self.social_adj_lists[int(node)] for node in nodes]
        neigh_feats = self.aggregator.forward(nodes, to_neighs)  # user-user network

        # `features` is called with CPU int64 indices, as before, but without
        # the detour through a NumPy array.
        self_feats = self.features(nodes.cpu().long()).to(self.device)
        self_feats = self_feats.t()

        # Concatenate the user's own (item-space) embedding with the
        # aggregated neighbor embedding and feed the pair through the linear
        # layer to obtain the final user representation.
        combined = torch.cat([self_feats, neigh_feats], dim=1)
        combined = F.relu(self.linear1(combined))
        return combined

In [None]:
# Social-graph side of the user model: the aggregator pools each user's social
# neighbors, and the encoder merges that with the user's own representation.
# Both receive a lambda that calls enc_u_history and transposes its output.
# NOTE(review): enc_u_history, u2e, embed_dim, device and social_adj_lists are
# not defined above this cell, and the same two statements appear again in the
# model-construction cell further down -- presumably this copy is illustrative
# only. Confirm the intended run order.
agg_u_social = Social_Aggregator(lambda nodes: enc_u_history(nodes).t(), u2e, embed_dim, cuda=device)
enc_u = Social_Encoder(lambda nodes: enc_u_history(nodes).t(), embed_dim, social_adj_lists, agg_u_social,
                        base_model=enc_u_history, cuda=device)

### UV_Aggregator.py

In [None]:
# 该模型最终返回的是用户与物品的embedding矩阵
class UV_Aggregator(nn.Module):
    """
    Item and user aggregator: attention-weighted pooling of the neighbors of a
    node in the user-item graph (item aggregation or user aggregation).

    With ``uv=True`` the neighbors are looked up in ``v2e`` and the target
    node in ``u2e``; with ``uv=False`` the two tables swap roles. Each
    neighbor embedding is fused with the embedding of its rating label
    before attention is applied.

    Args:
        v2e: item embedding table (``.weight`` is indexed directly).
        r2e: rating embedding table.
        u2e: user embedding table.
        embed_dim: size of the embedding vectors.
        cuda: device on which the output matrix is allocated.
        uv: truthy to aggregate items for users, falsy to aggregate users
            for items.
    """

    def __init__(self, v2e, r2e, u2e, embed_dim, cuda="cpu", uv=True):
        super(UV_Aggregator, self).__init__()
        self.uv = uv
        self.v2e = v2e
        self.r2e = r2e
        self.u2e = u2e
        self.device = cuda
        self.embed_dim = embed_dim
        self.w_r1 = nn.Linear(self.embed_dim * 2, self.embed_dim)
        self.w_r2 = nn.Linear(self.embed_dim, self.embed_dim)
        self.att = Attention(self.embed_dim)

    def forward(self, nodes, history_uv, history_r):
        """Aggregate the interaction history of every node.

        Args:
            nodes: ids of the target nodes; ``nodes[i]`` belongs to
                ``history_uv[i]``.
            history_uv: per node, the ids of the neighbors it interacted with.
            history_r: per node, the rating labels of those interactions
                (aligned with ``history_uv``).

        Returns:
            A ``(len(history_uv), embed_dim)`` float tensor on ``self.device``.
        """
        # Allocate on the target device directly instead of creating a CPU
        # tensor and copying it over.
        embed_matrix = torch.empty(
            len(history_uv), self.embed_dim, dtype=torch.float, device=self.device
        )

        # The role of the two tables is fixed for the whole batch, so it is
        # decided once rather than on every loop iteration.
        if self.uv:
            # user component: neighbors are items, targets are users
            neighbor_table, target_table = self.v2e, self.u2e
        else:
            # item component: neighbors are users, targets are items
            neighbor_table, target_table = self.u2e, self.v2e

        for i, history in enumerate(history_uv):
            e_uv = neighbor_table.weight[history]
            uv_rep = target_table.weight[nodes[i]]
            e_r = self.r2e.weight[history_r[i]]

            # Fuse each neighbor embedding with its rating embedding.
            x = torch.cat((e_uv, e_r), 1)
            x = F.relu(self.w_r1(x))
            o_history = F.relu(self.w_r2(x))

            # Attention over the fused interactions, then a weighted sum.
            att_w = self.att(o_history, uv_rep, len(history))
            embed_matrix[i] = torch.mm(o_history.t(), att_w).t()
        return embed_matrix

 ### UV_Encoder.py

In [None]:
# 用户物品图上的主网络模块，UV_Aggreator作为子网络模块，在该网络模块中被调用。
# 输出：用户/物品在聚合邻居后的表示信息
class UV_Encoder(nn.Module):
    """Main encoder on the user-item graph; the aggregator is its sub-module.

    Produces, for each user (or item), a representation that combines the
    node's own embedding with the aggregation of its neighbors.

    Args:
        features: embedding table of the users or of the items.
        embed_dim: size of the embedding vectors.
        history_uv_lists: per target user the items in its history, or per
            target item the users in its history.
        history_r_lists: the rating records matching ``history_uv_lists``.
        aggregator: aggregation network called with
            ``(nodes, histories, ratings)``.
        cuda: device name, stored as ``self.device``.
        uv: stored flag telling which side of the graph is encoded.
    """

    def __init__(self, features, embed_dim, history_uv_lists, history_r_lists, aggregator, cuda="cpu", uv=True):
        super().__init__()

        self.features = features
        self.uv = uv
        self.history_uv_lists = history_uv_lists
        self.history_r_lists = history_r_lists
        self.aggregator = aggregator
        self.embed_dim = embed_dim
        self.device = cuda
        self.linear1 = nn.Linear(2 * self.embed_dim, self.embed_dim)

    def forward(self, nodes):
        """Encode a batch of node ids into ``embed_dim``-sized vectors."""
        # Look up (neighbors, ratings) for each node, one node at a time.
        records = [
            (self.history_uv_lists[int(node)], self.history_r_lists[int(node)])
            for node in nodes
        ]
        neighbor_lists = [neighbors for neighbors, _ in records]
        rating_lists = [ratings for _, ratings in records]

        # user-item network
        aggregated = self.aggregator.forward(nodes, neighbor_lists, rating_lists)
        own = self.features.weight[nodes]

        # Concatenate the node's own embedding with the one aggregated from
        # its neighbors, then project back to the original dimension.
        merged = torch.cat([own, aggregated], dim=1)
        return F.relu(self.linear1(merged))

In [None]:
# User side of the user-item graph: with uv=True the aggregator pools the items
# in each user's history, and the encoder combines that with the user's own
# embedding from u2e.
# NOTE(review): v2e, r2e, u2e, embed_dim, device and the history lists are not
# defined above this cell, and the same two statements appear again in the
# model-construction cell further down -- presumably this copy is illustrative
# only. Confirm the intended run order.
agg_u_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, cuda=device, uv=True)
enc_u_history = UV_Encoder(u2e, embed_dim, history_u_lists, history_ur_lists, agg_u_history, cuda=device, uv=True)


### GraphRec

In [None]:
# enc_u:       用户建模的网络模块
# enc_v_history：  物品建模的网络模块
# 这里在实例化GraphRec类的时候，直接将2个网络模块的实例传输进去，减少总网络模块的参数，也是一种代码方式吧
class GraphRec(nn.Module):
    """Rating predictor built from a user encoder and an item encoder.

    The two encoder instances are passed in ready-made, so this module only
    adds the MLP layers that turn the two representations into a score.

    Args:
        enc_u: user-modeling module; called with user ids, must expose
            ``embed_dim`` and return ``embed_dim`` features per user.
        enc_v_history: item-modeling module; called with item ids.
        r2e: rating embedding table; stored but not used by this class.
    """

    def __init__(self, enc_u, enc_v_history, r2e):
        super(GraphRec, self).__init__()
        self.enc_u = enc_u
        self.enc_v_history = enc_v_history
        self.embed_dim = enc_u.embed_dim
        self.w_ur1 = nn.Linear(self.embed_dim, self.embed_dim)
        self.w_ur2 = nn.Linear(self.embed_dim, self.embed_dim)
        self.w_vr1 = nn.Linear(self.embed_dim, self.embed_dim)
        self.w_vr2 = nn.Linear(self.embed_dim, self.embed_dim)
        self.w_uv1 = nn.Linear(self.embed_dim * 2, self.embed_dim)
        self.w_uv2 = nn.Linear(self.embed_dim, 16)
        self.w_uv3 = nn.Linear(16, 1)
        self.r2e = r2e  # not used anywhere in this class
        # Four batch-norm layers, one per hidden layer of the MLPs.
        self.bn1 = nn.BatchNorm1d(self.embed_dim, momentum=0.5)
        self.bn2 = nn.BatchNorm1d(self.embed_dim, momentum=0.5)
        self.bn3 = nn.BatchNorm1d(self.embed_dim, momentum=0.5)
        self.bn4 = nn.BatchNorm1d(16, momentum=0.5)
        self.criterion = nn.MSELoss()

    def forward(self, nodes_u, nodes_v):
        """Predict a score for every (user, item) pair.

        Args:
            nodes_u: tensor of user ids.
            nodes_v: tensor of item ids, aligned with ``nodes_u``.

        Returns:
            1-D tensor with one score per pair.
        """
        # Vector representations of the users and of the items.
        embeds_u = self.enc_u(nodes_u)
        embeds_v = self.enc_v_history(nodes_v)

        x_u = F.relu(self.bn1(self.w_ur1(embeds_u)))
        x_u = F.dropout(x_u, training=self.training)
        x_u = self.w_ur2(x_u)
        x_v = F.relu(self.bn2(self.w_vr1(embeds_v)))
        x_v = F.dropout(x_v, training=self.training)
        x_v = self.w_vr2(x_v)

        x_uv = torch.cat((x_u, x_v), 1)
        x = F.relu(self.bn3(self.w_uv1(x_uv)))
        x = F.dropout(x, training=self.training)
        x = F.relu(self.bn4(self.w_uv2(x)))
        x = F.dropout(x, training=self.training)
        scores = self.w_uv3(x)
        # Drop only the trailing singleton dimension. A bare squeeze() would
        # also remove the batch dimension for a batch of one, producing a
        # 0-d tensor that cannot be iterated or compared shape-for-shape
        # with the labels.
        return scores.squeeze(-1)

    def loss(self, nodes_u, nodes_v, labels_list):
        """Return the MSE between the predicted scores and ``labels_list``."""
        scores = self.forward(nodes_u, nodes_v)
        return self.criterion(scores, labels_list)

### 代码运行

### 论文思路：
**该模型的思路还是比较直观的，用2张图，分别是用户-物品图，以及用户-用户图，然后应用图网络aggregation的思想，进行特征表示。**
* 本文分别对用户、物品、用户物品的评分进行embedding,这里对评分进行
embedding可以**理解为对用户-物品图(用户评分矩阵)的边进行表示**。
* 评分的embedding非常直接，如果有[1,2,3,4,5],那么则对应5个d维的embedding向量。 
***
第一部分：表示用户信息，分别是物品聚合以及用户聚合得到的向量进行拼接
* step1:物品聚合：从用户购买过的**历史物品**的图信息进行聚合，需要用到
item和score的embedding
* step2:社会关系聚合：从社交关系图上进行聚合，聚合**用户的邻居**，需要用到从**用户物品图上**得到的user embedding
***
第二部分：表示物品信息，使用用户聚合 
* 用户聚合：从**买过该物品的历史用户**进行聚合，需要用到user embedding和score embedding。
***
注意：
* **用户聚合与物品聚合从模型的操作来讲，本质是一样的，只是物品聚合用的是item embedding，用户聚合使用的是user embedding，因此代码中UV_Aggregator、UV_Encoder可以同时适用于用户与物品。**
* 所有聚合的方法是使用**注意力机制**
***
第三部分：将表示拼接作为网络输入进行训练



In [None]:
def train(model, device, train_loader, optimizer, epoch, best_rmse, best_mae):
    """Run one training epoch and print the running loss every 100 batches.

    Args:
        model: object exposing ``train()`` and ``loss(users, items, labels)``.
        device: device the batch tensors are moved to.
        train_loader: iterable of ``(users, items, labels)`` tensor triples.
        optimizer: optimizer updating the model parameters.
        epoch: epoch number, used only in the progress message.
        best_rmse: best RMSE so far, used only in the progress message.
        best_mae: best MAE so far, used only in the progress message.

    Returns:
        Always ``0``.
    """
    model.train()
    running_loss = 0.0
    batches_since_report = 0
    for i, (batch_nodes_u, batch_nodes_v, labels_list) in enumerate(train_loader):
        optimizer.zero_grad()
        loss = model.loss(batch_nodes_u.to(device), batch_nodes_v.to(device), labels_list.to(device))
        # A fresh graph is built by every forward pass and backpropagated
        # through exactly once, so it does not need to be retained.
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        batches_since_report += 1
        if i % 100 == 0:
            # Average over the batches actually accumulated: the first report
            # (i == 0) covers a single batch, later ones cover 100.
            print('[%d, %5d] loss: %.3f, The best rmse/mae: %.6f / %.6f' % (
                epoch, i, running_loss / batches_since_report, best_rmse, best_mae))
            running_loss = 0.0
            batches_since_report = 0
    return 0


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and return ``(rmse, mae)``.

    Args:
        model: object exposing ``eval()`` and ``forward(users, items)``.
        device: device the batch tensors are moved to.
        test_loader: iterable of ``(users, items, targets)`` tensor triples.

    Returns:
        Tuple ``(rmse, mae)`` of Python floats computed over all batches.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    model.eval()
    preds = []
    targets = []
    with torch.no_grad():
        for test_u, test_v, tmp_target in test_loader:
            test_u, test_v, tmp_target = test_u.to(device), test_v.to(device), tmp_target.to(device)
            val_output = model.forward(test_u, test_v)
            # atleast_1d keeps a batch of one usable even when the model
            # returns a 0-d tensor for it.
            preds.append(np.atleast_1d(val_output.detach().cpu().numpy()))
            targets.append(np.atleast_1d(tmp_target.detach().cpu().numpy()))
    if not preds:
        raise ValueError("test_loader yielded no batches")

    # The metrics are computed with NumPy: sqrt / mean_squared_error /
    # mean_absolute_error are not imported in the visible cells.
    errors = np.concatenate(preds).astype(np.float64) - np.concatenate(targets).astype(np.float64)
    expected_rmse = float(np.sqrt(np.mean(errors ** 2)))
    mae = float(np.mean(np.abs(errors)))
    return expected_rmse, mae


# Despite its name this is an evaluation helper, not a pytest test case.
test.__test__ = False

### 模型的训练与测试
* 训练过程中需要融入edge embedding训练网络参数，得到用户与物品的最终表示
* 推断过程直接使用最终得到的用户与物品表示进行推断。

In [None]:
# Location of the pickled toy dataset (presumably a Google Drive folder mounted
# in Colab -- adjust for other environments).
path_data = '/content/drive/My Drive/recommend_system/graph_rec_data/toy_dataset.pickle'
# Opened in binary mode so a later cell can unpickle from it.
# NOTE(review): this cell leaves the handle open; whichever cell consumes it is
# responsible for closing it.
data_file = open(path_data, 'rb')

In [None]:
# Contents of the pickle, in order (types as noted by the original author:
# dict, list, dict, dict):
# history_u_lists:  user purchase history (dict: user -> list of items)
# history_ur_lists: the ratings given by each user
# history_v_lists / history_vr_lists: presumably the item-side counterparts
#                   (item -> users, item -> ratings) -- TODO confirm
# train_u, train_v, train_r: training set (user, item, rating)
# test_u, test_v, test_r:    testing set (user, item, rating)
# social_adj_lists: adjacency between users (social graph)
# ratings_list:     dict mapping a rating value (0.5-4.0) to an index (0-7),
#                   i.e. 8 rating levels
#
# NOTE(security): pickle.load can execute arbitrary code; only load dataset
# files from a trusted source.
try:
    # A parenthesised target replaces the backslash continuations: the first
    # backslash was followed by trailing spaces, which is a syntax error.
    (
        history_u_lists, history_ur_lists, history_v_lists, history_vr_lists,
        train_u, train_v, train_r,
        test_u, test_v, test_r,
        social_adj_lists, ratings_list,
    ) = pickle.load(data_file)
finally:
    # The handle is not needed once the single pickled object has been read.
    data_file.close()

In [None]:
# Show the container type of a few loaded objects; per the output recorded
# below, history_u_lists is a dict with default values (defaultdict).
for loaded_obj in (history_u_lists, train_u, social_adj_lists, ratings_list):
    print(type(loaded_obj))

<class 'collections.defaultdict'>
<class 'list'>
<class 'collections.defaultdict'>
<class 'dict'>


In [None]:
batch_size = 128
test_batch_size = 1000

# Wrap the (user, item, rating) triples as tensor datasets: ids become
# LongTensors, ratings FloatTensors.
trainset = torch.utils.data.TensorDataset(
    torch.LongTensor(train_u),
    torch.LongTensor(train_v),
    torch.FloatTensor(train_r),
)
testset = torch.utils.data.TensorDataset(
    torch.LongTensor(test_u),
    torch.LongTensor(test_v),
    torch.FloatTensor(test_r),
)

# Both loaders shuffle their samples.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=test_batch_size, shuffle=True)

# Sizes of the embedding tables to create.
num_users = len(history_u_lists)
num_items = len(history_v_lists)
num_ratings = len(ratings_list)
# Recorded output for the toy dataset: 705 users, 1941 items, 8 rating levels.
print(num_users, num_items, num_ratings)

705 1941 8


In [None]:
# Step 1: create the embedding tables for users, items and ratings.
# NOTE(review): embed_dim, device and args are not defined in any cell visible
# in this notebook -- they must be set before this cell runs. Confirm.
u2e = nn.Embedding(num_users, embed_dim).to(device)
v2e = nn.Embedding(num_items, embed_dim).to(device)
r2e = nn.Embedding(num_ratings, embed_dim).to(device)

# user feature
# features: item * rating
agg_u_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, cuda=device, uv=True)
enc_u_history = UV_Encoder(u2e, embed_dim, history_u_lists, history_ur_lists, agg_u_history, cuda=device, uv=True)

# neighbors
# Both lambdas call enc_u_history and transpose its output; Social_Encoder
# transposes it back before concatenating.
agg_u_social = Social_Aggregator(lambda nodes: enc_u_history(nodes).t(), u2e, embed_dim, cuda=device)
enc_u = Social_Encoder(lambda nodes: enc_u_history(nodes).t(), embed_dim, social_adj_lists, agg_u_social,
                        base_model=enc_u_history, cuda=device)

# item feature: user * rating
agg_v_history = UV_Aggregator(v2e, r2e, u2e, embed_dim, cuda=device, uv=False)
enc_v_history = UV_Encoder(v2e, embed_dim, history_v_lists, history_vr_lists, agg_v_history, cuda=device, uv=False)

# model
# enc_u: user latent factor from the social graph
# enc_v_history: user and item latent factors from the user-item graph
graphrec = GraphRec(enc_u, enc_v_history, r2e).to(device)
# RMSprop over all parameters registered on graphrec.
optimizer = torch.optim.RMSprop(graphrec.parameters(), lr=args.lr, alpha=0.9)

In [None]:
# Best metrics seen so far, and the number of consecutive epochs without an
# RMSE improvement.
best_rmse, best_mae = 9999.0, 9999.0
endure_count = 0

for epoch in range(1, args.epochs + 1):
    train(graphrec, device, train_loader, optimizer, epoch, best_rmse, best_mae)
    expected_rmse, mae = test(graphrec, device, test_loader)
    # please add the validation set to tune the hyper-parameters based on your datasets.

    # early stopping (no validation set in toy dataset)
    improved = expected_rmse < best_rmse
    if improved:
        best_rmse, best_mae = expected_rmse, mae
    endure_count = 0 if improved else endure_count + 1
    print("rmse: %.4f, mae:%.4f " % (expected_rmse, mae))

    # Stop once more than five consecutive epochs brought no improvement.
    if endure_count > 5:
        break

### 问题的思考
模型在训练阶段要输入评分记录，在测试阶段没有评分记录,模型如何进行推断？

答：
* 模型训练实际上就是训练一套网络参数，训练时用到用户物品历史评分记录、社交关系记录得到最终的用户表示与物品表示
* 测试时只需要用到最终的用户表示与物品表示进行推断，本论文在代码实现上在测试集上并没有将训练好的用户表示与物品表示直接提取出来。而是利用网络模块重新计算一遍。**实际效果与提取出来应该是一致的**
* 如果有大量新的用户与物品加入，这种用户与物品的表示需要重新计算。

模型与数据的分离？
* 这篇论文的代码实现，模型与数据结合过于紧密，没法在只有测试集与训练好的模型参数的情况下得到测试集评分，网络模块的重新定义一定要用到训练集
* 以后在实现时，不仅保存模型参数，还可以保存模型输出的最终物品与用户表示，当有最终的用户与物品表示时，就可以在测试时不用再forward一遍了。实际上测试集往往小于训练集，采用论文中的做法，在测试集推断的计算量更加小。
* 基于上述原因，该论文的网络定义存在一种别扭感（主要是edge的存在）
* 该论文在训练时的评分信息，不仅在最终损失函数中被使用，还作为edge
embedding嵌入到网络中。

