# 图自编码应用 

先用troch 版本进行测试


In [1]:
import pandas as pd 
import numpy as np  
import scipy.sparse as sp 
import torch 
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F 
import torch.nn.init as init 
from sklearn.model_selection import train_test_split
import time 

import random
random.seed(1111)

In [None]:
def globally_normalize_bipartite_adjacencies(adjacencies,symmetric=True):
    adj_tot = np.sum([adj for adj in adjacencies])
    degree_u = np.asarray(adj_tot.sum(1)).flatten()
    degree_v = np.asarray(adj_tot.sum(0)).flatten()

    # set zeros to inf to avoid dividing by zero
    degree_u[degree_u == 0.] = np.inf
    degree_v[degree_v == 0.] = np.inf

    degree_u_inv_sqrt = 1. / np.sqrt(degree_u)
    degree_v_inv_sqrt = 1. / np.sqrt(degree_v)
    degree_u_inv_sqrt_mat = sp.diags([degree_u_inv_sqrt], [0])
    degree_v_inv_sqrt_mat = sp.diags([degree_v_inv_sqrt], [0])

    degree_u_inv = degree_u_inv_sqrt_mat.dot(degree_u_inv_sqrt_mat)

    if symmetric:
        adj_norm = [degree_u_inv_sqrt_mat.dot(adj).dot(
            degree_v_inv_sqrt_mat) for adj in adjacencies]

    else:
        adj_norm = [degree_u_inv.dot(adj) for adj in adjacencies]

    return adj_norm

def get_adjacency(edge_df,num_user,num_movie,sysmetric_normalization):
     user2item_adj=[]
     item2user_adj=[]
     train_edge_df=edge_df.loc[edge_df['usage']==1]
     edge_index=train_edge_df.loc[:,['userId', 'movieId']].to_numpy()
     support = sp.csr_matrix((np.ones(len(edge_index)), (edge_index[:, 0], edge_index[:, 1])),
                                shape=(num_user, num_movie), dtype=np.float32)
     user2item_adj.append(support)
     item2user_adj.append(support.T)
     
     user2item_adj=globally_normalize_bipartite_adjacencies(user2item_adj,symmetric=sysmetric_normalization)
     item2user_adj=globally_normalize_bipartite_adjacencies(item2user_adj,symmetric=sysmetric_normalization)
     
     return user2item_adj,item2user_adj

def get_node_identity_feature(nums_user,nums_item):
     identity_feature=np.identity(nums_user+nums_item,dtype=np.float32)
     user_identity_feature,item_identity_feature=identity_feature[:nums_user],identity_feature[nums_user:]
     return user_identity_feature,item_identity_feature

def checkId(traindf:pd.DataFrame,testdf:pd.DataFrame):
     ''' 保证testdf 中的userId 和itemId 在traindf中出现过'''
     totalTrainUserId=traindf['userId'].drop_duplicates()
     testdf=testdf.loc[testdf['userId'].apply(lambda x: x  in totalTrainUserId),:]
     totalTrainItemId=traindf['movieId'].drop_duplicates()
     testdf=testdf.loc[testdf['movieId'].apply(lambda x: x in totalTrainItemId),:]
     return testdf
 
               
          
     

def load_data(ratings:pd.DataFrame):
     '''
    处理数据, 输出邻接矩阵和特征矩阵(目前将rating 处理为0,1 情况, 只考虑其是否acting)
    ---------------
     Args:
          ratings  {pd.DataFrame} 
     Return:
          num_users  : int 
          Number of users and items
          num_items  : int
          
          user_indices   : np.int32 arrays
          User indices
          item_indices   : np.int32 arrays
          Item indices
          
          user2item_adjs,item2user_adjs:  np.float32 arrays
          不同rating 下的User/Item 邻接矩阵
          
          train_mask     :np.bool arrays
          
     '''
     movieids=ratings.movieId.drop_duplicates()
     movieDicts={u:v for u,v in zip(movieids,range(len(movieids)))}
     ratings['userId']=ratings['userId']-1
     ratings['movieId']=ratings.movieId.map(movieDicts)
     ratings['usage']=np.random.binomial(1,0.8,ratings.shape[0])
     X_train,X_val=train_test_split(ratings.loc[ratings['usage']==1,:],test_size=0.25)
     X_val=checkId(X_train,X_val)
     X_train['usage']=1
     X_val['usage']=0
     X=pd.concat([X_train,X_val],axis=0)
     X['label']=1
     X_test=ratings.loc[ratings['usage']==0,:] #NOTE testset 需要重新写!!!!!
     X_test=checkId(X,X_test)
     X_test['label']=1
     
     
     num_users=X.userId.max()+1
     num_items=X.movieId.max()+1

     user2item_adjs,item2user_adjs=get_adjacency(X,num_users,num_items,sysmetric_normalization=True)
     
     #one-hot encoding for nodes
     user_indentity_feature, item_indentity_feature = get_node_identity_feature(num_users,num_items)
     
     train_mask=(X['usage']==1).to_numpy()
     user_indices,item_indices=X[['userId','movieId']].to_numpy().T
     user_indices_test,item_indices_test=X_test[['userId','movieId']].to_numpy().T

     train_labels=X['label'].to_numpy()
     test_labels=X_test['label'].to_numpy()

     return user2item_adjs,item2user_adjs,\
          user_indentity_feature,item_indentity_feature,\
          user_indices,item_indices,user_indices_test,item_indices_test,\
          train_labels,test_labels,train_mask,num_users,num_items

    

In [None]:
ratings=pd.read_csv('data/ratings.csv')


user2item_adjs,item2user_adjs,user_indentity_feature,item_indentity_feature,\
    user_indices,item_indices,user_indices_test,item_indices_test,\
        train_labels,test_labels,train_mask,num_users,num_items=load_data(ratings) 


In [None]:
user2item_adjs[0].sum()

1294.6162

In [None]:

class InputDropout(nn.Module):
    def __init__(self, keep_prob):
        super(InputDropout, self).__init__()
        self.p = keep_prob

    def forward(self, inputs):
        x = inputs.clone()
        if self.training:
            random_tensor = self.p + torch.rand((inputs.size(0),))
            dropout_mask = torch.floor(random_tensor).bool()
            x[~dropout_mask] = 0.
            return x / self.p
        else:
            return x


class StackGCNEncoder(nn.Module):
    def __init__(self, input_dim, output_dim, num_support,
                 dropout=0.,
                 use_bias=False, activation=F.relu):
        """对得到的每类评分使用级联的方式进行聚合
        
        Args:
        ----
            input_dim (int): 总节点个数
            output_dim (int): 输出的特征维度，需要output_dim % num_support = 0
            num_support (int): 评分的类别数，比如1~5分，值为5
            use_bias (bool, optional): 是否使用偏置. Defaults to False.
            activation (optional): 激活函数. Defaults to F.relu.
        """
        super(StackGCNEncoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_support = num_support
        self.dropout = dropout
        self.use_bias = use_bias
        self.activation = activation
        assert output_dim % num_support == 0
        self.weight = nn.Parameter(torch.Tensor(input_dim, output_dim))
        if self.use_bias:
            self.bias_user = nn.Parameter(torch.Tensor(output_dim, ))
            self.bias_item = nn.Parameter(torch.Tensor(output_dim, ))
        self.dropout = InputDropout(1 - dropout)
        self.reset_parameters()

    def reset_parameters(self):
        init.xavier_uniform_(self.weight)
        if self.use_bias:
            init.zeros_(self.bias_user)
            init.zeros_(self.bias_item)

    def forward(self, user_supports, item_supports, user_inputs, item_inputs):
        """StackGCNEncoder计算逻辑
        
        Args:
            user_supports (list of torch.sparse.FloatTensor): 
                归一化后每个评分等级对应的用户与商品邻接矩阵
            item_supports (list of torch.sparse.FloatTensor):
                归一化后每个评分等级对应的商品与用户邻接矩阵
            user_inputs (torch.Tensor): 用户特征的输入
            item_inputs (torch.Tensor): 商品特征的输入
        
        Returns:
            [torch.Tensor]: 用户的隐层特征
            [torch.Tensor]: 商品的隐层特征
        """
        assert len(user_supports) == len(item_supports) == self.num_support
        user_inputs = self.dropout(user_inputs)
        item_inputs = self.dropout(item_inputs)
        user_hidden = []
        item_hidden = []
        weights = torch.split(self.weight, self.output_dim//self.num_support, dim=1)
        for i in range(self.num_support):
            tmp_u = torch.matmul(user_inputs, weights[i]) #Nu x D
            tmp_v = torch.matmul(item_inputs, weights[i]) #Ni x D
            assert user_supports[i].shape[1]==tmp_v.shape[0],'user_adj shape:{},tmp_v shape:{}'.format(user2item_adjs[0].shape,tmp_v.shape)
            assert item_supports[i].shape[1]==tmp_u.shape[0],'user_adj shape:{},tmp_v shape:{}'.format(user2item_adjs[0].shape,tmp_v.shape)
            tmp_user_hidden = torch.sparse.mm(user_supports[i], tmp_v)
            tmp_item_hidden = torch.sparse.mm(item_supports[i], tmp_u)
            user_hidden.append(tmp_user_hidden)
            item_hidden.append(tmp_item_hidden)

        user_hidden = torch.cat(user_hidden, dim=1)
        item_hidden = torch.cat(item_hidden, dim=1)

        user_outputs = self.activation(user_hidden)
        item_outputs = self.activation(item_hidden)

        if self.use_bias:
            user_outputs += self.bias_user
            item_outputs += self.bias_item

        return user_outputs, item_outputs


class SumGCNEncoder(nn.Module):
    def __init__(self, input_dim, output_dim, num_support,
                 dropout=0.,
                 use_bias=False, activation=F.relu):
        """对得到的每类评分使用求和的方式进行聚合
        
        Args:
            input_dim (int): 输入的特征维度
            output_dim (int): 输出的特征维度，需要output_dim % num_support = 0
            num_support (int): 评分的类别数，比如1~5分，值为5
            use_bias (bool, optional): 是否使用偏置. Defaults to False.
            activation (optional): 激活函数. Defaults to F.relu.
        """
        super(SumGCNEncoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_support = num_support
        self.use_bias = use_bias
        self.activation = activation
        self.weight = nn.Parameter(torch.Tensor(
            input_dim, output_dim * num_support))
        if self.use_bias:
            self.bias_user = nn.Parameter(torch.Tensor(output_dim, ))
            self.bias_item = nn.Parameter(torch.Tensor(output_dim, ))
        self.dropout = InputDropout(1 - dropout)
        self.reset_parameters()

    def reset_parameters(self):
        init.xavier_uniform_(self.weight)
        if self.use_bias:
            init.zeros_(self.bias_user)
            init.zeros_(self.bias_item)

    def forward(self, user_supports, item_supports, user_inputs, item_inputs):
        """SumGCNEncoder计算逻辑
        
        Args:
            user_supports (list of torch.sparse.FloatTensor): 
                归一化后每个评分等级对应的用户与商品邻接矩阵
            item_supports (list of torch.sparse.FloatTensor):
                归一化后每个评分等级对应的商品与用户邻接矩阵
            user_inputs (torch.Tensor): 用户特征的输入
            item_inputs (torch.Tensor): 商品特征的输入
        
        Returns:
            [torch.Tensor]: 用户的隐层特征
            [torch.Tensor]: 商品的隐层特征
        """
        assert len(user_supports) == len(item_supports) == self.num_support
        user_inputs = self.dropout(user_inputs)
        item_inputs = self.dropout(item_inputs)


        user_hidden = []
        item_hidden = []
        weights = torch.split(self.weight, self.output_dim, dim=1)
        for i in range(self.num_support):
            w = sum(weights[:(i + 1)])
            tmp_u = torch.matmul(user_inputs, w)
            tmp_v = torch.matmul(item_inputs, w)
            tmp_user_hidden = torch.sparse.mm(user_supports[i], tmp_v)
            tmp_item_hidden = torch.sparse.mm(item_supports[i], tmp_u)
            user_hidden.append(tmp_user_hidden)
            item_hidden.append(tmp_item_hidden)

        user_hidden, item_hidden = sum(user_hidden), sum(item_hidden)
        user_outputs = self.activation(user_hidden)
        item_outputs = self.activation(item_hidden)

        if self.use_bias:
            user_outputs += self.bias_user
            item_outputs += self.bias_item

        return user_outputs, item_outputs


class FullyConnected(nn.Module):
    def __init__(self, input_dim, output_dim, dropout=0.,
                 use_bias=False, activation=F.relu,
                 share_weights=False):
        """非线性变换层
        
        Args:
        ----
            input_dim (int): 输入的特征维度
            output_dim (int): 输出的特征维度，需要output_dim % num_support = 0
            use_bias (bool, optional): 是否使用偏置. Defaults to False.
            activation (optional): 激活函数. Defaults to F.relu.
            share_weights (bool, optional): 用户和商品是否共享变换权值. Defaults to False.
        
        """
        super(FullyConnected, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.use_bias = use_bias
        self.activation = activation
        self.share_weights = share_weights
        if not share_weights:
            self.weights_u = nn.Parameter(torch.Tensor(input_dim, output_dim))
            self.weights_v = nn.Parameter(torch.Tensor(input_dim, output_dim))
            if use_bias:
                self.user_bias = nn.Parameter(torch.Tensor(output_dim))
                self.item_bias = nn.Parameter(torch.Tensor(output_dim))
        else:
            self.weights_u = nn.Parameter(torch.Tensor(input_dim, output_dim))
            self.weights_v = self.weights_u
            if use_bias:
                self.user_bias = nn.Parameter(torch.Tensor(output_dim))
                self.item_bias = self.user_bias
        self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        if not self.share_weights:
            init.xavier_uniform_(self.weights_u)
            init.xavier_uniform_(self.weights_v)
            if self.use_bias:
                init.normal_(self.user_bias, std=0.5)
                init.normal_(self.item_bias, std=0.5)
        else:
            init.xavier_uniform_(self.weights_u)
            if self.use_bias:
                init.normal_(self.user_bias, std=0.5)

    def forward(self, user_inputs, item_inputs):
        """前向传播
        
        Args:
            user_inputs (torch.Tensor): 输入的用户特征
            item_inputs (torch.Tensor): 输入的商品特征
        
        Returns:
            [torch.Tensor]: 输出的用户特征
            [torch.Tensor]: 输出的商品特征
        """
        x_u = self.dropout(user_inputs)
        x_u = torch.matmul(x_u, self.weights_u)

        x_v = self.dropout(item_inputs)
        x_v = torch.matmul(x_v, self.weights_v)

        u_outputs = self.activation(x_u)
        v_outputs = self.activation(x_v)

        if self.use_bias:
            u_outputs += self.user_bias
            v_outputs += self.item_bias

        return u_outputs, v_outputs


class Decoder(nn.Module):
    def __init__(self, input_dim, num_weights, num_classes, dropout=0., activation=F.relu):
        """解码器
        
        Args:
        ----
            input_dim (int): 输入的特征维度
            num_weights (int): basis weight number
            num_classes (int): 总共的评分级别数，eg. 5
        """
        super(Decoder, self).__init__()
        self.input_dim = input_dim
        self.num_weights = num_weights
        self.num_classes = num_classes
        self.activation = activation
        
        self.weight = nn.ParameterList([nn.Parameter(torch.Tensor(input_dim, input_dim))
                                        for _ in range(num_weights)])
        self.weight_classifier = nn.Parameter(torch.Tensor(num_weights, num_classes))

        self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        for i in range(len(self.weight)):
            init.orthogonal_(self.weight[i], gain=1.1)
        init.xavier_uniform_(self.weight_classifier)

    def forward(self, user_inputs, item_inputs, user_indices, item_indices,predicted=False):
        """计算非归一化的分类输出
        
        Args:
            user_inputs (torch.Tensor): 用户的隐层特征
            item_inputs (torch.Tensor): 商品的隐层特征
            user_indices (torch.LongTensor): 
                所有交互行为中用户的id索引，与对应的item_indices构成一条边,shape=(num_edges, )
            item_indices (torch.LongTensor): 
                所有交互行为中商品的id索引，与对应的user_indices构成一条边,shape=(num_edges, )
        
        Returns:
            [torch.Tensor]: 未归一化的分类输出，shape=(num_edges, num_classes)
        """
        if not predicted:
            user_inputs = self.dropout(user_inputs)
            item_inputs = self.dropout(item_inputs)
        user_inputs = user_inputs[user_indices]
        item_inputs = item_inputs[item_indices]
        
        basis_outputs = []
        for i in range(self.num_weights):
            tmp = torch.matmul(user_inputs, self.weight[i])
            out = torch.sum(tmp * item_inputs, dim=1, keepdim=True)
            basis_outputs.append(out)

        basis_outputs = torch.cat(basis_outputs, dim=1)
        
        outputs = torch.matmul(basis_outputs, self.weight_classifier)
        outputs = self.activation(outputs)
        
        return outputs


In [None]:
def to_torch_sparse_tensor(x, device):
    if not sp.isspmatrix_coo(x):
        x = sp.coo_matrix(x)
    row, col = x.row, x.col
    data = x.data

    indices = torch.from_numpy(np.asarray([row, col]).astype('int64')).long()
    values = torch.from_numpy(x.data.astype(np.float32))
    th_sparse_tensor = torch.sparse.FloatTensor(indices, values,
                                                x.shape).to(device)

    return th_sparse_tensor


def tensor_from_numpy(x, device):

    return torch.from_numpy(x).to(device)

def returnx(x):
    return x



class GraphMatrixCompletion(nn.Module):
    def __init__(self, input_dim, gcn_hidden_dim,
                 encode_hidden_dim,
                 num_support=1, num_classes=2, num_basis=4):
        super(GraphMatrixCompletion, self).__init__()
        self.u_embed=None
        self.i_embed=None
        self.encoder = StackGCNEncoder(input_dim, gcn_hidden_dim, num_support, dropout=DROPOUT_RATIO)
        # self.dense1 = FullyConnected(side_feat_dim, side_hidden_dim, dropout=0.,use_bias=True)
        self.dense2 = FullyConnected(gcn_hidden_dim, encode_hidden_dim,
                                     dropout=DROPOUT_RATIO, activation=returnx)
        self.decoder = Decoder(encode_hidden_dim, num_basis, num_classes,
                               dropout=DROPOUT_RATIO, activation=returnx)

    def forward(self, user_supports, item_supports,
                user_inputs, item_inputs,
                user_edge_idx, item_edge_idx):    #不使用feature
    # def forward(self, user_supports, item_supports,
    #             user_inputs, item_inputs,
    #             user_side_inputs, item_side_inputs,
    #             user_edge_idx, item_edge_idx):
        user_gcn, movie_gcn = self.encoder(user_supports, item_supports, user_inputs, item_inputs)
        # user_side_feat, movie_side_feat = self.dense1(user_side_inputs, item_side_inputs)

        # user_feat = torch.cat((user_gcn, user_side_feat), dim=1)
        # movie_feat = torch.cat((movie_gcn, movie_side_feat), dim=1)
        user_feat,movie_feat=user_gcn,movie_gcn
        user_embed, movie_embed = self.dense2(user_feat, movie_feat)
        
        # global u_embed
        # global i_embed
        self.u_embed=user_embed
        self.i_embed=movie_embed
        edge_logits = self.decoder(user_embed, movie_embed, user_edge_idx, item_edge_idx)

        return edge_logits

    def predict(self, user_edge_idx, item_edge_idx):

        return self.decoder(self.u_embed,self.i_embed,user_edge_idx,item_edge_idx,predicted=True).argmax(dim=1)
        
        

In [None]:
DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
LEARNING_RATE = 0.015
EPOCHS = 20
NODE_INPUT_DIM = num_items+num_users
# SIDE_FEATURE_DIM = 41
GCN_HIDDEN_DIM = 100
# SIDE_HIDDEN_DIM = 10
ENCODE_HIDDEN_DIM = 75
NUM_BASIS = 4
DROPOUT_RATIO = 0.8
WEIGHT_DACAY = 0.
SCORES = torch.tensor([[0,1]]).to(DEVICE)

In [None]:
user2movie_adjacencies = [to_torch_sparse_tensor(adj, DEVICE) for adj in user2item_adjs]
movie2user_adjacencies = [to_torch_sparse_tensor(adj, DEVICE) for adj in item2user_adjs]
user_identity_feature = tensor_from_numpy(user_indentity_feature, DEVICE).float()
movie_identity_feature = tensor_from_numpy(item_indentity_feature, DEVICE).float()
user_indices = tensor_from_numpy(user_indices, DEVICE).long()
movie_indices = tensor_from_numpy(item_indices, DEVICE).long()
labels = tensor_from_numpy(train_labels, DEVICE)
train_mask = tensor_from_numpy(train_mask, DEVICE)


In [None]:
model = GraphMatrixCompletion(NODE_INPUT_DIM, GCN_HIDDEN_DIM,
                             ENCODE_HIDDEN_DIM, num_basis=NUM_BASIS).to(DEVICE)
criterion = nn.CrossEntropyLoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DACAY)
model_inputs = (user2movie_adjacencies, movie2user_adjacencies,
                user_identity_feature, movie_identity_feature, user_indices, movie_indices)


In [None]:

criterion = nn.CrossEntropyLoss().to(DEVICE)
# criterion = nn.BCELoss().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DACAY)

def train():
    val_result = []
    model.train()
    for e in range(EPOCHS):
        t=time.time()
        logits = model(*model_inputs)
        logit=logits[train_mask]
        label= labels[train_mask]
        loss = criterion(logit,label)
        rmse = expected_rmse(logit,label)
        optimizer.zero_grad()
        loss.backward()  
        optimizer.step()  

        tr = test()
        val_result.append(tr)
        model.train()
        print(f"Epoch {e:04d}: TrainLoss: {loss.item():.4f}, TrainRMSE: {rmse.item():.4f}, "
              f"ValRMSE: {tr[0]:.4f}, ValLoss: {tr[1]:.4f},Times :{time.time()-t}")

    val_result = np.asarray(val_result)
    idx = val_result[:, 0].argmin()
    print(f'test min rmse {val_result[idx]} on epoch {idx}')


@torch.no_grad()
def test():
    model.eval()
    logits = model(*model_inputs)
    val_mask = ~train_mask
    logit= logits[val_mask]
    label= labels[val_mask]
    loss = criterion(logit, label)
    rmse = expected_rmse(logit, label)
    return rmse.item(), loss.item()


def expected_rmse(logits, label):
    true_y = label 
    prob = F.softmax(logits, dim=1)
    pred_y = torch.sum(prob * SCORES, dim=1)
    
    diff = torch.pow(true_y - pred_y, 2)
    
    return torch.sqrt(diff.mean())

def predictTestSet(user_indices_test,item_indices_test):
    pred=model.predict(user_indices_test,item_indices_test)
    return pred 

In [None]:
train()

Epoch 0000: TrainLoss: 0.6932, TrainRMSE: 0.5000, ValRMSE: 0.4998, ValLoss: 0.6927
Epoch 0001: TrainLoss: 0.6915, TrainRMSE: 0.4992, ValRMSE: 0.4957, ValLoss: 0.6845
Epoch 0002: TrainLoss: 0.6788, TrainRMSE: 0.4928, ValRMSE: 0.4680, ValLoss: 0.6311
Epoch 0003: TrainLoss: 0.5742, TrainRMSE: 0.4393, ValRMSE: 0.3471, ValLoss: 0.4226
Epoch 0004: TrainLoss: 0.4191, TrainRMSE: 0.3603, ValRMSE: 0.1171, ValLoss: 0.0991
Epoch 0005: TrainLoss: 0.2018, TrainRMSE: 0.2387, ValRMSE: 0.0045, ValLoss: 0.0026
Epoch 0006: TrainLoss: 0.1303, TrainRMSE: 0.2018, ValRMSE: 0.0000, ValLoss: 0.0000
Epoch 0007: TrainLoss: 0.0772, TrainRMSE: 0.1623, ValRMSE: 0.0000, ValLoss: 0.0000
Epoch 0008: TrainLoss: 0.0851, TrainRMSE: 0.1746, ValRMSE: 0.0000, ValLoss: 0.0000
Epoch 0009: TrainLoss: 0.1208, TrainRMSE: 0.2082, ValRMSE: 0.0000, ValLoss: 0.0000
Epoch 0010: TrainLoss: 0.0728, TrainRMSE: 0.1619, ValRMSE: 0.0000, ValLoss: 0.0000
Epoch 0011: TrainLoss: 0.1001, TrainRMSE: 0.1899, ValRMSE: 0.0000, ValLoss: 0.0000
Epoc

In [None]:
from sklearn.metrics import f1_score,recall_score,accuracy_score,roc_auc_score
val_mask = ~train_mask
preds=model(*model_inputs).argmax(dim=1)
test_pred=predictTestSet(user_indices_test,item_indices_test)

print('Total DataSet \n f1 score:{} , recall score: {},acc score:{}'.format(f1_score(labels,preds),recall_score(labels,preds),accuracy_score(labels,preds)))
print('Train DataSet \n f1 score:{} , recall score: {},acc score:{}'.format(f1_score(labels[train_mask],preds[train_mask]),recall_score(labels[train_mask],preds[train_mask]),accuracy_score(labels[train_mask],preds[train_mask])))
print('Val DataSet \n f1 score:{} , recall score: {},acc score:{}'.format(f1_score(labels[val_mask],preds[val_mask]),recall_score(labels[val_mask],preds[val_mask]),accuracy_score(labels[val_mask],preds[val_mask])))
# print('Test DataSet \n f1 score:{} , recall score: {},acc score:{}, auc score:{}'.format(f1_score(test_labels,test_pred),recall_score(test_labels,test_pred),accuracy_score(test_labels,test_pred),roc_auc_score(test_labels,test_pred)))
print('Test DataSet \n f1 score:{} , recall score: {},acc score:{}'.format(f1_score(test_labels,test_pred),recall_score(test_labels,test_pred),accuracy_score(test_labels,test_pred)))




Total DataSet 
 f1 score:0.9105164724500072 , recall score: 0.8357322065231745,acc score:0.8357322065231745
Train DataSet 
 f1 score:0.9105347410831519 , recall score: 0.8357629888890723,acc score:0.8357629888890723
Val DataSet 
 f1 score:0.8181818181818181 , recall score: 0.6923076923076923,acc score:0.6923076923076923
Test DataSet 
 f1 score:0.6538461538461539 , recall score: 0.6538461538461539,acc score:0.6538461538461539


In [None]:
b=pd.DataFrame({'name':item_indices_test})
b.loc[b['name'].apply(lambda x : x not in item_indices),:]

Unnamed: 0,name
11,6111
15,3114
25,8744


In [None]:
def get_adjacency1(edge_df,num_user,num_movie,sysmetric_normalization):
     ratings['usage']='train'
     user2item_adj=[]
     item2user_adj=[]
     train_edge_df=edge_df.loc[edge_df['usage']=='train']
     edge_index=train_edge_df.loc[:,['userId', 'movieId']].to_numpy()
     support = sp.csr_matrix((np.ones(len(edge_index)), (edge_index[:, 0], edge_index[:, 1])),
                                shape=(num_user, num_movie), dtype=np.float32)
     user2item_adj.append(support)
     item2user_adj.append(support.T)
     
    #  user2item_adj=globally_normalize_bipartite_adjacencies(user2item_adj,symmetric=sysmetric_normalization)
    #  item2user_adj=globally_normalize_bipartite_adjacencies(item2user_adj,symmetric=sysmetric_normalization)
     
     return user2item_adj,item2user_adj


In [None]:
     num_users=ratings.userId.max()+1
     num_items=ratings.movieId.max()+1

     user2item_adjs,item2user_adjs=get_adjacency1(ratings,num_users,num_items,sysmetric_normalization=False)
 

In [None]:
user2item_adjs[0]

<610x9724 sparse matrix of type '<class 'numpy.float32'>'
	with 0 stored elements in Compressed Sparse Row format>