In [1]:
import os
from os.path import join
import sys
import torch
from torch import nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from scipy.sparse import csr_matrix
import scipy.sparse as sp
import time
import dgl
import dgl.function as fn
import mxnet as mx
import torch.nn as nn

Using backend: pytorch


In [2]:
def dataload():
    #train_file = "./gowalla/example.txt"
    train_file = "./gowalla/train.txt"
    #test_file = "./gowalla/test.txt"
    m_item = 0
    n_user = 0
    trainUniqueUsers, trainItem, trainUser = [], [], []
    testUniqueUsers, testItem, testUser = [], [], []
    with open(train_file) as f:
        for l in f.readlines():
            if len(l) > 0:
                l = l.strip('\n').split(' ')
                items = [int(i) for i in l[1:]]
                uid = int(l[0])
                trainUniqueUsers.append(uid)
                trainUser.extend([uid] * len(items))
                trainItem.extend(items)
                m_item = max(m_item, max(items))
                n_user = max(n_user, uid)
    trainUser = np.array(trainUser)
    trainItem = np.array(trainItem)
    return trainUser,trainItem

In [3]:
def getgraph(trainUser,trainItem):
    g = dgl.heterograph({('user', 'buy' , 'item'): (trainUser, trainItem),
                         ('item', 'attr', 'user'): (trainItem,trainUser)})
    gre_user = g.out_degrees(etype=('user', 'buy' , 'item'))
    gre_item = g.out_degrees(etype=('item', 'attr' , 'user'))
    gre_user = gre_user ** (-0.5)
    gre_user = gre_user.unsqueeze(1).repeat(1,64)
    gre_item = gre_item  ** (-0.5)
    gre_item = gre_item.unsqueeze(1).repeat(1,64)
    return g, gre_user, gre_item

In [4]:
def UniformSample_original(trainUser,trainItem, g):
    n_user = g.num_nodes('user')
    m_item = g.num_nodes('item')
    UserItemNet = csr_matrix((np.ones(len(trainUser)), (trainUser, trainItem)),
                                      shape=(n_user, m_item))
    total_start = time.time()
    user_num = len(trainUser)
    users = np.random.randint(0, n_user, user_num)
    S = []
    sample_time1 = 0.
    sample_time2 = 0.
    allPos = []
    for user in range(n_user):
        allPos.append(UserItemNet[user].nonzero()[1])
    for i, user in enumerate(users):
        posForUser = allPos[user]
        if len(posForUser) == 0:
            continue
        posindex = np.random.randint(0, len(posForUser))
        positem = posForUser[posindex]
        while True:
            negitem = np.random.randint(0, m_item)
            if negitem in posForUser:
                continue
            else:
                break
        S.append([user, positem, negitem])

    total = time.time() - total_start
    print("time:{}".format(total))
    return np.array(S)

In [5]:
trainUser,trainItem= dataload()
g, gre_user, gre_item= getgraph(trainUser,trainItem)
sampling = UniformSample_original(trainUser,trainItem, g)

time:8.541903734207153


In [6]:
class Lightgcn(nn.Module):
    def __init__(self,
                g,
                gre_user,
                gre_item,
                n_layers):
        super(Lightgcn, self).__init__()
        self.g = g
        self.gre_user = gre_user
        self.gre_item = gre_item
        self.layers = n_layers
        self.num_users = g.num_nodes('user')
        self.num_items = g.num_nodes('item')
        self. __init_weight()
    def __init_weight(self):
        self.embedding_user = torch.nn.Embedding(
            num_embeddings=self.num_users, embedding_dim=64)
        self.embedding_item = torch.nn.Embedding(
            num_embeddings=self.num_items, embedding_dim=64)

    def computer(self):
        U= []
        I = []
        U.append(self.embedding_user.weight)
        I.append(self.embedding_item.weight)
        g.nodes['user'].data['emb'] = self.embedding_user.weight
        g.nodes['item'].data['emb'] = self.embedding_item.weight
        for i in range(self.layers):
            g.nodes['user'].data['emb'] = torch.mul(g.nodes['user'].data['emb'], self.gre_user)
            g.nodes['item'].data['emb'] = torch.mul(g.nodes['item'].data['emb'], self.gre_item)
            g.multi_update_all(
                {'buy':(fn.copy_u('emb', 'm'), fn.sum('m', 'neigh')),
                'attr':(fn.copy_u('emb', 'm'), fn.sum('m', 'neigh'))},
                "sum"
            )
            user_out = g.nodes['user'].data['neigh']
            item_out = g.nodes['item'].data['neigh']
            user_out = torch.mul(user_out, gre_user)
            item_out = torch.mul(item_out, gre_item)
            U.append(user_out)
            I.append(item_out)
        res_user = torch.stack(U,dim=1)
        res_user = torch.mean(res_user,dim=1)
        res_item = torch.stack(I,dim=1)
        res_item = torch.mean(res_item,dim=1)
        return res_user, res_item
    
    def bpr_loss(self, sampling, weight_decay):
        out_user, out_item = self.computer()
        score = torch.mm(out_user, out_item.t())
        #loss = torch.zeros(0)
        loss = 0
        for s in sampling:
            y_ui = score[s[0]][s[1]]
            y_uj = score[s[0]][s[2]]
            deta_y = nn.functional.softplus(y_ui - y_uj)
            loss -=  torch.log(deta_y)
        reg_loss = (out_user.norm(2).pow(2) + out_item.norm(2).pow(2)) * 0.5 / len(out_user)
        loss = loss + reg_loss * weight_decay
        loss = loss / len(sampling)
        return loss

In [7]:
def train(trainUser,trainItem,g,weight_decay,sampling):
    print(123)
    for epoch in range(n_epochs):
        model.train()
        t0 = time.time()
        loss = model.bpr_loss(sampling, weight_decay)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        t1 = time.time()
        print("Epoch {:05d} |Loss {:.4f} | Time(s) {:.4f} | ".format(epoch,loss.item(), t1-t0 ))

In [8]:
n_epochs = 100
weight_decay = 1e-4
n_layers = 3
model = Lightgcn(g,
                 gre_user,
                 gre_item,
                 n_layers)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



In [None]:
train(trainUser,trainItem,g,weight_decay,sampling)

123
