In [None]:
# !git clone https://github.com/DeepGraphLearning/RecommenderSystems.git

In [None]:
# Location of the compressed Douban archive. The '/content/drive' prefix looks
# like a Colab Google Drive mount -- confirm the drive is mounted before use.
# NOTE(review): `data_path` is rebound to '/content' in the UTILS cell below.
data_path = '/content/drive/My Drive/Recommendation/douban.gz'

In [None]:
# !tar -xzvf douban.gz

In [14]:
import pandas as pd
import numpy as np
import math
import argparse
import random
from collections import Counter
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [None]:
# Root directory of the Douban dataset, relative to the working directory.
PATH_TO_DATA = './Douban/'

# Follower/Followee edge list, read by process_social().
SOCIAL_NETWORK_FILE = PATH_TO_DATA + 'socialnet/socialnet.tsv'
# Movie rating log, read by process_rating().
RATING_FILE = PATH_TO_DATA + 'movie/douban_movie.tsv'

# Longest session (number of ratings) kept by split_data(); longer ones are dropped.
max_length = 30

Preprocessing

In [None]:
def process_rating(day=7, rating_file=None): # segment session in every $day days.
    """Load the rating log and tag every rating with a time bucket and a session id.

    Rows are kept only when ``Rating`` lies in [1, 6] and ``Timestamp`` lies in
    [1.2e9, 1.485e9] (both ranges inclusive). ``TimeId`` is the number of whole
    ``day``-day windows between a rating and the earliest kept timestamp, and
    ``SessionId`` is ``'<UserId>_<TimeId>'``.

    Args:
        day: Width of one session window, in days.
        rating_file: Optional path of the tab-separated rating file. Defaults to
            the module-level ``RATING_FILE``.

    Returns:
        The filtered DataFrame with the added ``TimeId`` and ``SessionId`` columns.
    """
    if rating_file is None:
        rating_file = RATING_FILE
    df = pd.read_csv(rating_file, sep='\t', dtype={0:str, 1:str, 2:np.int32, 3: np.float32})
    # Series.between() is inclusive on both ends by default. The boolean form
    # `inclusive=True` is rejected by current pandas, so it is simply omitted.
    df = df[df['Rating'].between(1, 6)]
    span_left = 1.2e9
    span_right = 1.485e9
    # .copy() gives an independent frame so the column assignments below do not
    # write into a slice of the raw data (SettingWithCopyWarning).
    df = df[df['Timestamp'].between(span_left, span_right)].copy()
    min_timestamp = df['Timestamp'].min()
    seconds_per_window = 86400 * day
    df['TimeId'] = [int(math.floor((t - min_timestamp) / seconds_per_window)) for t in df['Timestamp']]
    df['SessionId'] = [str(uid) + '_' + str(tid) for uid, tid in zip(df['UserId'], df['TimeId'])]
    print('Statistics of user ratings:')
    print('\tNumber of total ratings: {}'.format(len(df)))
    print('\tNumber of users: {}'.format(df.UserId.nunique()))
    print('\tNumber of items: {}'.format(df.ItemId.nunique()))
    print('\tAverage ratings per user:{}'.format(df.groupby('UserId').size().mean()))
    return df

In [None]:
def process_social(social_network_file=None): # read in social network.
    """Load the follower/followee edge list and drop duplicate edges.

    Args:
        social_network_file: Optional path of the tab-separated edge file with
            ``Follower`` and ``Followee`` columns. Defaults to the module-level
            ``SOCIAL_NETWORK_FILE``.

    Returns:
        DataFrame of unique (Follower, Followee) pairs, both read as strings.
    """
    if social_network_file is None:
        social_network_file = SOCIAL_NETWORK_FILE
    net = pd.read_csv(social_network_file, sep='\t', dtype={0:str, 1: str})
    net = net.drop_duplicates(subset=['Follower', 'Followee'])
    # Out-degree per follower, computed once and reused for the statistics below.
    friends_per_user = net.groupby('Follower').size()
    print('Statistics of social network:')
    print('\tTotal user in social network:{}.\n\tTotal edges(links) in social network:{}.'.format(\
        net.Follower.nunique(), len(net)))
    print('\tAverage number of friends for users: {}'.format(friends_per_user.mean()))
    return net

In [None]:
def reset_id(data, id_map, column_name='UserId'):
    """Replace the values of ``column_name`` with their ``id_map`` images, in place.

    When the remapped column is ``'UserId'``, ``SessionId`` is rebuilt as
    ``'<UserId>_<TimeId>'`` so that it reflects the new user ids.

    Returns the same ``data`` object that was passed in.
    """
    data[column_name] = data[column_name].map(id_map)
    if column_name != 'UserId':
        return data
    # Session ids embed the user id, so they have to follow the remapping.
    data['SessionId'] = [
        '_'.join((str(user), str(bucket)))
        for user, bucket in zip(data['UserId'], data['TimeId'])
    ]
    return data

In [None]:
def split_data(day): #split data for training/validation/testing.
    """Build the train/valid/test splits and write every artefact to the working directory.

    Steps:
      1. Load ratings and the social network, keeping only users present in both.
      2. Keep sessions whose length is in [2, max_length].
      3. Sessions whose ``TimeId`` falls within the last ``holdout_span`` buckets
         (``TimeId >= tmax - holdout_span``) form the holdout set; the rest is training data.
      4. Training keeps items with at least 20 ratings; items with at least 50
         training ratings are the prediction targets kept in valid/test.
      5. The holdout sessions are shuffled and split 50/50 into valid and test.
      6. User and item ids are remapped to contiguous integers (items start at 1).

    Files written: ``user_id_map.tsv``, ``item_id_map.tsv``, ``train.tsv``,
    ``valid.tsv``, ``test.tsv``, ``adj.tsv`` and ``latest_sessions.txt``.

    Args:
        day: Session window width in days, forwarded to ``process_rating``.
    """
    # Number of trailing time buckets (counted back from the last one) reserved for holdout.
    holdout_span = 26

    df_data = process_rating(day)
    df_net = process_social()
    df_net = df_net.loc[df_net['Follower'].isin(df_data['UserId'].unique())]
    df_net = df_net.loc[df_net['Followee'].isin(df_data['UserId'].unique())]
    df_data = df_data.loc[df_data['UserId'].isin(df_net.Follower.unique())]
    
    #restrict session length in [2, max_length]. We set a max_length because too long sequence may come from a fake user.
    df_data = df_data[df_data['SessionId'].groupby(df_data['SessionId']).transform('size')>1]
    df_data = df_data[df_data['SessionId'].groupby(df_data['SessionId']).transform('size')<=max_length]
    
    # split train, test, valid.
    tmax = df_data.TimeId.max()
    session_max_times = df_data.groupby('SessionId').TimeId.max()
    session_train = session_max_times[session_max_times < tmax - holdout_span].index
    session_holdout = session_max_times[session_max_times >= tmax - holdout_span].index
    train_tr = df_data[df_data['SessionId'].isin(session_train)] 
    holdout_data = df_data[df_data['SessionId'].isin(session_holdout)] 
    
    print('Number of train/test: {}/{}'.format(len(train_tr), len(holdout_data)))
   
    train_tr = train_tr[train_tr['ItemId'].groupby(train_tr['ItemId']).transform('size')>=20]
    # .copy(): reset_id() below writes columns in place, so every final frame
    # must own its data rather than be a slice of df_data.
    train_tr = train_tr[train_tr['SessionId'].groupby(train_tr['SessionId']).transform('size')>1].copy()
    
    print('Item size in train data: {}'.format(train_tr['ItemId'].nunique()))
    train_item_counter = Counter(train_tr.ItemId)
    # Iterate the (item, count) pairs directly instead of expanding every single
    # rating through Counter.elements().
    to_predict = {item for item, count in train_item_counter.items() if count >= 50}
    print('Size of to predict: {}'.format(len(to_predict)))
    
    # split holdout to valid and test.
    # NOTE: the shuffle draws from NumPy's global RNG; seed it before calling
    # this function if a reproducible valid/test split is required.
    holdout_cn = holdout_data.SessionId.nunique()
    holdout_ids = holdout_data.SessionId.unique()
    np.random.shuffle(holdout_ids)
    valid_cn = int(holdout_cn * 0.5)
    session_valid = holdout_ids[0: valid_cn]
    session_test = holdout_ids[valid_cn: ]
    valid = holdout_data[holdout_data['SessionId'].isin(session_valid)]
    test = holdout_data[holdout_data['SessionId'].isin(session_test)]

    valid = valid[valid['ItemId'].isin(to_predict)]
    valid = valid[valid['SessionId'].groupby(valid['SessionId']).transform('size')>1].copy()
    
    test = test[test['ItemId'].isin(to_predict)]
    test = test[test['SessionId'].groupby(test['SessionId']).transform('size')>1].copy()

    total_df = pd.concat([train_tr, valid, test])
    df_net = df_net.loc[df_net['Follower'].isin(total_df['UserId'].unique())]
    df_net = df_net.loc[df_net['Followee'].isin(total_df['UserId'].unique())].copy()
    # Users are numbered from 0; items from 1 (0 is used as padding by the batch iterator).
    user_map = dict(zip(total_df.UserId.unique(), range(total_df.UserId.nunique()))) 
    item_map = dict(zip(total_df.ItemId.unique(), range(1, 1+total_df.ItemId.nunique()))) 
    with open('user_id_map.tsv', 'w') as fout:
        for k, v in user_map.items():
            fout.write(str(k) + '\t' + str(v) + '\n')
    with open('item_id_map.tsv', 'w') as fout:
        for k, v in item_map.items():
            fout.write(str(k) + '\t' + str(v) + '\n')
    num_users = len(user_map)
    num_items = len(item_map)
    reset_id(total_df, user_map)
    reset_id(train_tr, user_map)
    reset_id(valid, user_map)
    reset_id(test, user_map)
    reset_id(df_net, user_map, 'Follower')
    reset_id(df_net, user_map, 'Followee')
    reset_id(total_df, item_map, 'ItemId')
    reset_id(train_tr, item_map, 'ItemId')
    reset_id(valid, item_map, 'ItemId')
    reset_id(test, item_map, 'ItemId')
    
    print('Train set\n\tEvents: {}\n\tSessions: {}\n\tItems: {}\n\tAvg length: {}'.format(len(train_tr), train_tr.SessionId.nunique(), train_tr.ItemId.nunique(), train_tr.groupby('SessionId').size().mean()))
    print('Valid set\n\tEvents: {}\n\tSessions: {}\n\tItems: {}\n\tAvg length: {}'.format(len(valid), valid.SessionId.nunique(), valid.ItemId.nunique(), valid.groupby('SessionId').size().mean()))
    print('Test set\n\tEvents: {}\n\tSessions: {}\n\tItems: {}\n\tAvg length: {}'.format(len(test), test.SessionId.nunique(), test.ItemId.nunique(), test.groupby('SessionId').size().mean()))
    # For each user and each time bucket t, record the most recent session that
    # happened strictly before t ('NULL' while the user has no earlier session).
    user2sessions = total_df.groupby('UserId')['SessionId'].apply(set).to_dict()
    user_latest_session = []
    for idx in range(num_users):
        sessions = user2sessions[idx]
        latest = []
        for t in range(tmax+1):
            if t == 0:
                latest.append('NULL')
            else:
                sess_id_tmp = str(idx) + '_' + str(t-1)
                if sess_id_tmp in sessions:
                    latest.append(sess_id_tmp)
                else:
                    # No session in bucket t-1: carry the previous answer forward.
                    latest.append(latest[t-1])
        user_latest_session.append(latest)
    
    train_tr.to_csv('train.tsv', sep='\t', index=False)
    valid.to_csv('valid.tsv', sep='\t', index=False)
    test.to_csv('test.tsv', sep='\t', index=False)
    df_net.to_csv('adj.tsv', sep='\t', index=False)
    with open('latest_sessions.txt', 'w') as fout:
        for idx in range(num_users):
            fout.write(','.join(user_latest_session[idx]) + '\n')

In [23]:
# Segment ratings into 7-day sessions and run the whole preprocessing pipeline.
# split_data() writes its output files to the current working directory.
day = 7
split_data(day)

Statistics of user ratings:
	Number of total ratings: 8483267
	Number of users: 82901
	Number of items: 73677
	Average ratings per user:102.33009251999373
Statistics of social network:
	Total user in social network:112679.
	Total edges(links) in social network:1758302.
	Average number of friends for users: 15.604522581847549
Number of train/test: 2916327/134476
Item size in train data: 12591
Size of to predict: 7531
Train set
	Events: 2717619
	Sessions: 650675
	Items: 12591
	Avg length: 4.176615053598186
Valid set
	Events: 29932
	Sessions: 8033
	Items: 4901
	Avg length: 3.7261297149259307
Test set
	Events: 29519
	Sessions: 8009
	Items: 4822
	Avg length: 3.685728555375203


UTILS

In [None]:
#coding=utf-8
from __future__ import print_function

import numpy as np
import pandas as pd
import random

def load_adj(data_path):
    """Read ``<data_path>/adj.tsv`` into a DataFrame with its first two columns as int32."""
    adj_file = data_path + '/adj.tsv'
    edge_dtypes = {0: np.int32, 1: np.int32}
    return pd.read_csv(adj_file, sep='\t', dtype=edge_dtypes)

def load_latest_session(data_path):
    """Read ``<data_path>/latest_sessions.txt`` as a list of comma-split rows.

    Each line of the file becomes one list of string tokens, in file order.
    The file is opened in a ``with`` block so the handle is always closed.
    """
    with open(data_path + '/latest_sessions.txt') as fin:
        return [line.strip().split(',') for line in fin]

def load_map(data_path, name='user'):
    """Read a two-column, tab-separated id map into a ``{key: value}`` dict of strings.

    Args:
        data_path: Directory holding ``user_id_map.tsv`` / ``item_id_map.tsv``.
        name: ``'user'`` or ``'item'``; selects which map file is read.

    Returns:
        Dict mapping the first column to the second column, both as strings.

    Raises:
        NotImplementedError: if ``name`` is neither ``'user'`` nor ``'item'``.
    """
    if name == 'user':
        file_path = data_path + '/user_id_map.tsv'
    elif name == 'item':
        file_path = data_path + '/item_id_map.tsv'
    else:
        raise NotImplementedError
    id_map = {}
    # `with` closes the handle even if a malformed line raises.
    with open(file_path) as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            k, v = line.split('\t')
            id_map[k] = str(v)
    return id_map

def load_data(data_path):
    """Load every preprocessed artefact found under ``data_path``.

    Returns a list ``[adj, latest_sessions, user_id_map, item_id_map, train,
    valid, test]``; the last three are DataFrames read from the split files.
    """
    loaded = [
        load_adj(data_path),
        load_latest_session(data_path),
        load_map(data_path, 'user'),
        load_map(data_path, 'item'),
    ]
    # The three split files share one layout, so they share one dtype spec.
    split_dtypes = {0: np.int32, 1: np.int32, 3: np.float32}
    for split_file in ('/train.tsv', '/valid.tsv', '/test.tsv'):
        loaded.append(pd.read_csv(data_path + split_file, sep='\t', dtype=split_dtypes))
    return loaded

# Load the preprocessed artefacts from /content (presumably the directory where
# split_data() wrote them -- confirm it matches the working directory).
# NOTE(review): this rebinds `data_path`, which an earlier cell set to the archive path.
data_path = '/content'
data = load_data(data_path)

INITS

In [None]:
import tensorflow as tf
import numpy as np

# DISCLAIMER:
# This file is derived from 
# https://github.com/tkipf/gcn
# which is also under the MIT license

def uniform(shape, scale=0.05, name=None):
    """Create a float32 variable drawn uniformly from [-scale, scale)."""
    return tf.Variable(
        tf.random_uniform(shape, minval=-scale, maxval=scale, dtype=tf.float32),
        name=name)


def glorot(shape, name=None):
    """Create a float32 variable with Glorot & Bengio (AISTATS 2010) uniform init.

    The sampling bound is ``sqrt(6 / (shape[0] + shape[1]))``.
    """
    fan_in, fan_out = shape[0], shape[1]
    bound = np.sqrt(6.0 / (fan_in + fan_out))
    return tf.Variable(
        tf.random_uniform(shape, minval=-bound, maxval=bound, dtype=tf.float32),
        name=name)


def zeros(shape, name=None):
    """Create a float32 variable of the given shape filled with zeros."""
    return tf.Variable(tf.zeros(shape, dtype=tf.float32), name=name)

def ones(shape, name=None):
    """Create a float32 variable of the given shape filled with ones."""
    return tf.Variable(tf.ones(shape, dtype=tf.float32), name=name)

Neighbor Samplers

In [None]:
"""
Classes that are used to sample node neighborhoods
"""

class UniformNeighborSampler(object):
    """
    Uniformly samples a fixed number of time-visible neighbors per node.

    ``adj_info`` is indexed as ``adj_info[node, :]`` to obtain a node's neighbor
    row, ``visible_time[node]`` is compared against the query time, and
    ``deg[node]`` is the node's neighbor count.
    """
    def __init__(self, adj_info, visible_time, deg):
        self.adj_info, self.visible_time, self.deg = adj_info, visible_time, deg

    def __call__(self, inputs):
        """Sample ``num_samples`` neighbors for every node id.

        ``inputs`` is ``(nodeids, num_samples, timeids, first_or_second,
        support_size)``. Node ``i`` uses ``timeids[i // support_size]``. In
        ``'second'`` mode a neighbor qualifies when it is visible at that time;
        in ``'first'`` mode it must additionally have neighbors of its own, at
        least one of which is visible. Returns an int32 array with one row per node.
        """
        nodeids, num_samples, timeids, first_or_second, support_size = inputs
        sampled_rows = []
        for position, node in enumerate(nodeids):
            timeid = timeids[position // support_size]
            candidates = []
            for neighbor in self.adj_info[node, :]:
                if first_or_second == 'second':
                    keep = self.visible_time[neighbor] <= timeid
                elif first_or_second == 'first':
                    keep = (self.visible_time[neighbor] <= timeid
                            and self.deg[neighbor] > 0
                            and any(self.visible_time[second_neighbor] <= timeid
                                    for second_neighbor in self.adj_info[neighbor]))
                else:
                    keep = False
                if keep:
                    candidates.append(neighbor)
            assert len(candidates) > 0
            if len(candidates) != num_samples:
                # Too few candidates: draw with replacement; too many: without.
                candidates = np.random.choice(
                    candidates, num_samples, replace=len(candidates) < num_samples)
            sampled_rows.append(candidates)
        return np.array(sampled_rows, dtype=np.int32)

LAYERS

In [None]:
from __future__ import division
from __future__ import print_function

# DISCLAIMER:
# This file is forked from 
# https://github.com/tkipf/gcn
# which is also under the MIT license

# Registry of how many IDs have been handed out per layer name.
_LAYER_UIDS = {}

def get_layer_uid(layer_name=''):
    """Return the next 1-based sequence number for ``layer_name`` and record it."""
    next_uid = _LAYER_UIDS.get(layer_name, 0) + 1
    _LAYER_UIDS[layer_name] = next_uid
    return next_uid

class Layer(object):
    """Common base for all layers, loosely modelled on keras (http://keras.io).

    # Properties
        name: String used as the name scope of the layer; generated from the
            lower-cased class name plus a running number when not supplied.
        logging: Boolean, turns TensorFlow histogram summaries on/off.
    # Methods
        _call(inputs): Builds the layer's computation (identity here).
        __call__(inputs): Runs _call() inside the layer's name scope.
        _log_vars(): Emits a histogram summary for every entry of ``vars``.
    """

    def __init__(self, **kwargs):
        allowed_kwargs = {'name', 'logging', 'model_size'}
        for kwarg in kwargs:
            assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg
        layer_name = kwargs.get('name')
        if not layer_name:
            # No explicit name: derive "<classname>_<n>" with a per-class counter.
            prefix = self.__class__.__name__.lower()
            layer_name = prefix + '_' + str(get_layer_uid(prefix))
        self.name = layer_name
        self.vars = {}
        self.logging = kwargs.get('logging', False)
        self.sparse_inputs = False

    def _call(self, inputs):
        return inputs

    def __call__(self, inputs):
        with tf.name_scope(self.name):
            # Input histograms are skipped for layers flagged as sparse.
            log_inputs = self.logging and not self.sparse_inputs
            if log_inputs:
                tf.summary.histogram(self.name + '/inputs', inputs)
            outputs = self._call(inputs)
            if self.logging:
                tf.summary.histogram(self.name + '/outputs', outputs)
            return outputs

    def _log_vars(self):
        for var_name, variable in self.vars.items():
            tf.summary.histogram(self.name + '/vars/' + var_name, variable)


class Dense(Layer):
    """Fully connected layer: dropout on the input, then ``act(x . weights + bias)``."""
    def __init__(self, input_dim, output_dim, dropout=0., weight_decay=0.,
                 act=tf.nn.relu, placeholders=None, bias=True, featureless=False, 
                 sparse_inputs=False, **kwargs):
        super(Dense, self).__init__(**kwargs)

        # Layer geometry.
        self.input_dim, self.output_dim = input_dim, output_dim
        # Regularisation settings.
        self.dropout, self.weight_decay = dropout, weight_decay
        self.act = act
        self.bias = bias
        self.featureless = featureless

        # Sparse inputs need the non-zero count supplied through `placeholders`.
        self.sparse_inputs = sparse_inputs
        if sparse_inputs:
            self.num_features_nonzero = placeholders['num_features_nonzero']

        scope_name = self.name + '_vars'
        with tf.variable_scope(scope_name):
            # Xavier-initialised weights carrying an L2 regulariser scaled by weight_decay.
            weights = tf.get_variable('weights', shape=(input_dim, output_dim),
                                      dtype=tf.float32,
                                      initializer=tf.contrib.layers.xavier_initializer(),
                                      regularizer=tf.contrib.layers.l2_regularizer(self.weight_decay))
            self.vars['weights'] = weights
            if self.bias:
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        # tf.nn.dropout takes the keep probability, hence 1 - dropout.
        keep_prob = 1-self.dropout
        dropped = tf.nn.dropout(inputs, keep_prob)

        # Affine transform, with the bias added only when enabled.
        output = tf.matmul(dropped, self.vars['weights'])
        if self.bias:
            output = output + self.vars['bias']

        return self.act(output)

MINI BATCH

In [28]:
import numpy as np
import pandas as pd
import sys

# Fix NumPy's global RNG so the np.random.choice / permutation calls made by
# MinibatchIterator below are repeatable for a given execution order.
np.random.seed(123)

class MinibatchIterator(object):
    """Serves session batches, together with sampled social neighbourhoods, as feed dicts.

    On construction the iterator builds fixed-width adjacency tables (one from
    the training users, one from all users), filters out sessions whose user
    has no usable neighbourhood, and pads every session to ``max_length``.
    """
    
    def __init__(self, 
                adj_info, # in pandas dataframe
                latest_sessions,
                data, # data list: [train, valid, test] (exactly three frames are unpacked).
                placeholders,
                batch_size,
                max_degree,
                num_nodes,
                max_length=20,
                samples_1_2=[10,5],
                training=True):
        self.num_layers = 2 # Currently, only 2 layer is supported.
        self.adj_info = adj_info
        self.latest_sessions = latest_sessions
        self.training = training
        self.train_df, self.valid_df, self.test_df = data
        self.all_data = pd.concat(data)
        self.placeholders = placeholders
        self.batch_size = batch_size
        self.max_degree = max_degree
        self.num_nodes = num_nodes
        self.max_length = max_length
        # Copy so the instance never aliases the shared default list.
        self.samples_1_2 = list(samples_1_2)
        self.sizes = [1, samples_1_2[1], samples_1_2[1]*samples_1_2[0]]
        self.visible_time = self.user_visible_time()
        # Order matters for reproducibility: both builders draw from np.random.
        self.test_adj, self.test_deg = self.construct_test_adj()
        if self.training:
            self.adj, self.deg = self.construct_adj()
            self.train_session_ids = self._remove_infoless(self.train_df, self.adj, self.deg)
            self.valid_session_ids = self._remove_infoless(self.valid_df, self.test_adj, self.test_deg)
            self.sampler = UniformNeighborSampler(self.adj, self.visible_time, self.deg)
        
        self.test_session_ids = self._remove_infoless(self.test_df, self.test_adj, self.test_deg)
       
        self.padded_data, self.mask = self._padding_sessions(self.all_data)
        self.test_sampler = UniformNeighborSampler(self.test_adj, self.visible_time, self.test_deg)
        
        self.batch_num = 0
        self.batch_num_val = 0
        self.batch_num_test = 0

    def user_visible_time(self):
        '''
            Find out when each user is 'visible' to her friends, i.e., every user's first click/watching time.
            For each row of latest_sessions this is one past the last index holding 'NULL'.
        '''
        visible_time = []
        for l in self.latest_sessions:
            timeid = max(loc for loc, val in enumerate(l) if val == 'NULL') + 1
            visible_time.append(timeid)
            assert timeid > 0 and timeid < len(l), 'Wrong when create visible time {}'.format(timeid)
        return visible_time

    def _remove_infoless(self, data, adj, deg):
        '''
        Remove users who have no sufficient friends.
        A session is kept when its user has at least one neighbor that is visible
        at the session time, has neighbors of its own, and has at least one
        visible neighbor among them. Returns the list of kept session ids.
        '''
        data = data.loc[deg[data['UserId']] != 0]
        reserved_session_ids = []
        print('sessions: {}\tratings: {}'.format(data.SessionId.nunique(), len(data)))
        for sessid in data.SessionId.unique():
            userid, timeid = sessid.split('_')
            userid, timeid = int(userid), int(timeid)
            # cn_1 counts neighbors examined before the first usable one is found.
            cn_1 = 0
            for neighbor in adj[userid, : ]:
                if self.visible_time[neighbor] <= timeid and deg[neighbor] > 0:
                    # cn_2 counts second-order neighbors examined before a visible one.
                    cn_2 = 0
                    for second_neighbor in adj[neighbor, : ]:
                        if self.visible_time[second_neighbor] <= timeid:
                            break
                        cn_2 += 1
                    if cn_2 < self.max_degree:
                        break
                cn_1 += 1
            if cn_1 < self.max_degree:
                reserved_session_ids.append(sessid)
        return reserved_session_ids

    def _padding_sessions(self, data):
        '''
        Pad zeros at the end of each session to length self.max_length for batch training.
        Returns ({session_id: [x, y, v]}, {session_id: mask}) where x is the session
        without its last item, y the session without its first item, v the full
        session, and mask marks the non-padded positions of x/y.
        '''
        # All rows of a session share one TimeId, so the sort key is tied within
        # a session. A stable sort keeps the input row order for those ties
        # instead of leaving the in-session item order unspecified.
        data = data.sort_values(by=['TimeId'], kind='mergesort').groupby('SessionId')['ItemId'].apply(list).to_dict()
        new_data = {}
        data_mask = {}
        for k, v in data.items():
            mask = np.ones(self.max_length, dtype=np.float32)
            x = v[:-1]
            y = v[1: ]
            assert len(x) > 0
            padded_len = self.max_length - len(x)
            if padded_len > 0:
                x.extend([0] * padded_len)
                y.extend([0] * padded_len)
                mask[-padded_len: ] = 0.
            v.extend([0] * (self.max_length - len(v)))
            x = x[:self.max_length]
            y = y[:self.max_length]
            v = v[:self.max_length]
            new_data[k] = [np.array(x, dtype=np.int32), np.array(y, dtype=np.int32), np.array(v, dtype=np.int32)]
            data_mask[k] = np.array(mask, dtype=bool)
        return new_data, data_mask

    def _batch_feed_dict(self, current_batch):
        '''
        Construct batch inputs.
        current_batch is (session ids, sampled node lists, support sizes) as
        produced by sample(); the result maps placeholders to batch values.
        '''
        current_batch_sess_ids, samples, support_sizes = current_batch
        feed_dict = {}
        input_x = []
        input_y = []
        mask_y = []
        timeids = []
        for sessid in current_batch_sess_ids:
            nodeid, timeid = sessid.split('_')
            timeids.append(int(timeid))
            x, y, _ = self.padded_data[sessid]
            mask = self.mask[sessid]
            input_x.append(x)
            input_y.append(y)
            mask_y.append(mask)
        feed_dict.update({self.placeholders['input_x']: input_x})
        feed_dict.update({self.placeholders['input_y']: input_y})
        feed_dict.update({self.placeholders['mask_y']: mask_y})

        feed_dict.update({self.placeholders['support_nodes_layer1']: samples[2]})
        feed_dict.update({self.placeholders['support_nodes_layer2']: samples[1]})
        # Prepare the supporting users' most recent sessions.
        support_layers_session = []
        support_layers_length = []
        for layer in range(self.num_layers):
            start = 0
            t = self.num_layers - layer
            support_sessions = []
            support_lengths = []
            for batch in range(self.batch_size):
                timeid = timeids[batch]
                support_nodes = samples[t][start: start + support_sizes[t]]
                for support_node in support_nodes:
                    support_session_id = str(self.latest_sessions[support_node][timeid])
                    support_session = self.padded_data[support_session_id][2]
                    # Item id 0 is padding, so the non-zero count is the session length.
                    length = np.count_nonzero(support_session)
                    support_sessions.append(support_session)
                    support_lengths.append(length)
                start += support_sizes[t]
            support_layers_session.append(support_sessions)
            support_layers_length.append(support_lengths)
        feed_dict.update({self.placeholders['support_sessions_layer1']:support_layers_session[0]})
        feed_dict.update({self.placeholders['support_sessions_layer2']:support_layers_session[1]})
        feed_dict.update({self.placeholders['support_lengths_layer1']:support_layers_length[0]})
        feed_dict.update({self.placeholders['support_lengths_layer2']:support_layers_length[1]})
        return feed_dict 

    def sample(self, nodeids, timeids, sampler):
        '''
        Sample neighbors recursively. First-order, then second-order, ...
        Returns (samples, support_sizes): samples[0] is nodeids and samples[k]
        is the flattened k-th hop; support_sizes[k] is the hop-k count per batch element.
        '''
        samples = [nodeids]
        support_size = 1
        support_sizes = [support_size]
        first_or_second = ['second', 'first']
        for k in range(self.num_layers):
            t = self.num_layers - k - 1
            node = sampler([samples[k], self.samples_1_2[t], timeids, first_or_second[t], support_size])
            support_size *= self.samples_1_2[t]
            samples.append(np.reshape(node, [support_size * self.batch_size,]))
            support_sizes.append(support_size)
        return samples, support_sizes

    def next_val_minibatch_feed_dict(self, val_or_test='val'):
        '''
        Construct evaluation or test inputs.
        Raises NotImplementedError for any val_or_test other than 'val' / 'test'.
        '''
        if val_or_test == 'val':
            start = self.batch_num_val * self.batch_size
            self.batch_num_val += 1
            data = self.valid_session_ids
        elif val_or_test == 'test':
            start = self.batch_num_test * self.batch_size
            self.batch_num_test += 1
            data = self.test_session_ids
        else:
            raise NotImplementedError
        
        current_batch_sessions = data[start: start + self.batch_size]
        nodes = [int(sessionid.split('_')[0]) for sessionid in current_batch_sessions]
        timeids = [int(sessionid.split('_')[1]) for sessionid in current_batch_sessions]
        samples, support_sizes = self.sample(nodes, timeids, self.test_sampler)
        return self._batch_feed_dict([current_batch_sessions, samples, support_sizes])

    def next_train_minibatch_feed_dict(self):
        '''
        Generate next training batch data.
        '''
        start = self.batch_num * self.batch_size
        self.batch_num += 1
        current_batch_sessions = self.train_session_ids[start: start + self.batch_size]
        nodes = [int(sessionid.split('_')[0]) for sessionid in current_batch_sessions]
        timeids = [int(sessionid.split('_')[1]) for sessionid in current_batch_sessions]
        samples, support_sizes = self.sample(nodes, timeids, self.sampler)
        return self._batch_feed_dict([current_batch_sessions, samples, support_sizes])

    def _build_adj(self, user_ids):
        '''
        Build the (num_nodes+1, max_degree) neighbor table and the degree vector
        for the given users. Rows of users without followees keep the padding
        value num_nodes. Rows are resampled to exactly max_degree entries.
        '''
        adj = self.num_nodes*np.ones((self.num_nodes+1, self.max_degree), dtype=np.int32)
        deg = np.zeros((self.num_nodes,))
        # Group the edge list once rather than scanning it again for every user.
        # unique() keeps first-appearance order, as a per-user filter would.
        followees_of = self.adj_info.groupby('Follower')['Followee'].unique().to_dict()
        for nodeid in user_ids:
            neighbors = np.asarray(followees_of.get(nodeid, ()), dtype=np.int32)
            deg[nodeid] = len(neighbors)
            if len(neighbors) == 0:
                continue
            if len(neighbors) > self.max_degree:
                neighbors = np.random.choice(neighbors, self.max_degree, replace=False)
            elif len(neighbors) < self.max_degree:
                neighbors = np.random.choice(neighbors, self.max_degree, replace=True)
            adj[nodeid, :] = neighbors
        return adj, deg

    def construct_adj(self):
        '''
        Construct adj table used during training.
        '''
        return self._build_adj(self.train_df.UserId.unique())

    def construct_test_adj(self):
        '''
        Construct adj table used during evaluation or testing.
        '''
        return self._build_adj(self.all_data.UserId.unique())

    def end(self):
        '''
        Indicate whether we finish a pass over all training samples.
        '''
        return self.batch_num * self.batch_size > len(self.train_session_ids) - self.batch_size
    
    def end_val(self, val_or_test='val'):
        '''
        Indicate whether we finish a pass over all testing or evaluation samples.
        When the pass is finished, only the counter of the requested split is
        reset. Raises NotImplementedError for an unknown val_or_test.
        '''
        if val_or_test == 'val':
            batch_num, data = self.batch_num_val, self.valid_session_ids
        elif val_or_test == 'test':
            batch_num, data = self.batch_num_test, self.test_session_ids
        else:
            raise NotImplementedError
        end = batch_num * self.batch_size > len(data) - self.batch_size
        if end:
            if val_or_test == 'val':
                self.batch_num_val = 0
            else:
                # Finishing a test pass must not rewind the validation counter.
                self.batch_num_test = 0
        return end

    def shuffle(self):
        '''
        Shuffle training data.
        '''
        self.train_session_ids = np.random.permutation(self.train_session_ids)
        self.batch_num = 0

# Reload the preprocessed artefacts and unpack them in load_data()'s return order.
data = load_data('/content')
(adj_info, latest_per_user_by_time, user_id_map, item_id_map,
 train_df, valid_df, test_df) = data

# Smoke-test construction of the iterator; no placeholders are supplied, so
# feed dicts cannot be built from this instance.
minibatch = MinibatchIterator(
    adj_info,
    latest_per_user_by_time,
    [train_df, valid_df, test_df],
    None,  # placeholders
    batch_size=1,
    max_degree=50,
    num_nodes=len(user_id_map),
    max_length=20,
    samples_1_2=[10, 5],
)

sessions: 613221	ratings: 2554567
sessions: 7354	ratings: 27185
sessions: 7304	ratings: 26705


AGGREGATORS

In [None]:
# Mean, MaxPool, GCN aggregators are collected from 
# https://github.com/williamleif/GraphSAGE
# which is also under the MIT license

class MeanAggregator(Layer):
    """
    Aggregates via mean followed by matmul and non-linearity.

    The neighbor vectors are averaged over axis 1 and projected with
    ``neigh_weights``; the node's own vector is projected with ``self_weights``.
    The two projections are summed, or concatenated along axis 1 when
    ``concat`` is True (giving an output width of ``2 * output_dim``).
    """

    def __init__(self, input_dim, output_dim, neigh_input_dim=None,
            dropout=0., bias=False, act=tf.nn.relu, 
            name=None, concat=False, **kwargs):
        super(MeanAggregator, self).__init__(**kwargs)

        self.dropout = dropout
        self.bias = bias
        self.act = act
        self.concat = concat
        # Set the dimensions before any variable is created: the bias used to
        # read self.output_dim before it existed, raising AttributeError
        # whenever bias=True.
        self.input_dim = input_dim
        self.output_dim = output_dim

        if neigh_input_dim is None:
            neigh_input_dim = input_dim

        if name is not None:
            name = '/' + name
        else:
            name = ''

        # With concat=True the output is [from_self, from_neighs] side by side,
        # so the bias must span both halves to be addable.
        bias_dim = 2 * output_dim if concat else output_dim

        with tf.variable_scope(self.name + name + '_vars'):
            self.vars['neigh_weights'] = glorot([neigh_input_dim, output_dim],
                                                        name='neigh_weights')
            self.vars['self_weights'] = glorot([input_dim, output_dim],
                                                        name='self_weights')
            if self.bias:
                self.vars['bias'] = zeros([bias_dim], name='bias')

        if self.logging:
            self._log_vars()

    def _call(self, inputs):
        """Combine ``(self_vecs, neigh_vecs)`` into one activated vector per node."""
        self_vecs, neigh_vecs = inputs

        # tf.nn.dropout takes the keep probability, hence 1 - dropout.
        neigh_vecs = tf.nn.dropout(neigh_vecs, 1-self.dropout)
        self_vecs = tf.nn.dropout(self_vecs, 1-self.dropout)
        neigh_means = tf.reduce_mean(neigh_vecs, axis=1)
       
        # [nodes] x [out_dim]
        from_neighs = tf.matmul(neigh_means, self.vars['neigh_weights'])

        from_self = tf.matmul(self_vecs, self.vars["self_weights"])
         
        if not self.concat:
            output = tf.add_n([from_self, from_neighs])
        else:
            output = tf.concat([from_self, from_neighs], axis=1)

        # bias
        if self.bias:
            output += self.vars['bias']
       
        return self.act(output)

class GCNAggregator(Layer):
    """
    Aggregates via mean followed by matmul and non-linearity.
    The same matmul parameters are used for the self vector and the neighbor
    vectors: the self vector is appended to the neighbor set before averaging.

    Args:
        input_dim: width of the self vectors.
        output_dim: width of the output.
        neigh_input_dim: width of the neighbor vectors (defaults to input_dim).
        dropout: drop probability; ``1 - dropout`` is passed to tf.nn.dropout
            as keep_prob.
        bias: whether to add a learned bias to the output.
        act: activation applied to the final output.
        name: optional suffix for the variable scope.
        concat: stored on the instance but not used by ``_call``.
        **kwargs: forwarded to the ``Layer`` base class.
    """

    def __init__(self, input_dim, output_dim, neigh_input_dim=None,
            dropout=0., bias=False, act=tf.nn.relu, name=None, concat=False, **kwargs):
        super(GCNAggregator, self).__init__(**kwargs)

        self.dropout = dropout
        self.bias = bias
        self.act = act
        self.concat = concat

        if neigh_input_dim is None:
            neigh_input_dim = input_dim

        if name is not None:
            name = '/' + name
        else:
            name = ''

        with tf.variable_scope(self.name + name + '_vars'):
            self.vars['weights'] = glorot([neigh_input_dim, output_dim],
                                                        name='neigh_weights')
            if self.bias:
                # Use the local `output_dim`: self.output_dim is only assigned
                # further down, so reading it here would fail.
                self.vars['bias'] = zeros([output_dim], name='bias')

        if self.logging:
            self._log_vars()

        self.input_dim = input_dim
        self.output_dim = output_dim

    def _call(self, inputs):
        """Apply the aggregator to a ``(self_vecs, neigh_vecs)`` pair."""
        self_vecs, neigh_vecs = inputs

        # Second positional argument of tf.nn.dropout (TF 1.x) is keep_prob.
        neigh_vecs = tf.nn.dropout(neigh_vecs, 1-self.dropout)
        self_vecs = tf.nn.dropout(self_vecs, 1-self.dropout)
        # Treat the node itself as one more neighbor, then average over axis 1.
        means = tf.reduce_mean(tf.concat([neigh_vecs, 
            tf.expand_dims(self_vecs, axis=1)], axis=1), axis=1)
       
        # [nodes] x [out_dim]
        output = tf.matmul(means, self.vars['weights'])

        # bias
        if self.bias:
            output += self.vars['bias']
       
        return self.act(output)


class AttentionAggregator(Layer):
    """
    Aggregates via dot-product attention followed by matmul and non-linearity.

    The node's own vector is used as the query against the set made of its
    neighbor vectors plus itself; the softmax-normalised dot products weight a
    sum over that set, and the result is projected with ``weights``.

    Args:
        input_dim: width of the self vectors.
        output_dim: width of the output.
        neigh_input_dim: width of the neighbor vectors (defaults to input_dim).
        dropout: drop probability; ``1 - dropout`` is passed to tf.nn.dropout
            as keep_prob.
        bias: whether to add a learned bias to the output.
        act: activation applied to the final output.
        name: optional suffix for the variable scope.
        concat: stored on the instance but not used by ``_call``.
        **kwargs: forwarded to the ``Layer`` base class.
    """
    
    def __init__(self, input_dim, output_dim, neigh_input_dim=None,
            dropout=0., bias=False, act=tf.nn.relu, name=None, concat=False, **kwargs):
        super(AttentionAggregator, self).__init__(**kwargs)

        self.dropout = dropout
        self.bias = bias
        self.act = act
        self.concat = concat

        if neigh_input_dim is None:
            neigh_input_dim = input_dim

        if name is not None:
            name = '/' + name
        else:
            name = ''

        with tf.variable_scope(self.name + name + '_vars'):
            self.vars['weights'] = glorot([neigh_input_dim, output_dim],
                                                        name='neigh_weights')
            if self.bias:
                # Use the local `output_dim`: self.output_dim is only assigned
                # further down, so reading it here would fail.
                self.vars['bias'] = zeros([output_dim], name='neigh_bias')


        if self.logging:
            self._log_vars()

        self.input_dim = input_dim
        self.output_dim = output_dim
    
    def _call(self, inputs):
        """Apply the aggregator to a ``(self_vecs, neigh_vecs)`` pair."""
        self_vecs, neigh_vecs = inputs

        # Second positional argument of tf.nn.dropout (TF 1.x) is keep_prob.
        neigh_vecs = tf.nn.dropout(neigh_vecs, 1-self.dropout)
        self_vecs = tf.nn.dropout(self_vecs, 1-self.dropout)
        
        # Reshape from [batch_size, depth] to [batch_size, 1, depth] for matmul.
        query = tf.expand_dims(self_vecs, 1)
        neigh_self_vecs = tf.concat([neigh_vecs, query], axis=1)
        # score shape is [batch_size, 1, num_neighbors + 1]
        score = tf.matmul(query, neigh_self_vecs, transpose_b=True)
        score = tf.nn.softmax(score, dim=-1)

        # context shape is [batch_size, 1, depth]; squeezed to [batch_size, depth]
        context = tf.matmul(score, neigh_self_vecs)
        context = tf.squeeze(context, [1])

        # [nodes] x [out_dim]
        output = tf.matmul(context, self.vars['weights'])

        # bias
        if self.bias:
            output += self.vars['bias']
       
        return self.act(output)


class MaxPoolingAggregator(Layer):
    """ Aggregates via max-pooling over MLP functions.

    Every neighbor vector is passed through a one-layer ``Dense`` MLP, the
    results are max-reduced over the neighbor axis, projected with
    ``neigh_weights`` and combined with the ``self_weights`` projection of the
    node's own vector (summed, or concatenated along axis 1 when ``concat``).

    Args:
        input_dim: width of the self vectors.
        output_dim: width of each projection.
        model_size: "small" (hidden width 512) or "big" (hidden width 1024).
        neigh_input_dim: width of the neighbor vectors (defaults to input_dim).
        dropout: drop probability handed to the MLP layer.
        bias: whether to add a learned bias to the output.
        act: activation applied to the final output.
        name: optional suffix for the variable scope.
        concat: concatenate (instead of add) the self and neighbor projections.
        **kwargs: forwarded to the ``Layer`` base class.

    Raises:
        ValueError: if ``model_size`` is neither "small" nor "big".
    """
    def __init__(self, input_dim, output_dim, model_size="small", neigh_input_dim=None,
            dropout=0., bias=False, act=tf.nn.relu, name=None, concat=False, **kwargs):
        super(MaxPoolingAggregator, self).__init__(**kwargs)

        self.dropout = dropout
        self.bias = bias
        self.act = act
        self.concat = concat

        if neigh_input_dim is None:
            neigh_input_dim = input_dim

        if name is not None:
            name = '/' + name
        else:
            name = ''

        if model_size == "small":
            hidden_dim = self.hidden_dim = 512
        elif model_size == "big":
            hidden_dim = self.hidden_dim = 1024
        else:
            # Previously fell through and failed later on an unbound hidden_dim.
            raise ValueError(
                "Unknown model_size: {!r} (expected 'small' or 'big')".format(model_size))

        self.mlp_layers = []
        self.mlp_layers.append(Dense(input_dim=neigh_input_dim,
                                 output_dim=hidden_dim,
                                 act=tf.nn.relu,
                                 dropout=dropout,
                                 sparse_inputs=False,
                                 logging=self.logging))

        with tf.variable_scope(self.name + name + '_vars'):
            self.vars['neigh_weights'] = glorot([hidden_dim, output_dim],
                                                        name='neigh_weights')
           
            self.vars['self_weights'] = glorot([input_dim, output_dim],
                                                        name='self_weights')
            if self.bias:
                # Use the local `output_dim`: self.output_dim is only assigned
                # further down.  With concat the output is twice as wide, so
                # the bias has to match that width to be addable.
                bias_dim = 2 * output_dim if self.concat else output_dim
                self.vars['bias'] = zeros([bias_dim], name='bias')

        if self.logging:
            self._log_vars()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.neigh_input_dim = neigh_input_dim

    def _call(self, inputs):
        """Apply the aggregator to a ``(self_vecs, neigh_vecs)`` pair."""
        self_vecs, neigh_vecs = inputs
        neigh_h = neigh_vecs

        dims = tf.shape(neigh_h)
        batch_size = dims[0]
        num_neighbors = dims[1]
        # Flatten to [nodes * sampled neighbors] x [neigh_input_dim] for the MLP.
        h_reshaped = tf.reshape(neigh_h, (batch_size * num_neighbors, self.neigh_input_dim))

        for l in self.mlp_layers:
            h_reshaped = l(h_reshaped)
        # Back to [nodes] x [sampled neighbors] x [hidden_dim], then pool.
        neigh_h = tf.reshape(h_reshaped, (batch_size, num_neighbors, self.hidden_dim))
        neigh_h = tf.reduce_max(neigh_h, axis=1)
        
        from_neighs = tf.matmul(neigh_h, self.vars['neigh_weights'])
        from_self = tf.matmul(self_vecs, self.vars["self_weights"])
        
        if not self.concat:
            output = tf.add_n([from_self, from_neighs])
        else:
            output = tf.concat([from_self, from_neighs], axis=1)

        # bias
        if self.bias:
            output += self.vars['bias']
       
        return self.act(output)


class MeanPoolingAggregator(Layer):
    """ Aggregates via mean-pooling over MLP functions.

    Every neighbor vector is passed through a one-layer ``Dense`` MLP, the
    results are averaged over the neighbor axis, projected with
    ``neigh_weights`` and combined with the ``self_weights`` projection of the
    node's own vector (summed, or concatenated along axis 1 when ``concat``).

    Args:
        input_dim: width of the self vectors.
        output_dim: width of each projection.
        model_size: "small" (hidden width 512) or "big" (hidden width 1024).
        neigh_input_dim: width of the neighbor vectors (defaults to input_dim).
        dropout: drop probability handed to the MLP layer.
        bias: whether to add a learned bias to the output.
        act: activation applied to the final output.
        name: optional suffix for the variable scope.
        concat: concatenate (instead of add) the self and neighbor projections.
        **kwargs: forwarded to the ``Layer`` base class.

    Raises:
        ValueError: if ``model_size`` is neither "small" nor "big".
    """
    def __init__(self, input_dim, output_dim, model_size="small", neigh_input_dim=None,
            dropout=0., bias=False, act=tf.nn.relu, name=None, concat=False, **kwargs):
        super(MeanPoolingAggregator, self).__init__(**kwargs)

        self.dropout = dropout
        self.bias = bias
        self.act = act
        self.concat = concat

        if neigh_input_dim is None:
            neigh_input_dim = input_dim

        if name is not None:
            name = '/' + name
        else:
            name = ''

        if model_size == "small":
            hidden_dim = self.hidden_dim = 512
        elif model_size == "big":
            hidden_dim = self.hidden_dim = 1024
        else:
            # Previously fell through and failed later on an unbound hidden_dim.
            raise ValueError(
                "Unknown model_size: {!r} (expected 'small' or 'big')".format(model_size))

        self.mlp_layers = []
        self.mlp_layers.append(Dense(input_dim=neigh_input_dim,
                                 output_dim=hidden_dim,
                                 act=tf.nn.relu,
                                 dropout=dropout,
                                 sparse_inputs=False,
                                 logging=self.logging))

        with tf.variable_scope(self.name + name + '_vars'):
            self.vars['neigh_weights'] = glorot([hidden_dim, output_dim],
                                                        name='neigh_weights')
           
            self.vars['self_weights'] = glorot([input_dim, output_dim],
                                                        name='self_weights')
            if self.bias:
                # Use the local `output_dim`: self.output_dim is only assigned
                # further down.  With concat the output is twice as wide, so
                # the bias has to match that width to be addable.
                bias_dim = 2 * output_dim if self.concat else output_dim
                self.vars['bias'] = zeros([bias_dim], name='bias')

        if self.logging:
            self._log_vars()

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.neigh_input_dim = neigh_input_dim

    def _call(self, inputs):
        """Apply the aggregator to a ``(self_vecs, neigh_vecs)`` pair."""
        self_vecs, neigh_vecs = inputs
        neigh_h = neigh_vecs

        dims = tf.shape(neigh_h)
        batch_size = dims[0]
        num_neighbors = dims[1]
        # Flatten to [nodes * sampled neighbors] x [neigh_input_dim] for the MLP.
        h_reshaped = tf.reshape(neigh_h, (batch_size * num_neighbors, self.neigh_input_dim))

        for l in self.mlp_layers:
            h_reshaped = l(h_reshaped)
        # Back to [nodes] x [sampled neighbors] x [hidden_dim], then pool.
        neigh_h = tf.reshape(h_reshaped, (batch_size, num_neighbors, self.hidden_dim))
        neigh_h = tf.reduce_mean(neigh_h, axis=1)
        
        from_neighs = tf.matmul(neigh_h, self.vars['neigh_weights'])
        from_self = tf.matmul(self_vecs, self.vars["self_weights"])
        
        if not self.concat:
            output = tf.add_n([from_self, from_neighs])
        else:
            output = tf.concat([from_self, from_neighs], axis=1)

        # bias
        if self.bias:
            output += self.vars['bias']
       
        return self.act(output)


MODEL

In [None]:
class DGRec(object):
    """Session-based recommender combining an LSTM over the target session
    with two layers of neighbor aggregation over sampled support users.

    Constructing an instance builds the whole TF 1.x graph (see ``build``) and
    exposes ``loss``, ``sum_recall``, ``sum_ndcg``, ``point_count`` and
    ``opt_op`` for use with ``Session.run``.
    """

    def __init__(self, args, support_sizes, placeholders):
        """Read hyper-parameters and input tensors, then build the graph.

        Args:
            args: object carrying the hyper-parameters as attributes
                (``aggregator_type``, ``act``, ``batch_size``, ``hidden_size``,
                ``samples_1``, ``samples_2``, ``num_items``, ``num_users``,
                ``embedding_size``, ``emb_user``, ``max_length``,
                ``model_size``, ``dropout``, ``dim1``, ``dim2``,
                ``weight_decay``, ``global_only``, ``local_only``,
                ``training``, ``concat``, ``learning_rate``, ``decay_steps``,
                ``decay_rate``).
            support_sizes: per-hop support sizes used by ``aggregate`` to
                reshape neighbor tensors.
            placeholders: dict of input tensors keyed by ``input_x``,
                ``input_y``, ``mask_y`` and the ``support_*_layer{1,2}`` names.

        Raises:
            ValueError: if ``args.aggregator_type`` is not recognised.
            NotImplementedError: if ``args.act`` is not recognised.
        """
        self.support_sizes = support_sizes
        # Kept as an if/elif chain so that each aggregator class name is only
        # looked up when its branch is actually selected.
        if args.aggregator_type == "mean":
            self.aggregator_cls = MeanAggregator
        elif args.aggregator_type == "seq":
            self.aggregator_cls = SeqAggregator
        elif args.aggregator_type == "maxpool":
            self.aggregator_cls = MaxPoolingAggregator
        elif args.aggregator_type == "meanpool":
            self.aggregator_cls = MeanPoolingAggregator
        elif args.aggregator_type == "gcn":
            self.aggregator_cls = GCNAggregator
        elif args.aggregator_type == "attn":
            self.aggregator_cls = AttentionAggregator
        else:
            # Report the offending option; self.aggregator_cls is not set on
            # this path, so referencing it would mask the real error.
            raise ValueError("Unknown aggregator: {}".format(args.aggregator_type))
        self.input_x = placeholders['input_x']
        self.input_y = placeholders['input_y']
        self.mask_y = placeholders['mask_y']
        self.mask = tf.cast(self.mask_y, dtype=tf.float32)
        # Number of unmasked target positions in the batch.
        self.point_count = tf.reduce_sum(self.mask)
        self.support_nodes_layer1 = placeholders['support_nodes_layer1']
        self.support_nodes_layer2 = placeholders['support_nodes_layer2']
        self.support_sessions_layer1 = placeholders['support_sessions_layer1']
        self.support_sessions_layer2 = placeholders['support_sessions_layer2']
        self.support_lengths_layer1 = placeholders['support_lengths_layer1']
        self.support_lengths_layer2 = placeholders['support_lengths_layer2']

        self.training = args.training
        self.concat = args.concat
        if args.act == 'linear':
            self.act = lambda x:x
        elif args.act == 'relu':
            self.act = tf.nn.relu
        elif args.act == 'elu':
            self.act = tf.nn.elu
        else:
            raise NotImplementedError
        self.batch_size = args.batch_size
        self.hidden_size = args.hidden_size
        self.samples_1 = args.samples_1
        self.samples_2 = args.samples_2
        self.num_samples = [self.samples_1, self.samples_2]
        self.n_items = args.num_items
        self.n_users = args.num_users
        self.emb_item = args.embedding_size
        self.emb_user = args.emb_user
        self.max_length = args.max_length
        self.model_size = args.model_size
        self.dropout = args.dropout
        self.dim1 = args.dim1
        self.dim2 = args.dim2
        self.weight_decay = args.weight_decay
        self.global_only = args.global_only
        self.local_only = args.local_only

        self.dims = [self.hidden_size, args.dim1, args.dim2]
        self.dense_layers = []
        self.loss = 0
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        # Staircase exponential decay, floored at 1e-5.
        self.lr = tf.maximum(1e-5, tf.train.exponential_decay(args.learning_rate,
                                                            self.global_step,
                                                            args.decay_steps,
                                                            args.decay_rate,
                                                            staircase=True))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.build()

    def global_features(self):
        """Look up user embeddings for both support layers and project them.

        Returns:
            ``[feature_layer2, feature_layer1]`` after a shared Dense layer
            whose width is ``hidden_size`` when ``global_only`` and
            ``hidden_size // 2`` otherwise.
        """
        self.user_embedding = tf.get_variable('user_embedding', [self.n_users, self.emb_user],\
                                        initializer=tf.glorot_uniform_initializer())
        feature_layer1 = tf.nn.embedding_lookup(self.user_embedding, self.support_nodes_layer1)
        feature_layer2 = tf.nn.embedding_lookup(self.user_embedding, self.support_nodes_layer2)
        dense_layer = Dense(self.emb_user, 
                            self.hidden_size if self.global_only else self.hidden_size // 2,
                            act=tf.nn.relu,
                            dropout=self.dropout if self.training else 0.)
        self.dense_layers.append(dense_layer)
        feature_layer1 = dense_layer(feature_layer1)
        feature_layer2 = dense_layer(feature_layer2)
        return [feature_layer2, feature_layer1]
    
    def local_features(self):
        '''
        Encode the support users' sessions with the same LSTM cell that
        ``decode`` created (``self.lstm_cell``), so ``decode`` must run first.

        Returns:
            ``[local_layer2, local_layer1]``: final LSTM hidden states of each
            support layer after a shared Dense layer whose width is
            ``hidden_size`` when ``local_only`` and ``hidden_size // 2``
            otherwise.
        '''
        initial_state_layer1 = self.lstm_cell.zero_state(self.batch_size*self.samples_1*self.samples_2, dtype=tf.float32)
        initial_state_layer2 = self.lstm_cell.zero_state(self.batch_size*self.samples_2, dtype=tf.float32)
        inputs_1 = tf.nn.embedding_lookup(self.embedding, self.support_sessions_layer1)
        inputs_2 = tf.nn.embedding_lookup(self.embedding, self.support_sessions_layer2)
        outputs1, states1 = tf.nn.dynamic_rnn(cell=self.lstm_cell,
                                            inputs=inputs_1, 
                                            sequence_length=self.support_lengths_layer1,
                                            initial_state=initial_state_layer1,
                                            dtype=tf.float32)
        outputs2, states2 = tf.nn.dynamic_rnn(cell=self.lstm_cell,
                                            inputs=inputs_2, 
                                            sequence_length=self.support_lengths_layer2,
                                            initial_state=initial_state_layer2,
                                            dtype=tf.float32)
        # outputs: shape[batch_size, max_time, depth]; only the final hidden
        # state of each sequence is used below.
        local_layer1 = states1.h
        local_layer2 = states2.h
        dense_layer = Dense(self.hidden_size, 
                            self.hidden_size if self.local_only else self.hidden_size // 2,
                            act=tf.nn.relu,
                            dropout=self.dropout if self.training else 0.)
        self.dense_layers.append(dense_layer)
        local_layer1 = dense_layer(local_layer1)
        local_layer2 = dense_layer(local_layer2)
        return [local_layer2, local_layer1]

    def global_and_local_features(self):
        """Concatenate global and local features along the last axis.

        Returns:
            ``[global_local_layer2, global_local_layer1]``.
        """
        #global features
        global_feature_layer2, global_feature_layer1 = self.global_features()
        local_feature_layer2, local_feature_layer1 = self.local_features()
        global_local_layer2 = tf.concat([global_feature_layer2, local_feature_layer2], -1)
        global_local_layer1 = tf.concat([global_feature_layer1, local_feature_layer1], -1)
        return [global_local_layer2, global_local_layer1]

    def aggregate(self, hidden, dims, num_samples, support_sizes, 
            aggregators=None, name=None, concat=False, model_size="small"):
        """ At each layer, aggregate hidden representations of neighbors to compute the hidden representations 
            at next layer.
        Args:
            hidden: list of hidden representations, one per hop (index 0 is the
                target nodes). Length is the number of layers + 1.
            dims: a list of dimensions of the hidden representations from the input layer to the
                final layer. Length is the number of layers + 1.
            num_samples: list of number of samples for each layer.
            support_sizes: the number of nodes to gather information from for each layer.
            aggregators: previously created aggregators to reuse; when None a
                new aggregator is created for every layer.
            name: optional name forwarded to newly created aggregators.
            concat: forwarded to newly created aggregators; also doubles the
                expected input width of every layer after the first.
            model_size: forwarded to newly created aggregators.
        Returns:
            A pair ``(hidden[0], aggregators)``: the hidden representation at
            the final layer for all nodes in batch, and the aggregators used.
        """

        # length: number of layers + 1
        new_agg = aggregators is None
        if new_agg:
            aggregators = []
        n_layers = len(num_samples)
        for layer in range(n_layers):
            dim_mult = 2 if concat and (layer != 0) else 1
            if new_agg:
                # aggregator at current layer; the last layer is left linear.
                layer_act = (lambda x : x) if layer == n_layers - 1 else self.act
                aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1], act=layer_act,
                        dropout=self.dropout if self.training else 0., 
                        name=name, concat=concat, model_size=model_size)
                aggregators.append(aggregator)
            else:
                aggregator = aggregators[layer]
            # hidden representation at current layer for all support nodes that are various hops away
            next_hidden = []
            # as layer increases, the number of support nodes needed decreases
            for hop in range(n_layers - layer):
                neigh_dims = [self.batch_size * support_sizes[hop], 
                              num_samples[n_layers - hop - 1], 
                              dim_mult*dims[layer]]
                h = aggregator((hidden[hop],
                                tf.reshape(hidden[hop + 1], neigh_dims)))
                next_hidden.append(h)
            hidden = next_hidden
        return hidden[0], aggregators

    def decode(self):
        """Run the LSTM over the target sessions.

        Creates ``self.lstm_cell`` (reused by ``local_features``).

        Returns:
            A list of ``max_length`` tensors, one per time step, each with the
            leading time axis squeezed away.
        """
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_size)
        initial_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
        time_major_x = tf.transpose(self.input_x)
        inputs = tf.nn.embedding_lookup(self.embedding, time_major_x)
        outputs, state = tf.nn.dynamic_rnn(cell=lstm_cell,
                                            inputs=inputs, 
                                            initial_state=initial_state,
                                            time_major=True,
                                            dtype=tf.float32,
                                            scope='decode_rnn')
        # outputs: shape[max_time, batch_size, depth]
        slices = tf.split(outputs, num_or_size_splits=self.max_length, axis=0)
        return [tf.squeeze(t,[0]) for t in slices]

    def step_by_step(self, features_0, features_1_2, dims, num_samples, support_sizes, 
            aggregators=None, name=None, concat=False, model_size="small"):
        """Aggregate once per time step, sharing aggregators across steps.

        The ``aggregators``, ``name`` and ``model_size`` arguments are accepted
        but not used: aggregators always start from None and
        ``self.model_size`` is passed down.

        Returns:
            The per-step outputs stacked along a new leading axis.
        """
        self.aggregators = None
        outputs = []
        for feature0 in features_0:
            hidden = [feature0, features_1_2[0], features_1_2[1]]
            # The first call creates the aggregators; later calls reuse them.
            output1, self.aggregators = self.aggregate(hidden, dims, num_samples, support_sizes,
                                        aggregators=self.aggregators, concat=concat, model_size=self.model_size)
            outputs.append(output1)
        return tf.stack(outputs, axis=0)

    def build(self):
        """Assemble the full graph: features, loss, metrics and train op."""
        self.embedding = embedding = tf.get_variable('item_embedding', [self.n_items, self.emb_item],\
                                        initializer=tf.glorot_uniform_initializer())
        features_0 = self.decode() # features of zero layer nodes. 
        #outputs with shape [max_time, batch_size, dim2]
        if self.global_only:
            features_1_2 = self.global_features()
        elif self.local_only:
            features_1_2 = self.local_features()
        else:
            features_1_2 = self.global_and_local_features()
        outputs = self.step_by_step(features_0, features_1_2, self.dims, self.num_samples, self.support_sizes,
                                concat=self.concat)
        concat_self = tf.concat([features_0, outputs], axis=-1)

        # exchange first two dimensions.
        self.transposed_outputs = tf.transpose(concat_self, [1,0,2])

        self.loss = self._loss()
        self.sum_recall = self._recall()
        self.sum_ndcg = self._ndcg()
        grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # Element-wise gradient clipping to [-5, 5]; variables without a
        # gradient are passed through as None.
        clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var)
                        for grad, var in grads_and_vars]
        self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars, global_step=self.global_step)
    
    def _loss(self):
        """Masked mean cross-entropy over target positions plus L2 weight decay.

        Also sets ``self.logits`` (scores against every item embedding), which
        ``_recall`` and ``_ndcg`` read.
        """
        reg_loss = 0.
        xe_loss = 0.
        fc_layer = Dense(self.dim2 + self.hidden_size, self.emb_item, act=lambda x:x, dropout=self.dropout if self.training else 0.)
        self.dense_layers.append(fc_layer)
        self.logits = logits = tf.matmul(fc_layer(tf.reshape(self.transposed_outputs, [-1, self.dim2+self.hidden_size])), self.embedding, transpose_b=True)
        for dense_layer in self.dense_layers:
            for var in dense_layer.vars.values():
                reg_loss += self.weight_decay * tf.nn.l2_loss(var)
        for aggregator in self.aggregators:
            for var in aggregator.vars.values():
                reg_loss += self.weight_decay * tf.nn.l2_loss(var)
        reshaped_logits = tf.reshape(logits, [self.batch_size, self.max_length, self.n_items])
        xe_loss += tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y,
                                                            logits=reshaped_logits,
                                                            name='softmax_loss')
        xe_loss *= self.mask
        return tf.reduce_sum(xe_loss) / self.point_count + reg_loss

    def _ndcg(self):
        """Sum over unmasked positions of ``1 / log2(1 + rank)`` of the target item.

        The rank counts how many scores in columns 1.. exceed the target's
        score (column 0 is excluded -- presumably a padding item; confirm
        against the data pipeline).
        """
        predictions = tf.transpose(self.logits)
        targets = tf.reshape(self.input_y, [-1])
        # Row i of the lookup holds item targets[i]'s score at every position;
        # the diagonal picks each position's own target score.
        pred_values = tf.expand_dims(tf.diag_part(tf.nn.embedding_lookup(predictions, targets)), -1)
        tile_pred_values = tf.tile(pred_values, [1, self.n_items-1])
        ranks = tf.reduce_sum(tf.cast(self.logits[:,1:] > tile_pred_values, dtype=tf.float32), -1) + 1
        ndcg = 1. / (log2(1.0 + ranks))
        mask = tf.reshape(self.mask, [-1])
        ndcg *= mask
        return tf.reduce_sum(ndcg)

    def _recall(self):
        """Count of unmasked positions whose target is among the top 20 scores."""
        predictions = self.logits
        targets = tf.reshape(self.input_y, [-1])
        recall_at_k = tf.nn.in_top_k(predictions, targets, k=20)
        recall_at_k = tf.cast(recall_at_k, dtype=tf.float32)
        mask = tf.reshape(self.mask, [-1])
        recall_at_k *= mask
        return tf.reduce_sum(recall_at_k)

def log2(x):
    """Base-2 logarithm of a tensor, via change of base: ln(x) / ln(2)."""
    ln_x = tf.log(x)
    # Build the constant in the same dtype as ln(x) so the division type-checks.
    ln_2 = tf.log(tf.constant(2, dtype=ln_x.dtype))
    return ln_x / ln_2

TRAIN

In [None]:
#coding=utf-8

import os, sys
import argparse
import time

# Seed NumPy's global RNG and TensorFlow's graph-level RNG with one fixed value.
# NOTE(review): Python's built-in `random` module is not seeded here.
seed = 123
np.random.seed(seed)
tf.set_random_seed(seed)

def evaluate(sess, model, minibatch, val_or_test='val'):
    """Run the model over every validation (or test) minibatch.

    Args:
        sess: session used to run the graph.
        model: object exposing ``loss``, ``sum_recall``, ``sum_ndcg`` and
            ``point_count`` tensors.
        minibatch: iterator exposing ``end_val`` and
            ``next_val_minibatch_feed_dict``.
        val_or_test: split selector passed through to ``minibatch``.

    Returns:
        Tuple ``(mean batch loss, recall, ndcg)`` where recall and ndcg are
        the summed per-batch values divided by the total point count.
    """
    losses, recalls, ndcgs, points = [], [], [], []
    fetches = [model.loss,model.sum_recall, model.sum_ndcg, model.point_count]
    while not minibatch.end_val(val_or_test):
        batch_feed = minibatch.next_val_minibatch_feed_dict(val_or_test)
        batch_loss, batch_recall, batch_ndcg, batch_points = sess.run(fetches, feed_dict=batch_feed)
        losses.append(batch_loss)
        recalls.append(batch_recall)
        ndcgs.append(batch_ndcg)
        points.append(batch_points)
    total_points = np.sum(points)
    mean_loss = np.mean(losses)
    recall = np.sum(recalls) / total_points
    ndcg = np.sum(ndcgs) / total_points
    return mean_loss, recall, ndcg

def construct_placeholders(args):
    """Create the input placeholders for the model.

    Args:
        args: object with ``batch_size``, ``max_length``, ``samples_1`` and
            ``samples_2`` attributes; every placeholder shape is derived from
            these.

    Returns:
        Dict mapping placeholder keys to ``tf.placeholder`` tensors.
    """
    # Flattened support-set sizes for the two sampled layers.
    n_layer1 = args.batch_size*args.samples_1*args.samples_2
    n_layer2 = args.batch_size*args.samples_2
    session_shape = (args.batch_size, args.max_length)

    placeholders = {}
    placeholders['input_x'] = tf.placeholder(tf.int32, shape=session_shape, name='input_session')
    placeholders['input_y'] = tf.placeholder(tf.int32, shape=session_shape, name='output_session')
    placeholders['mask_y'] = tf.placeholder(tf.float32, shape=session_shape, name='mask_x')
    placeholders['support_nodes_layer1'] = tf.placeholder(
        tf.int32, shape=(n_layer1,), name='support_nodes_layer1')
    placeholders['support_nodes_layer2'] = tf.placeholder(
        tf.int32, shape=(n_layer2,), name='support_nodes_layer2')
    placeholders['support_sessions_layer1'] = tf.placeholder(
        tf.int32, shape=(n_layer1, args.max_length), name='support_sessions_layer1')
    placeholders['support_sessions_layer2'] = tf.placeholder(
        tf.int32, shape=(n_layer2, args.max_length), name='support_sessions_layer2')
    placeholders['support_lengths_layer1'] = tf.placeholder(
        tf.int32, shape=(n_layer1,), name='support_lengths_layer1')
    placeholders['support_lengths_layer2'] = tf.placeholder(
        tf.int32, shape=(n_layer2,), name='support_lengths_layer2')
    return placeholders

def train(args, data):
    """Train DGRec with periodic validation, checkpointing and early stopping,
    then report metrics on the test split.

    Args:
        args: hyper-parameter object; ``num_items`` and ``num_users`` are
            overwritten here from the id maps. Also read: ``ckpt_dir``,
            ``batch_size``, ``max_degree``, ``max_length``, ``samples_1``,
            ``samples_2``, ``epochs``, ``val_every``, ``print_every``.
        data: indexable with seven entries, in order: adjacency info, latest
            sessions per user by time, user id map, item id map, train frame,
            validation frame, test frame.

    Validation runs every ``val_every`` iterations within an epoch; a
    checkpoint is written whenever validation recall@20 does not decrease, and
    training stops after 10 consecutive validations without such an
    improvement. The final test evaluation uses the weights currently in the
    session (no checkpoint is restored here).
    """
    adj_info = data[0]
    latest_per_user_by_time = data[1]
    user_id_map = data[2]
    item_id_map = data[3]
    train_df = data[4]
    valid_df = data[5]
    test_df = data[6]
    
    args.num_items = len(item_id_map) + 1
    args.num_users = len(user_id_map)
    placeholders = construct_placeholders(args)
    if not os.path.exists(args.ckpt_dir):
        os.makedirs(args.ckpt_dir)
    ckpt_path = os.path.join(args.ckpt_dir, 'model.ckpt')

    minibatch = MinibatchIterator(adj_info,
                latest_per_user_by_time,
                [train_df, valid_df, test_df],
                placeholders,
                batch_size=args.batch_size,
                max_degree=args.max_degree,
                num_nodes=len(user_id_map),
                max_length=args.max_length,
                samples_1_2=[args.samples_1, args.samples_2])
    
    dgrec = DGRec(args, minibatch.sizes, placeholders)
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # Release the session's resources even if training raises.
    try:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

        total_steps = 0
        avg_time = 0.

        patience = 10
        inc = 0
        early_stopping = False

        highest_val_recall = -1.0
        start_time = time.time()
        for epoch in range(args.epochs):
            minibatch.shuffle()

            iter_cn = 0
            print('Epoch: %04d' % (epoch + 1))
            epoch_val_cost = []
            epoch_val_recall = []
            epoch_val_ndcg = []
            epoch_train_cost = []
            epoch_train_recall = []
            epoch_train_ndcg = []
            epoch_train_point = []
            
            while not minibatch.end() and not early_stopping:
                t = time.time()
                feed_dict = minibatch.next_train_minibatch_feed_dict()
                outs = sess.run([dgrec.opt_op, dgrec.loss, dgrec.sum_recall, dgrec.sum_ndcg, dgrec.point_count], feed_dict=feed_dict)
                train_cost = outs[1]
                epoch_train_cost.append(train_cost)
                epoch_train_recall.append(outs[2])
                epoch_train_ndcg.append(outs[3])
                epoch_train_point.append(outs[4])
                # Running mean of the wall time per training step.
                avg_time = (avg_time * total_steps + time.time() - t) / (total_steps + 1)

                # iter_cn restarts at 0 every epoch, so each epoch begins with
                # a validation pass; the print block below relies on that.
                if iter_cn % args.val_every == 0:
                    ret = evaluate(sess, dgrec, minibatch)
                    epoch_val_cost.append(ret[0])
                    epoch_val_recall.append(ret[1])
                    epoch_val_ndcg.append(ret[2])
                    if ret[1] >= highest_val_recall:
                        saver.save(sess, ckpt_path, global_step=total_steps)
                        highest_val_recall = ret[1]
                        inc = 0
                        print("Iter:", '%d' % iter_cn, 
                              "val_loss=", "{:.5f}".format(epoch_val_cost[-1]),
                              "val_recall@20=", "{:.5f}".format(epoch_val_recall[-1]),
                              "val_ndcg=", "{:.5f}".format(epoch_val_ndcg[-1]),
                              "dump model!"
                              )
                    else:
                        inc += 1
                    if inc >= patience:
                        early_stopping = True
                        break

                if total_steps % args.print_every == 0:
                    print("Iter:", '%d' % iter_cn, 
                          "train_loss=", "{:.5f}".format(np.mean(epoch_train_cost)),
                          "train_recall@20=", "{:.5f}".format(np.sum(epoch_train_recall)/np.sum(epoch_train_point)),
                          "train_ndcg=", "{:.5f}".format(np.sum(epoch_train_ndcg)/np.sum(epoch_train_point)),
                          "val_loss=", "{:.5f}".format(epoch_val_cost[-1]),
                          "val_recall@20=", "{:.5f}".format(epoch_val_recall[-1]),
                          "val_ndcg=", "{:.5f}".format(epoch_val_ndcg[-1]),
                          "time=", "{:.5f}s".format(avg_time))
                    sys.stdout.flush()
                total_steps += 1
                iter_cn += 1
            if early_stopping:
                print('Early stop at epoch: {}, total training steps: {}'.format(epoch, total_steps))
                break
        end_time = time.time() 
        # Guard against zero steps (no epochs, or an empty training iterator).
        print('-----------{} seconds per batch iteration-------------'.format((end_time - start_time) / max(total_steps, 1)))
        print('Parameter settings: {}'.format(args.ckpt_dir))
        print('Optimization finished!\tStart testing...')
        ret = evaluate(sess, dgrec, minibatch, 'test')
        print('Test results:',
                '\tLoss:{}'.format(ret[0]),
                '\tRecall@20:{}'.format(ret[1]),
                '\tNDCG:{}'.format(ret[2]))
    finally:
        sess.close()
    
class Args:
    """Hyper-parameters and run settings, exposed as class attributes."""

    # --- run mode ---
    training = True
    global_only = False
    local_only = False

    # --- model architecture ---
    aggregator_type = 'attn'
    act = 'relu'
    concat = False
    model_size = 'small'
    hidden_size = 100
    embedding_size = 50
    emb_user = 50
    dim1 = 100
    dim2 = 100

    # --- data / sampling ---
    batch_size = 200
    max_degree = 50
    max_length = 20
    samples_1 = 10
    samples_2 = 5
    num_users = -1
    num_items = 100

    # --- optimisation ---
    epochs = 20
    learning_rate = 0.001
    decay_steps = 400
    decay_rate = 0.98
    dropout = 0.
    weight_decay = 0.

    # --- logging / checkpoints ---
    print_every = 100
    val_every = 500
    ckpt_dir = 'save/'

# Driver: build the settings, load the preprocessed data and train.
args = Args()
print('Loading training data..')
data = load_data('/content')
print("Training data loaded!")
train(args, data)
# No tf.app.run() here: training has already run via the direct call above,
# and tf.app.run() would look for a `main` function, parse the notebook
# kernel's command line and finish by calling sys.exit().

Loading training data..
Training data loaded!
sessions: 613221	ratings: 2554567
sessions: 7354	ratings: 27185
sessions: 7304	ratings: 26705
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Scale of 0 disables regularizer.
Instructions for updating:
Plea

TEST

In [None]:
#coding=utf-8

def evaluate(sess, model, minibatch, val_or_test='val'):
    """Run the model over every remaining evaluation minibatch and aggregate.

    Args:
        sess: object exposing ``run(fetches, feed_dict=...)``.
        model: object exposing ``loss``, ``sum_recall``, ``sum_ndcg`` and
            ``point_count`` fetches.
        minibatch: iterator exposing ``end_val``, ``next_val_minibatch_feed_dict``
            and a ``placeholders`` mapping containing ``'input_x'``.
        val_or_test: split selector forwarded unchanged to the iterator.

    Returns:
        A six-element list:
        ``[mean of per-batch loss,
           sum(sum_recall) / sum(point_count),
           sum(sum_ndcg) / sum(point_count),
           per-batch sum_recall values,
           per-batch sum_ndcg values,
           per-batch input keys]``
        where each input key is the batch's non-zero ``input_x`` entries
        joined with ``'_'``.
    """
    batch_losses, batch_recalls, batch_ndcgs, batch_points = [], [], [], []
    input_keys = []

    while True:
        if minibatch.end_val(val_or_test):
            break
        feed = minibatch.next_val_minibatch_feed_dict(val_or_test)

        # Flatten the fed input ids and drop zeros to build a string key
        # for this batch.
        flat_ids = np.reshape(feed[minibatch.placeholders['input_x']], -1).tolist()
        input_keys.append('_'.join(str(item) for item in flat_ids if item != 0))

        fetched = sess.run(
            [model.loss, model.sum_recall, model.sum_ndcg, model.point_count],
            feed_dict=feed)
        for bucket, value in zip(
                (batch_losses, batch_recalls, batch_ndcgs, batch_points),
                (fetched[0], fetched[1], fetched[2], fetched[3])):
            bucket.append(value)

    total_points = np.sum(batch_points)
    mean_loss = np.mean(batch_losses)
    overall_recall = np.sum(batch_recalls) / total_points
    overall_ndcg = np.sum(batch_ndcgs) / total_points
    return [mean_loss, overall_recall, overall_ndcg,
            batch_recalls, batch_ndcgs, input_keys]

def construct_placeholders(args):
    """Create the TensorFlow placeholders, keyed by feed name.

    Shapes are derived from ``args.batch_size``, ``args.max_length``,
    ``args.samples_1`` and ``args.samples_2``.

    Returns:
        dict mapping feed key -> ``tf.placeholder``.
    """
    batch = args.batch_size
    steps = args.max_length
    # Flat sizes of the two support sets. They are passed to TensorFlow as
    # plain ints (not 1-tuples), exactly as the shape arguments were written.
    layer1_size = batch * args.samples_1 * args.samples_2
    layer2_size = batch * args.samples_2

    def _int32(shape, name):
        return tf.placeholder(tf.int32, shape=shape, name=name)

    placeholders = dict()
    placeholders['input_x'] = _int32((batch, steps), 'input_session')
    placeholders['input_y'] = _int32((batch, steps), 'output_session')
    # The graph name of this float mask is 'mask_x' although its key is 'mask_y'.
    placeholders['mask_y'] = tf.placeholder(
        tf.float32, shape=(batch, steps), name='mask_x')
    placeholders['support_nodes_layer1'] = _int32(layer1_size, 'support_nodes_layer1')
    placeholders['support_nodes_layer2'] = _int32(layer2_size, 'support_nodes_layer2')
    placeholders['support_sessions_layer1'] = _int32(
        (layer1_size, steps), 'support_sessions_layer1')
    placeholders['support_sessions_layer2'] = _int32(
        (layer2_size, steps), 'support_sessions_layer2')
    placeholders['support_lengths_layer1'] = _int32(layer1_size, 'support_lengths_layer1')
    placeholders['support_lengths_layer2'] = _int32(layer2_size, 'support_lengths_layer2')
    return placeholders

def test(args, data):
    """Restore the latest checkpoint in ``args.ckpt_dir`` and score the test split.

    Args:
        args: settings object. It is mutated in place: ``num_items`` and
            ``num_users`` are set from the id maps and ``batch_size`` is
            forced to 1 so metrics are written per session.
        data: indexable holding, in order: adjacency info, latest sessions
            per user by time, user id map, item id map, train frame,
            validation frame, test frame.

    Side effects:
        Prints the aggregate loss / recall@20 / ndcg and writes one line per
        evaluated batch to ``metric_dist.txt`` (input key, recall, ndcg,
        tab-separated).

    Raises:
        SystemExit: with status 1 when no checkpoint can be found.
    """
    # Local import so the function does not depend on `sys` having been
    # imported by some other notebook cell.
    import sys

    adj_info = data[0]
    latest_per_user_by_time = data[1]
    user_id_map = data[2]
    item_id_map = data[3]
    train_df = data[4]
    valid_df = data[5]
    test_df = data[6]

    args.num_items = len(item_id_map) + 1
    args.num_users = len(user_id_map)
    args.batch_size = 1
    placeholders = construct_placeholders(args)

    minibatch = MinibatchIterator(adj_info,
                latest_per_user_by_time,
                [train_df, valid_df, test_df],
                placeholders,
                batch_size=args.batch_size,
                max_degree=args.max_degree,
                num_nodes=len(user_id_map),
                max_length=args.max_length,
                samples_1_2=[args.samples_1, args.samples_2],
                training=False)

    # NOTE(review): if train() was run earlier in the same kernel, the
    # default graph may already contain a model -- confirm whether the graph
    # needs resetting before building a second one.
    dgrec = DGRec(args, minibatch.sizes, placeholders)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Context manager guarantees the session (and its device memory) is
    # released even when restore or evaluation fails.
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)
        ckpt = tf.train.get_checkpoint_state(args.ckpt_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('Failed to restore model from {}'.format(args.ckpt_dir))
            # Non-zero status: a missing checkpoint is a failure, not success.
            sys.exit(1)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Restore model from {}!'.format(args.ckpt_dir))
        ret = evaluate(sess, dgrec, minibatch, "test")

    print("Test results(batch_size=1):",
          "\tloss=", "{:.5f}".format(ret[0]),
          "\trecall@20=", "{:.5f}".format(ret[1]),
          "\tndcg=", "{:.5f}".format(ret[2]),
          )

    # The last three entries of evaluate()'s result are the per-batch recall
    # values, per-batch ndcg values and per-batch input keys.
    recall = ret[-3]
    ndcg = ret[-2]
    x_strs = ret[-1]
    with open('metric_dist.txt', 'w') as f:
        for x_str, batch_recall, batch_ndcg in zip(x_strs, recall, ndcg):
            f.write(x_str + '\t' + str(batch_recall) + '\t' + str(batch_ndcg) + '\n')

class Args():
    """Settings handed to test().

    The architecture-defining values (``act``, ``embedding_size``,
    ``emb_user``) match the training configuration that writes checkpoints
    to the same ``ckpt_dir``; a model built with different embedding widths
    cannot restore those checkpoints, and a different activation would
    evaluate a different network than the one that was trained.
    """
    training = False
    global_only = False
    local_only = False
    epochs = 20
    aggregator_type = 'attn'
    act = 'relu'  # same activation as the training configuration
    batch_size = 200  # test() overrides this to 1
    max_degree = 50
    num_users = -1  # test() overwrites this from the user id map
    num_items = 100  # test() overwrites this from the item id map
    concat = False
    learning_rate = 0.001
    hidden_size = 100
    embedding_size = 50  # must equal the width the checkpoint was trained with
    emb_user = 50  # must equal the width the checkpoint was trained with
    max_length = 20
    samples_1 = 10
    samples_2 = 5
    dim1 = 100
    dim2 = 100
    model_size = 'small'
    dropout = 0.
    weight_decay = 0.
    # Present in the training configuration; kept here so both settings
    # objects expose the same attributes (whether the model reads them at
    # inference time is not visible from this cell).
    decay_steps = 400
    decay_rate = 0.98
    print_every = 100
    val_every = 500
    ckpt_dir = 'save/'

# Test driver: build the settings object, load the preprocessed data and
# evaluate the saved checkpoint directly.
args = Args()
print('Loading data..')
data = load_data('/content')
print("Done loading data..")
test(args, data)
# NOTE: no tf.app.run() here. test() has already been invoked explicitly;
# tf.app.run() would look up a `main` function in __main__ and then call
# sys.exit(), which in a notebook either raises (no `main` defined) or
# re-runs the work and ends the cell with SystemExit.