In [1]:
import configparser as cp
import re, os
from time import time
import tensorflow as tf
import numpy as np
from DataModule import  DataModule
from Evaluate import Evaluate
from diffnet import diffnet
from Logging import Logging

In [2]:
config_path = os.path.join(os.getcwd(), 'data/yelp_diffnet.ini')
model_name = 'diffnet'

In [3]:
class ParserConf():

    def __init__(self, config_path):
        self.config_path = config_path
        
    def processValue(self,key,value):
        print(key, value)
        tmp = value.split(' ')
        dtype = tmp[0]
        value = tmp[1:]
#         print(dtype, value)
        
        if value != None:
            if dtype == 'string':
                self.conf_dict[key] = vars(self)[key] = value[0]
            elif dtype == 'int':
                self.conf_dict[key] = vars(self)[key] = int(value[0])
            elif dtype == 'float':
                self.conf_dict[key] = vars(self)[key] = float(value[0])
            elif dtype == 'list':
                self.conf_dict[key] = vars(self)[key] = [i for i in value]
            elif dtype == 'int_list':
                self.conf_dict[key] = vars(self)[key] = [int(i) for i in value]
            elif dtype == 'float_list':
                self.conf_dict[key] = vars(self)[key] = [float(i) for i in value]
        else:
            print('%s value is None' % key)
    
    def parserConf(self):
        conf = cp.ConfigParser()
        conf.read(self.config_path)
        self.conf = conf
        
        self.conf_dict = {}
        for section in conf.sections():
            for (key, value) in conf.items(section):
                self.processValue(key, value)
        
        self.data_dir = os.path.join(os.getcwd(),'data')
        self.links_filename = os.path.join(os.getcwd(),'data/yelp.links')
        self.user_review_vector_matrix=os.path.join(os.getcwd(), 'data/user_vector.npy')
        self.item_review_vector_matrix = os.path.join(os.getcwd(), 'data/item_vector.npy')
#         self.pre_model = os.path.join(os.getcwd(), 'pretrain/%s/%s' % (self.data_name, self.pre_model))

class DataUtil():
    def __init__(self, conf):
        self.conf = conf
        #print('DataUtil, Line12, test- conf data_dir:%s' % self.conf.data_dir)

    def initializeRankingHandle(self):
        #t0 = time()
        self.createTrainHandle()
        self.createEvaluateHandle()
        #t1 = time()
        #print('Prepare data cost:%.4fs' % (t1 - t0))
    
    def createTrainHandle(self):
        data_dir = self.conf.data_dir
        # train  data
        train_filename = "%s/%s.train.rating" % (data_dir, self.conf.data_name)
        val_filename = "%s/%s.val.rating" % (data_dir, self.conf.data_name)
        test_filename = "%s/%s.test.rating" % (data_dir, self.conf.data_name)

        self.train = DataModule(self.conf,train_filename)
        self.val = DataModule(self.conf,val_filename)
        self.test = DataModule(self.conf,test_filename)
        

 
    def createEvaluateHandle(self):
        data_dir = self.conf.data_dir
        # eval data
        val_filename = "%s/%s.val.rating" % (data_dir, self.conf.data_name)
        test_filename = "%s/%s.test.rating" % (data_dir, self.conf.data_name)

        self.val_eva = DataModule(self.conf, val_filename)
        self.test_eva = DataModule(self.conf, test_filename)

### 参数设置

In [4]:
conf = ParserConf(config_path)
conf.parserConf()

num_users int 17237
num_items int 38342
gpu_device int 1
data_name string yelp
model_name string diffnet
dimension int 32
learning_rate float 0.001
epochs int 10
num_negatives int 8
num_evaluate int 1000
num_procs int 16
topk int 10
evaluate_batch_size int 128
training_batch_size int 128
epoch_notice int 300
pretrain_flag int 1
pre_model string diffnet_hr_0.3437_ndcg_0.2092_epoch_98.ckpt


In [5]:
conf.conf_dict

{'data_name': 'yelp',
 'dimension': 32,
 'epoch_notice': 300,
 'epochs': 10,
 'evaluate_batch_size': 128,
 'gpu_device': 1,
 'learning_rate': 0.001,
 'model_name': 'diffnet',
 'num_evaluate': 1000,
 'num_items': 38342,
 'num_negatives': 8,
 'num_procs': 16,
 'num_users': 17237,
 'pre_model': 'diffnet_hr_0.3437_ndcg_0.2092_epoch_98.ckpt',
 'pretrain_flag': 1,
 'topk': 10,
 'training_batch_size': 128}

### train

In [6]:
data = DataUtil(conf)
model = eval(model_name)
model = model(conf)
evaluate = Evaluate(conf)

In [7]:
log_dir = os.path.join(os.getcwd(), 'log')
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
# define log name 
log_path = os.path.join(os.getcwd(), 'log/%s_%s.log' % (conf.data_name, conf.model_name))

data.initializeRankingHandle()

d_train, d_val, d_test, d_test_eva = data.train, data.val, data.test, data.test_eva

In [8]:
print('System start to load data...')
t0 = time()
d_train.initializeRankingTrain()
d_val.initializeRankingVT()
d_test.initializeRankingVT()
d_test_eva.initalizeRankingEva()
t1 = time()
print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

System start to load data...
Data has been loaded successfully, cost:24.0019s


In [9]:
# prepare model necessary data.
data_dict = d_train.prepareModelSupplement(model)
model.inputSupply(data_dict)
model.startConstructGraph()

In [10]:
# standard tensorflow running environment initialize
tf_conf = tf.ConfigProto()
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# tf_conf.gpu_options.allow_growth = True


In [11]:
with tf.Session(config=tf_conf) as sess:
    sess.run(model.init)
#     if conf.pretrain_flag == 1:
#         model.saver.restore(sess, conf.pre_model)

    # set debug_flag=0, doesn't print any results
    log = Logging(log_path)
    log.record('Following will output the evaluation of the model:')
    # Start Training !!!
    for epoch in range(1, conf.epochs+1):
        # optimize model with training data and compute train loss
        tmp_train_loss = []
        t0 = time()

        #tmp_total_list = []
        while d_train.terminal_flag:
            d_train.getTrainRankingBatch()
            d_train.linkedMap()

            train_feed_dict = {}
            for (key, value) in model.map_dict['train'].items():
                train_feed_dict[key] = d_train.data_dict[value]
                
            [sub_train_loss, _] = sess.run([model.map_dict['out']['train'], model.opt], feed_dict=train_feed_dict)
            tmp_train_loss.append(sub_train_loss)
        train_loss = np.mean(tmp_train_loss)
        t1 = time()

        # compute val loss and test loss
        d_val.getVTRankingOneBatch()
        d_val.linkedMap()
        val_feed_dict = {}
        for (key, value) in model.map_dict['val'].items():
            val_feed_dict[key] = d_val.data_dict[value]
        val_loss = sess.run(model.map_dict['out']['val'], feed_dict=val_feed_dict)

        d_test.getVTRankingOneBatch()
        d_test.linkedMap()
        test_feed_dict = {}
        for (key, value) in model.map_dict['test'].items():
            test_feed_dict[key] = d_test.data_dict[value]
        test_loss = sess.run(model.map_dict['out']['test'], feed_dict=test_feed_dict)
        t2 = time()

        # start evaluate model performance, hr and ndcg
        def getPositivePredictions():
            d_test_eva.getEvaPositiveBatch()
            d_test_eva.linkedRankingEvaMap()
            eva_feed_dict = {}
            for (key, value) in model.map_dict['eva'].items():
                eva_feed_dict[key] = d_test_eva.data_dict[value]
            positive_predictions = sess.run(
                model.map_dict['out']['eva'],
                feed_dict=eva_feed_dict
            )
            return positive_predictions

        def getNegativePredictions():
            negative_predictions = {}
            terminal_flag = 1
            while terminal_flag:
                batch_user_list, terminal_flag = d_test_eva.getEvaRankingBatch()
                d_test_eva.linkedRankingEvaMap()
                eva_feed_dict = {}
                for (key, value) in model.map_dict['eva'].items():
                    eva_feed_dict[key] = d_test_eva.data_dict[value]
                index = 0
                tmp_negative_predictions = np.reshape(
                    sess.run(
                        model.map_dict['out']['eva'],
                        feed_dict=eva_feed_dict
                    ),
                    [-1, conf.num_evaluate])
                for u in batch_user_list:
                    negative_predictions[u] = tmp_negative_predictions[index]
                    index = index + 1
            return negative_predictions

        tt2 = time()

        index_dict = d_test_eva.eva_index_dict
        positive_predictions = getPositivePredictions()
        negative_predictions = getNegativePredictions()

        d_test_eva.index = 0 # !!!important, prepare for new batch
        hr, ndcg = evaluate.evaluateRankingPerformance(\
            index_dict, positive_predictions, negative_predictions, conf.topk, conf.num_procs)
        tt3 = time()
                
        # print log to console and log_file
        log.record('Epoch:%d, compute loss cost:%.4fs, train loss:%.4f, val loss:%.4f, test loss:%.4f' % \
            (epoch, (t2-t0), train_loss, val_loss, test_loss))
        log.record('Evaluate cost:%.4fs, hr:%.4f, ndcg:%.4f' % ((tt3-tt2), hr, ndcg))


Following will output the evaluation of the model:
19445676.0


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


nan
nan
nan
nan
nan
nan
nan
nan
nan
