In [1]:
#Python Libs
import datetime
import gc
import math
import os
from time import time
#Third-party
import numpy as np
#Pytorch
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import multivariate_normal, normal
#Implementations
from model.models import INDIG
from dataset import GDataset
from batch_test import *

In [2]:
class Config(object):
    """Hyper-parameters and file locations for one training/evaluation run.

    Args:
        dataset: dataset name; also the sub-directory of ``data/`` and the
            file-name prefix of every dataset file.
        estimator: estimator tag; in this notebook it is only printed and
            embedded in the checkpoint file name.
    """
    def __init__(self, dataset, estimator):
        self.dataset = dataset 
        self.path = 'data/' + self.dataset + '/'
        self.estimator = estimator
        self.user_dataset = self.path + self.dataset + '.train.ui'
        # Path *prefix*: suffixes such as '.valid.rating' are appended by the loading cell.
        self.group_dataset = self.path + self.dataset
        self.user_in_group_path = self.path+ self.dataset + ".train.gu"
        self.embedding_size = 32
        self.epoch = 256
        self.num_negatives = 4
        self.batch_size = 256
        self.lr = 0.001
        self.drop_ratio = 0.6
        self.selfsuplamb = 0.0001  # weight applied to the self-supervised loss term
        self.topK = [10,]  # ranking cut-offs handed to the evaluation routine
        self.seed = 2022
        self.eval_freq = 5  # validate on epochs that are multiples of this value
        self.cuda = True  # request the CUDA device when building torch.device
        self.WU_point = -1 #warm_up point: validation only runs for epochs > WU_point
        self.ui_available = False  # when True the notebook passes dataset.ui_dict to the model
    
    def __str__(self):
        """Return an aligned, newline-terminated summary of the key settings.

        The text is built from ``self`` (not from a notebook-level global), so
        it is correct for every instance, and it has no printing side effect.
        ``print(cfg)`` produces the same six lines followed by a blank line as
        before.
        """
        rows = (
            ('Data', self.dataset),
            ('Estimator', self.estimator),
            ('#neg', self.num_negatives),
            ('lr', self.lr),
            ('drop-rate', self.drop_ratio),
            ('self-sup', self.selfsuplamb),
        )
        return ''.join('{:<25}: {:}\n'.format(label, value) for label, value in rows)

In [3]:
config = Config('douban', 'mf3') #meetupCA, gwl
print(vars(config))
# Seed NumPy as well as PyTorch: only seeding torch leaves any NumPy-based
# randomness unseeded (the data pipeline is not visible here -- confirm which
# RNG its negative sampling uses).
np.random.seed(config.seed)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(config.seed)  # Set the random seed manually for reproducibility.

{'dataset': 'douban', 'path': 'data/douban/', 'estimator': 'mf3', 'user_dataset': 'data/douban/douban.train.ui', 'group_dataset': 'data/douban/douban', 'user_in_group_path': 'data/douban/douban.train.gu', 'embedding_size': 32, 'epoch': 256, 'num_negatives': 4, 'batch_size': 256, 'lr': 0.001, 'drop_ratio': 0.6, 'selfsuplamb': 0.0001, 'topK': [10], 'seed': 2022, 'eval_freq': 5, 'cuda': True, 'WU_point': -1, 'ui_available': False}


<torch._C.Generator at 0x19835e14c10>

In [4]:
# Honour config.cuda only when a CUDA device is actually usable; otherwise fall
# back to the CPU instead of failing later at the first .to(device) call.
use_cuda = config.cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

cuda


In [5]:
# Build the dataset object from the paths and negative-sample count held in config.
dataset = GDataset(
    config.user_dataset,
    config.group_dataset,
    config.user_in_group_path,
    config.num_negatives,
)
num_groups = dataset.num_groups
num_users = dataset.num_users
num_items = dataset.num_items
print("num_groups: {} num_users: {} num_items: {}".format(num_groups, num_users, num_items))

# Group -> members mapping is always used; the user -> items mapping is optional.
gu_dict = dataset.gu_dict
ui_dict = dataset.ui_dict if config.ui_available else None

# Held-out rating files share the group_dataset path prefix.
# Statement order is kept as-is: num_negatives is (re)assigned between the loads.
test_warm = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_warm")
test_cold = dataset.load_rating_file_as_dict(config.group_dataset + ".test.rating_cold")
dataset.num_negatives = config.num_negatives
valid = dataset.load_rating_file_as_dict(config.group_dataset + ".valid.rating")

num_groups: 92489 num_users: 63893 num_items: 15741


In [6]:
def print_eval(ret, KS=[], data_type = 'warm'):
    """Print one line of HR / NDCG / recall for every cut-off in ``KS``.

    Args:
        ret: mapping with the keys 'hit_ratio', 'ndcg' and 'recall'; each value
            is indexed by the position of the cut-off inside ``KS``.
        KS: cut-off values to report; nothing is printed when it is empty.
        data_type: label shown in parentheses on each line.
    """
    line_format = '\t\t @%d: (%s) HR = %.4f, NDCG = %.4f, Rec = %.4f'
    for position, cutoff in enumerate(KS):
        hit_ratio = ret['hit_ratio'][position]
        ndcg = ret['ndcg'][position]
        recall = ret['recall'][position]
        print(line_format % (cutoff, data_type, hit_ratio, ndcg, recall))

In [7]:
def valid_model(model, test_groups, Ks, group_trainRatings, num_items, device):
    """Return the mean score the model assigns to all held-out (group, item) pairs.

    Args:
        model: module called as ``model(group_ids, item_ids)``; it must return a
            2-tuple whose first element is a tensor of scores.
        test_groups: mapping from a group id to an iterable of item ids.
        Ks, group_trainRatings, num_items: not used by this function; they are
            kept so the signature stays unchanged for existing callers.
        device: torch device the index tensors are moved to.

    Returns:
        The mean of the returned score tensor as a Python float.

    Note:
        The model is switched to eval mode and left in that mode.
    """
    # Flatten the mapping into aligned (group, item) pairs, preserving iteration order.
    pairs = [(g, v) for g in test_groups for v in test_groups[g]]
    group_var = torch.LongTensor([g for g, _ in pairs]).to(device)
    item_var = torch.LongTensor([v for _, v in pairs]).to(device)

    model.eval()
    # Only a float leaves this function, so skip building an autograd graph.
    with torch.no_grad():
        rating, _ = model(group_var, item_var)
        return torch.mean(rating).item()

In [8]:

model = INDIG(num_users, num_items, num_groups, config.embedding_size, gu_dict, dataset.group_trainRatings, ui_dict, config.drop_ratio, device).to(device)

# Model-selection state. sub_checkpoint holds the best validation HR seen so far;
# best_checkpoint is a tie-breaker slot that is currently fed a constant (see below).
best_checkpoint = -1.0
sub_checkpoint = -1.0
best_weights_path = None

print(config)

optimizer = optim.Adam(model.parameters(), lr=config.lr)
print('====================================')
# train the model
for epoch in range(config.epoch): 
    # A fresh loader is requested from the dataset at the start of every epoch.
    train_loader = dataset.get_group_dataloader(config.batch_size)
    t1 = time()
    # Half-cosine schedule: tau is 0.5 at epoch 0 and rises towards 1.0 at the last epoch.
    tau = 1.0 - 0.25*(math.cos(math.pi*epoch/config.epoch)+1) #cosine annealing
    losses = []
    model.train()
    for batch_id, (g, pi_ni) in enumerate(train_loader):
        # Data Load
        group_input = g.to(device, non_blocking=True)
        # Going by the variable names, column 0 is the positive item and column 1 a sampled negative.
        pos_item_input = pi_ni[:, 0].to(device, non_blocking=True)
        neg_item_input = pi_ni[:, 1].to(device, non_blocking=True)

        #zero
        optimizer.zero_grad()

        transduc_loss, dkl, induc_loss, selfsup_loss = model(group_input, pos_item_input, neg_item_input)
        loss = (1-tau)*(transduc_loss + dkl +  config.selfsuplamb*selfsup_loss) + tau*induc_loss

        if not torch.isfinite(loss).all():
            # Report the four loss terms that actually exist in this scope, then skip
            # the update: back-propagating a NaN/Inf loss would poison every parameter.
            print('ERROR!\t', torch.mean(transduc_loss).item(), torch.mean(dkl).item(), torch.mean(induc_loss).item(), torch.mean(selfsup_loss).item())
            continue
        losses.append(float(loss.item()))
        # Backward
        loss.backward()
        optimizer.step()

    # Guard against an epoch in which every batch was skipped (np.mean of an empty array warns).
    mean_loss = np.mean(np.array(losses)) if losses else float('nan')
    print('Iteration %d,\tloss: [%.4f], time: [%.1fs], tau: [%.3f]' % (epoch, mean_loss, time() - t1, tau))
    gc.collect()

    if epoch > config.WU_point and epoch%config.eval_freq == 0:
        model.eval()
        with torch.no_grad():
            t2 = time()
            ret = test(model, valid, config.topK,  dataset.group_trainRatings, dataset.num_items, device) #See batch_test
            print('\t Evaluation done [%.1f s]' % (time() - t2))
            print_eval(ret, config.topK)
            cur_sub_checkpoint = ret['hit_ratio'][0]
            current_weights_path = 'weights/INDIG_'+str(config.estimator)+'_'+str(config.dataset)+"_"+str(config.lr)+"_"+str(config.drop_ratio)+"_"+str(config.selfsuplamb)+'_'+str(epoch)
            # Tie-breaker is disabled (constant 0): checkpoints are selected on the
            # validation hit ratio only, and the test splits are not consulted here.
            cur_checkpoint = 0
            if (best_checkpoint < cur_checkpoint and sub_checkpoint == cur_sub_checkpoint) or (sub_checkpoint < cur_sub_checkpoint):
                # Create the target directory first so the save cannot fail on a missing folder.
                os.makedirs(os.path.dirname(current_weights_path), exist_ok=True)
                torch.save(model.state_dict(), current_weights_path)
                # Record the new best only after the file has been written successfully.
                best_weights_path = current_weights_path
                best_checkpoint = cur_checkpoint
                sub_checkpoint = cur_sub_checkpoint
                print('SAVED! at epoch ', epoch)

Data                     : douban
Estimator                : mf3
#neg                     : 4
lr                       : 0.001
drop-rate                : 0.6
self-sup                 : 0.0001

Iteration 0,	loss: [0.5822], time: [1020.1s], tau: [0.500]
	 Evaluation done [2470.7 s]
		 @10: (warm) HR = 0.0042, NDCG = 0.0013, Rec = 0.0042
SAVED! at epoch  0
Iteration 1,	loss: [0.4017], time: [1025.0s], tau: [0.500]
Iteration 2,	loss: [0.2920], time: [1017.9s], tau: [0.500]
Iteration 3,	loss: [0.2370], time: [1020.6s], tau: [0.500]
Iteration 4,	loss: [0.2044], time: [1017.1s], tau: [0.500]
Iteration 5,	loss: [0.1817], time: [1004.1s], tau: [0.500]
	 Evaluation done [2336.6 s]
		 @10: (warm) HR = 0.0106, NDCG = 0.0057, Rec = 0.0106
SAVED! at epoch  5
Iteration 6,	loss: [0.1649], time: [1018.0s], tau: [0.501]
Iteration 7,	loss: [0.1540], time: [1020.1s], tau: [0.501]
Iteration 8,	loss: [0.1437], time: [1009.4s], tau: [0.501]
Iteration 9,	loss: [0.1362], time: [1023.5s], tau: [0.502]
Iteration

In [9]:
# Fail with a clear message if no checkpoint path was recorded by the training
# cell (for example when it was not run in this kernel).
if best_weights_path is None:
    raise RuntimeError('best_weights_path is None: no checkpoint was saved, run the training cell first.')

# Rebuild a fresh model and restore the best weights chosen on the validation split.
model = INDIG(num_users, num_items, num_groups, config.embedding_size, gu_dict, dataset.group_trainRatings, ui_dict, config.drop_ratio, device).to(device)
gc.collect()
# map_location lets a checkpoint written on a GPU be restored on whatever device is in use now.
state = torch.load(best_weights_path, map_location=device)
model.load_state_dict(state)

print('====================================')
print(best_weights_path)
print(config)


model.eval()
with torch.no_grad():
    ret = test(model, test_cold, config.topK,  dataset.group_trainRatings, dataset.num_items, device) #See batch_test
    print_eval(ret, config.topK[:2], 'cold')
    ret = test(model, test_warm, config.topK,  dataset.group_trainRatings, dataset.num_items, device) #See batch_test
    print_eval(ret, config.topK[:2])

print('====================================')

weights/INDIG_mf3_douban_0.001_0.6_0.0001_135
Data                     : douban
Estimator                : mf3
#neg                     : 4
lr                       : 0.001
drop-rate                : 0.6
self-sup                 : 0.0001

		 @10: (cold) HR = 0.0266, NDCG = 0.0130, Rec = 0.0266
		 @10: (warm) HR = 0.0721, NDCG = 0.0360, Rec = 0.0721
