In [1]:
# Model to train. Options listed by the author: 'FPMC_S', 'AttMix_S', 'STAMP_S'.
model_name = 'AttMix_S'

# Name of the dataset to load. Options listed by the author: 'Games', 'ML'.
# NOTE: a later cell rebinds `data` to the loaded Dataset object.
data = 'Games'

In [4]:
import argparse
import numpy as np
from utils.dataset import Dataset,DataIterator,get_DataLoader

def parse_args(name, model_name):
    """Build the run configuration namespace from built-in defaults.

    The parser is always fed an empty argument list (``args=[]``), so
    ``sys.argv`` is ignored and every option takes its default. This lets the
    function be called from a notebook kernel, whose own command line would
    otherwise be rejected by argparse.

    Args:
        name: Default value for ``--dataset``.
        model_name: Default value for ``--model``.

    Returns:
        argparse.Namespace with the attributes ``model``, ``dataset``,
        ``batch_size``, ``hidden_factor``, ``lamda``, ``lr``, ``per_test``
        and ``topN``.
    """
    parser = argparse.ArgumentParser(description="Run .")
    parser.add_argument('--model', nargs='?', default=model_name,
                        help='Choose a model.')
    parser.add_argument('--dataset', nargs='?', default=name,
                        help='Choose a dataset.')
    parser.add_argument('--batch_size', type=int, default=1024,
                        help='Batch size.')
    parser.add_argument('--hidden_factor', type=int, default=10,
                        help='Number of hidden factors.')
    # 10e-5 is numerically 1e-4; the literal is kept as the author wrote it.
    parser.add_argument('--lamda', type=float, default=10e-5,
                        help='Regularizer for bilinear part.')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='Learning rate.')
    # The training loop evaluates whenever `iteration % per_test == 0`.
    parser.add_argument('--per_test', type=int, default=20,
                        help='Run validation every this many training iterations.')
    # NOTE(review): how topN is consumed is not visible in this notebook
    # (the evaluate calls pass explicit cutoffs) - confirm before relying on it.
    parser.add_argument('--topN', type=int, default=50,
                        help='Value of N for top-N.')

    return parser.parse_args(args=[])

In [5]:
# `data` starts out as the dataset *name* (config cell) and is rebound below to
# the loaded Dataset object. Remember the name the first time through so that
# re-running this cell does not pass a Dataset object to parse_args.
if isinstance(data, str):
    dataset_name = data
args = parse_args(dataset_name, model_name)
data = Dataset(args)


loading data: meta_data

loading data: interaction_data

split data



In [6]:
from utils.log import LOG
# Run logger built from the parsed args. The training cell calls
# log.write_str(...) and reads log.best_model_path from this object.
log = LOG(args)



In [7]:
# Sequence length handed to every loader (same value for all three splits).
SEQ_LEN = 10

# One loader per split. The validation and test loaders pass train_flag=0;
# the training loader leaves train_flag at get_DataLoader's default.
train_data = get_DataLoader(data.train, args.batch_size, seq_len=SEQ_LEN)
valid_data = get_DataLoader(data.valid, args.batch_size, seq_len=SEQ_LEN, train_flag=0)
test_data = get_DataLoader(data.test, args.batch_size, seq_len=SEQ_LEN, train_flag=0)


Using time span 128
total session: 12054
Using time span 128
total session: 4018
Using time span 128
total session: 4019


In [8]:
# Instantiate the model selected by `model_name` (substring match, so
# 'AttMix_S' picks AttMix). Each model module is imported only when chosen.
if 'AttMix' in model_name:
    from AttMix import AttMix
    model = AttMix(data.n_item, args.hidden_factor, args.batch_size, args)
elif 'FPMC' in model_name:
    from FPMC import FPMC
    model = FPMC(data.n_item, args.hidden_factor, args.batch_size)
elif 'STAMP' in model_name:
    from STAMP import STAMP
    model = STAMP(data.n_item, args.hidden_factor, args.batch_size)
else:
    # Fail with a clear message instead of a NameError on `model` below
    # (or, worse, silently reusing a stale `model` from an earlier run).
    raise ValueError(
        "Unknown model_name %r; expected a name containing "
        "'AttMix', 'FPMC' or 'STAMP'." % (model_name,)
    )

# The training cell moves every batch to 'cuda', so the model must live there too.
model = model.cuda()

In [None]:
from tqdm import tqdm
from utils.evaluation import evaluate
from utils.log import load_model, save_model
import time
import sys
import torch

def to_tensor(var, device):
    """Convert array-like data to an int64 tensor on ``device``.

    The original implementation went through ``torch.Tensor(var)``, which
    always builds a float32 tensor first; integers above 2**24 (e.g. large
    item ids) were silently rounded before the cast to long. Converting with
    ``torch.as_tensor`` keeps integer inputs exact. Floating-point inputs are
    still truncated toward zero by the cast to long, as before.

    Args:
        var: List, NumPy array or tensor of numbers.
        device: Target device, e.g. ``'cuda'`` or ``'cpu'``.

    Returns:
        torch.Tensor of dtype ``torch.long`` located on ``device``. When the
        input is already an int64 array/tensor on that device, the result may
        share memory with it.
    """
    return torch.as_tensor(var).to(device=device, dtype=torch.long)

optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

MAX_ITERS = 10000  # hard cap on the number of training iterations
# Early-stopping budget: number of non-improving validations tolerated.
# Kept on `args` (as the original code did) but set once, before the loop.
args.patience = getattr(args, 'patience', 10)

# NOTE(review): a checkpoint is only written when recall exceeds this initial
# value, so if validation recall never rises above 0 no best model is saved
# before load_model() below - confirm that is acceptable.
best_metric = 0
# Must be initialised here: the original only assigned `trials` after the first
# improvement, so a non-improving first validation raised a NameError.
trials = 0

for step, (targets, items, mask, _) in enumerate(train_data):
    # ---- training step ----
    model.train()
    optimizer.zero_grad()
    targets_cuda = to_tensor(targets, 'cuda')
    items_cuda = to_tensor(items, 'cuda')
    mask_cuda = to_tensor(mask, 'cuda')
    negative_cuda = to_tensor(data.uniform_negative_sample(targets_cuda, 1), 'cuda')

    user_eb, scores = model(items_cuda, mask_cuda)
    loss = model.loss(user_eb, targets_cuda, negative_cuda)
    loss.backward()
    optimizer.step()

    # ---- periodic validation ----
    if step % args.per_test == 0:
        start_time = time.time()
        print(step)
        model.eval()
        with torch.no_grad():  # no gradients are needed while evaluating
            metrics = evaluate(model, valid_data, 25, args=args)
        log_str = 'iter: %d, train loss: %.4f' % (step, loss.item())  # report the loss
        if metrics != {}:
            log_str += ', ' + ', '.join(['valid ' + key + ': %.6f' % value for key, value in metrics.items()])
        print(log_str)
        log.write_str(log_str)

        # Keep the checkpoint with the best validation recall.
        if 'recall' in metrics:
            recall = metrics['recall']
            if recall > best_metric:
                best_metric = recall
                save_model(model, log.best_model_path)
                trials = 0
            else:
                trials += 1
                if trials > args.patience:  # early stopping
                    print("early stopping!")
                    break

        test_time = time.time()
        print("time interval: %.4f min" % ((test_time - start_time) / 60.0))
        sys.stdout.flush()

    if step >= MAX_ITERS:  # reached the maximum number of iterations
        break

# Restore the best checkpoint before the final evaluations.
load_model(model, log.best_model_path)
model.eval()

# Evaluate once on the validation data after training.
with torch.no_grad():
    metrics = evaluate(model, valid_data, 50, args=args)
print(', '.join(['Valid ' + key + ': %.6f' % value for key, value in metrics.items()]))

# Evaluate on the test data after training, once per cutoff.
print("Test result:")
for cutoff in (5, 10):
    with torch.no_grad():
        metrics = evaluate(model, test_data, cutoff, args=args)
    for key, value in metrics.items():
        output = 'test %s@%d=%.6f' % (key, cutoff, value)
        print(output)
        log.write_str(output)

0
iter: 0, train loss: 0.7018, valid recall: 0.000249, valid ndcg: 0.000060
time interval: 0.0100 min
20
iter: 20, train loss: 0.6975, valid recall: 0.000249, valid ndcg: 0.000059
time interval: 0.0060 min
40
iter: 40, train loss: 0.6835, valid recall: 0.000249, valid ndcg: 0.000079
time interval: 0.0060 min


In [None]:
# Leftover debug inspection: displays user_eb, targets_cuda and negative_cuda
# as bound by the last iteration of the training loop (that cell must have run).
user_eb,targets_cuda,negative_cuda