In [1]:
import argparse
from logging import getLogger
import os
from recbole.config import Config
from recbole.data import create_dataset
from recbole.data.utils import get_dataloader, create_samplers
from recbole.model.sequential_recommender.mbht import MBHT
from recbole.utils import init_logger, init_seed, get_model, get_trainer, set_color
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
print(torch.__version__)

1.12.1


In [3]:
import torchvision.models as models

In [4]:
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', type=str, default='MBHT', help='Model for session-based rec.')
    parser.add_argument('--dataset', '-d', type=str, default='tmall_beh', help='Benchmarks for session-based rec.')
    parser.add_argument('--validation', action='store_true', help='Whether evaluating on validation set (split from train set), otherwise on test set.')
    parser.add_argument('--valid_portion', type=float, default=0.1, help='ratio of validation set.')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--batch_size', type=int, default=2048)
    return parser.parse_known_args()[0]

In [5]:
args = get_args()


In [6]:
# configurations initialization
config_dict = {
    'USER_ID_FIELD': 'session_id',
    'load_col': None,
    # 'neg_sampling': {'uniform':1},
    'neg_sampling': None,
    'benchmark_filename': ['train', 'test'],
    'alias_of_item_id': ['item_id_list'],
    'topk': [5, 10, 101],
    'metrics': ['Recall', 'NDCG', 'MRR'],
    'valid_metric': 'NDCG@10',
    'eval_args':{
        'mode':'full',
        'order':'TO'
        },
    'gpu_id':args.gpu_id,
    "MAX_ITEM_LIST_LENGTH":200,
    "train_batch_size": 32 if args.dataset == "ijcai_beh" else 64,
    "eval_batch_size":24 if args.dataset == "ijcai_beh" else 128,
    "hyper_len":10 if args.dataset == "ijcai_beh" else 6,
    "scales":[10, 4, 20],
    "enable_hg":1,
    "enable_ms":1,
    "customized_eval":1,
    "abaltion":""
}

if args.dataset == "retail_beh":
    config_dict['scales'] = [5, 4, 20]
    config_dict['hyper_len'] = 6

config = Config(model="MBHT", dataset=f'{args.dataset}', config_dict=config_dict)
# config['device']="cpu"
init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config, log_root="log")
logger = getLogger()

logger.info(f"PID: {os.getpid()}")
logger.info(args)
logger.info(config)

# dataset filtering
dataset = create_dataset(config)
logger.info(dataset)

# dataset splitting
train_dataset, test_dataset = dataset.build()
train_sampler, test_sampler = create_samplers(config, dataset, [train_dataset, test_dataset])
if args.validation:
    train_dataset.shuffle()
    new_train_dataset, new_test_dataset = train_dataset.split_by_ratio([1 - args.valid_portion, args.valid_portion])
    train_data = get_dataloader(config, 'train')(config, new_train_dataset, None, shuffle=True)
    test_data = get_dataloader(config, 'test')(config, new_test_dataset, None, shuffle=False)
else:
    train_data = get_dataloader(config, 'train')(config, train_dataset, train_sampler, shuffle=True)
    test_data = get_dataloader(config, 'test')(config, test_dataset, test_sampler, shuffle=False)

16 Jul 20:32    INFO  PID: 26156
16 Jul 20:32    INFO  Namespace(batch_size=2048, dataset='tmall_beh', gpu_id=0, model='MBHT', valid_portion=0.1, validation=False)
16 Jul 20:32    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = dataset/tmall_beh
show_progress = True
save_dataset = False
save_dataloaders = False
benchmark_filename = ['train', 'test']

Training Hyper Parameters:
checkpoint_dir = saved
epochs = 300
train_batch_size = 64
learner = adam
learning_rate = 0.001
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'mode': 'full', 'order': 'TO', 'split': {'RS': [0.8, 0.1, 0.1]}, 'group_by': 'user'}
metrics = ['Recall', 'NDCG', 'MRR']
topk = [5, 10, 101]
valid_metric = NDCG@10
valid_metric_bigger = True
eval_batch_size = 128
metric_decimal_place = 4

Dataset Hyper Parameters:
field_separator = 	
seq_separator =  
USER_ID_FIE

In [7]:
# model loading and initialization
model = get_model(config['model'])(config, train_data.dataset).to(config['device'])
logger.info(model)

16 Jul 20:33    INFO  MBHT(
  (item_embedding_ls): Embedding(99039, 64, padding_idx=0)
  (sequenceMixer): PreNormResidual(
    (fn): Sequential(
      (0): Conv1d(200, 800, kernel_size=(1,), stride=(1,))
      (1): GELU(approximate=none)
      (2): Dropout(p=0.5, inplace=False)
      (3): Conv1d(800, 200, kernel_size=(1,), stride=(1,))
      (4): Dropout(p=0.5, inplace=False)
    )
    (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (channelMixer): PreNormResidual(
    (fn): Sequential(
      (0): Linear(in_features=64, out_features=256, bias=True)
      (1): GELU(approximate=none)
      (2): Dropout(p=0.5, inplace=False)
      (3): Linear(in_features=256, out_features=64, bias=True)
      (4): Dropout(p=0.5, inplace=False)
    )
    (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (layers): ModuleList(
    (0): PreNormResidual(
      (fn): Sequential(
        (0): Conv1d(200, 800, kernel_size=(1,), stride=(1,))
        (1): GELU(approximate=none)
 

In [10]:
for param in model.parameters():
    print(param)  # 打印参数的形状

Parameter containing:
tensor([[ 0.0244,  0.0191,  0.0004,  ...,  0.0115, -0.0090, -0.0084],
        [ 0.0108,  0.0307,  0.0153,  ...,  0.0210, -0.0395,  0.0401],
        [ 0.0028,  0.0051, -0.0276,  ...,  0.0244, -0.0001,  0.0081],
        ...,
        [ 0.0070, -0.0012,  0.0051,  ..., -0.0033, -0.0044, -0.0047],
        [ 0.0163,  0.0056,  0.0018,  ..., -0.0022,  0.0056, -0.0261],
        [ 0.0075, -0.0016, -0.0015,  ...,  0.0160, -0.0131, -0.0305]],
       requires_grad=True)
Parameter containing:
tensor([[[ 0.0410],
         [ 0.0567],
         [ 0.0065],
         ...,
         [-0.0218],
         [-0.0534],
         [-0.0185]],

        [[-0.0545],
         [-0.0235],
         [ 0.0255],
         ...,
         [-0.0417],
         [ 0.0665],
         [ 0.0216]],

        [[ 0.0343],
         [-0.0313],
         [ 0.0330],
         ...,
         [-0.0636],
         [ 0.0448],
         [ 0.0203]],

        ...,

        [[ 0.0486],
         [-0.0479],
         [ 0.0224],
         ...,

       requires_grad=True)
Parameter containing:
tensor([[[ 0.0050],
         [ 0.0042],
         [-0.0160],
         ...,
         [-0.0328],
         [-0.0285],
         [ 0.0273]],

        [[-0.0323],
         [-0.0082],
         [ 0.0229],
         ...,
         [ 0.0209],
         [ 0.0031],
         [ 0.0078]],

        [[ 0.0025],
         [-0.0276],
         [-0.0187],
         ...,
         [-0.0303],
         [ 0.0318],
         [ 0.0031]],

        ...,

        [[-0.0057],
         [-0.0350],
         [ 0.0176],
         ...,
         [-0.0351],
         [-0.0089],
         [ 0.0308]],

        [[-0.0335],
         [ 0.0261],
         [-0.0123],
         ...,
         [ 0.0036],
         [-0.0061],
         [-0.0280]],

        [[ 0.0260],
         [-0.0110],
         [ 0.0202],
         ...,
         [-0.0006],
         [-0.0077],
         [ 0.0180]]], requires_grad=True)
Parameter containing:
tensor([-7.1376e-03, -3.6476e-03,  3.3002e-02, -3.6513e-03,  4.1841e-03,
      

       requires_grad=True)
Parameter containing:
tensor([[-0.0021,  0.0013, -0.0043,  ...,  0.0235, -0.0005, -0.0025],
        [-0.0059,  0.0018,  0.0023,  ...,  0.0237,  0.0449, -0.0149],
        [ 0.0164,  0.0033,  0.0134,  ...,  0.0085,  0.0078, -0.0006],
        ...,
        [ 0.0062,  0.0136,  0.0196,  ..., -0.0282, -0.0168,  0.0027],
        [ 0.0182, -0.0205,  0.0189,  ...,  0.0238, -0.0093, -0.0083],
        [-0.0076, -0.0119,  0.0219,  ...,  0.0050, -0.0079,  0.0091]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

       requires_grad=True)
Parameter containing:
tensor([[-0.0010, -0.0503, -0.0289,  ..., -0.0232,  0.0059, -0.0204],
        [ 0.0035,  0.0309,  0.0257,  ...,  0.0188, -0.0279,  0.0066],
        [-0.0006, -0.0088,  0.0110,  ..., -0.0086, -0.0197, -0.0174],
        ...,
        [ 0.0139,  0.0242, -0.0091,  ..., -0.0081,  0.0080, -0.0104],
        [-0.0024, -0.0432,  0.0419,  ..., -0.0118, -0.0288,  0.0117],
        [ 0.0029, -0.0046, -0.0278,  ..., -0.0366, -0.0230,  0.0004]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0558,  0.0329,  0.0194,  ...,  0.0077, -0.0297,  0.0420],
        [ 0.0045,  0.0142, -0.0026,  ..., -0.0093, -0.0152, -0.0262],
        [-0.0299,  0.0152,  0.0213,  ..., -0.0201, -0.0215,  0.0035],
        ...,
        [-0.0036,  0.0003, -0.0118,  ..., -0.0160, -0.0122,  0.0075],
        [-0.0158, -0.0233,  0.0109,  ...,  0.0034, -0.0012, -0.0082],
        [-0.0072,  0.0065, -0.0140,  ...,  0.0187,  0.0085, -0.0231]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.

       requires_grad=True)
Parameter containing:
tensor([[ 0.0393, -0.0146,  0.0094,  ...,  0.0032,  0.0039,  0.0167],
        [-0.0139,  0.0040, -0.0179,  ...,  0.0393,  0.0156,  0.0116],
        [ 0.0304, -0.0137,  0.0229,  ...,  0.0133,  0.0067,  0.0374],
        ...,
        [ 0.0044,  0.0022,  0.0014,  ..., -0.0184, -0.0053, -0.0146],
        [ 0.0159, -0.0166,  0.0161,  ...,  0.0138, -0.0214, -0.0074],
        [ 0.0078, -0.0304, -0.0260,  ...,  0.0286,  0.0137, -0.0046]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       requires_grad=True)
Parameter containing:
tensor([[-0.0109, -0.0206,  0.0020,  ...,  0.0151,  0.0070, -0.0061],
        [-0.0147,  0.0008,  0.0449,  ...,  0.0043,  0.0134,  0.0276]

       requires_grad=True)
Parameter containing:
tensor([[ 0.0123, -0.0460, -0.0365,  ..., -0.0092,  0.0187, -0.0242],
        [ 0.0135, -0.0086, -0.0079,  ..., -0.0360,  0.0037, -0.0056],
        [ 0.0049,  0.0407,  0.0267,  ..., -0.0205, -0.0166, -0.0179],
        ...,
        [-0.0101,  0.0446,  0.0168,  ..., -0.0121,  0.0332, -0.0143],
        [ 0.0232, -0.0143,  0.0185,  ...,  0.0140, -0.0359,  0.0128],
        [ 0.0206,  0.0107, -0.0099,  ..., -0.0032, -0.0032, -0.0254]],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       requires_grad=True)
Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.

In [11]:
for name, param in model.named_parameters():
    print(name)

item_embedding_ls.weight
sequenceMixer.fn.0.weight
sequenceMixer.fn.0.bias
sequenceMixer.fn.3.weight
sequenceMixer.fn.3.bias
sequenceMixer.norm.weight
sequenceMixer.norm.bias
channelMixer.fn.0.weight
channelMixer.fn.0.bias
channelMixer.fn.3.weight
channelMixer.fn.3.bias
channelMixer.norm.weight
channelMixer.norm.bias
LayerNorm.weight
LayerNorm.bias
output_layer_1_1.weight
output_layer_1_1.bias
output_layer_2_1.weight
output_layer_2_1.bias
type_embedding.weight
item_embedding.weight
position_embedding.weight
trm_encoder.layer.0.multi_head_attention.out_fc.weight
trm_encoder.layer.0.multi_head_attention.out_fc.bias
trm_encoder.layer.0.multi_head_attention.attention1.E.weight
trm_encoder.layer.0.multi_head_attention.attention1.E.bias
trm_encoder.layer.0.multi_head_attention.attention1.F.weight
trm_encoder.layer.0.multi_head_attention.attention1.F.bias
trm_encoder.layer.0.multi_head_attention.attention1.W_V.weight
trm_encoder.layer.0.multi_head_attention.attention1.W_V.bias
trm_encoder.lay