# 定义原始效率计算函数

In [36]:
import time
import torch
import torchprofile
import subprocess
from torch.nn import DataParallel

def get_gpu_memory():
    """Return the used and total GPU memory summed over all GPUs nvidia-smi lists.

    Runs ``nvidia-smi --query-gpu=memory.used,memory.total`` in CSV mode
    (no header, no units) and adds up the per-GPU rows.

    Returns:
        tuple[int, int]: ``(used_memory, total_memory)`` as the raw integers
        printed by nvidia-smi (callers in this file label them as MB).
        ``(0, 0)`` when nvidia-smi prints no rows at all.

    Raises:
        FileNotFoundError: if the ``nvidia-smi`` executable cannot be found.
        subprocess.CalledProcessError: if nvidia-smi exits with a non-zero code.
        ValueError: if a non-blank row is not two comma-separated integers.
    """
    raw_output = subprocess.check_output(
        [
            'nvidia-smi',
            '--query-gpu=memory.used,memory.total',
            '--format=csv,nounits,noheader',
        ]
    ).decode('utf-8')

    used_memory = 0
    total_memory = 0
    # splitlines() handles both "\n" and "\r\n"; blank rows are skipped so
    # that empty output yields (0, 0) instead of failing on int('').
    for row in raw_output.splitlines():
        if not row.strip():
            continue
        used, total = map(int, row.split(','))
        used_memory += used
        total_memory += total

    return used_memory, total_memory

# Smoke test: query nvidia-smi once and print the aggregate usage.
used, total = get_gpu_memory()
print(f"GPU Memory Usage: {used} MB / {total} MB")

def evaluate_model_efficiency(model, input, criterion, optimizer, device="cuda:5"):
    """Profile one forward pass and one training step of ``model`` and print the results.

    The function prints four measurements:
      * wall-clock time of a single forward pass,
      * wall-clock time of forward + loss + backward + optimizer step,
      * the operation count reported by ``torchprofile.profile_macs`` (in units of 1e9),
      * the GPU memory reported by ``get_gpu_memory()`` (converted by / 1024).

    Args:
        model: the network to profile. It is called as
            ``model(x_enc, x_mark_enc, mask=None, enable_mask=None,
            task_id=1, task_name='classification')``.
        input: input tensor; an all-zero float tensor of the same shape is
            passed alongside it as the second positional argument.
        criterion: loss callable invoked as ``criterion(output, target)`` where
            ``target`` is random noise with the same shape as the output.
        optimizer: optimizer whose ``step()`` is included in the training-step timing.
        device: device (string or ``torch.device``) to run on. Defaults to
            ``"cuda:5"``, the value that used to be hard-coded.

    Returns:
        None. All results are printed.
    """
    device = torch.device(device)

    def _sync():
        # CUDA kernels are launched asynchronously; without a synchronize the
        # host clock would stop before the GPU has actually finished the work.
        if device.type == "cuda":
            torch.cuda.synchronize(device)

    x_enc = input.to(device)
    # All-zero float tensor shaped like the input, used as the second model input.
    x_mark_enc = torch.zeros_like(input).float().to(device)
    model = model.to(device)

    # Positional tensor inputs and the non-tensor keyword arguments of the model call.
    inputs = [x_enc, x_mark_enc]
    kwargs = {'mask': None, 'enable_mask': None, 'task_id': 1, 'task_name': 'classification'}

    # --- forward pass only ---
    _sync()
    start_time = time.perf_counter()
    output = model(*inputs, **kwargs)
    _sync()
    forward_time = time.perf_counter() - start_time

    # --- forward + backward + optimizer step ---
    # Clear gradients left over from earlier calls so they do not accumulate
    # into this step; done outside the timed region.
    optimizer.zero_grad()
    _sync()
    start_time = time.perf_counter()
    output = model(*inputs, **kwargs)
    target = torch.randn_like(output).to(device)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    _sync()
    total_time = time.perf_counter() - start_time

    # Same call as above, but spelled positionally for the tracer used by torchprofile.
    # NOTE(review): the positional order is assumed to match the model's forward
    # signature (third to fifth slots None, then task_id, task_name, None) - confirm.
    inputs_tuple = (x_enc, x_mark_enc, None, None, None, 1, 'classification', None)

    # NOTE(review): profile_macs counts multiply-accumulate operations, so the
    # value printed as "GFLOPs" below is really GMACs - confirm before quoting it.
    flops = torchprofile.profile_macs(model, inputs_tuple) / 1e9

    # NOTE: get_gpu_memory() sums over every GPU nvidia-smi lists, not only `device`.
    used_memory, total_memory = get_gpu_memory()
    used_memory, total_memory = used_memory / 1024, total_memory / 1024
    print("Forward pass time: {:.6f} seconds".format(forward_time))
    print("Forward and backward pass time: {:.6f} seconds".format(total_time))
    print("FLOPs: {:.6f} GFLOPs".format(flops))
    print(f"GPU Memory Usage: {used_memory:.2f} GB / {total_memory:.2f} GB")

GPU Memory Usage: 58348 MB / 196608 MB


# 导入模型

In [28]:
import argparse
import torch
from exp.exp_sup import Exp_All_Task as Exp_All_Task_SUP
import random
import numpy as np
import sys

class TrainingConfig:
    """Command-line configuration for RmGPT supervised training.

    Builds an ``argparse.ArgumentParser`` with every training option and
    exposes :meth:`parse` so the same defaults can be used from a script
    (``sys.argv``) or from a notebook (pass an explicit list, e.g. ``[]``).
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser(description='RmGPT supervised training')
        self.add_arguments()

    @staticmethod
    def _str2bool(value):
        """Convert a command-line string such as 'True', 'false' or '0' to a bool.

        ``type=bool`` must not be used with argparse: ``bool('False')`` is True
        because any non-empty string is truthy.

        Raises:
            argparse.ArgumentTypeError: if the text is not a recognised boolean.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays False, as it was under bool('').
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    def add_arguments(self):
        """Register every supported option on ``self.parser``."""
        add = self.parser.add_argument

        # basic config
        add('--task_name', type=str, default='ALL_task', help='task name')
        add('--is_training', type=int, default=1, help='status')
        add('--model_id', type=str, default='test', help='model id')
        add('--model', type=str, default='RmGPT', help='model name')

        # data loader
        add('--data', type=str, default='All', help='dataset type')
        add('--features', type=str, default='M',
            help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
        add('--target', type=str, default='OT', help='target feature in S or MS task')
        add('--freq', type=str, default='h',
            help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
        add('--task_data_config_path', type=str, default='data_provider/data_config/main_result/multi_task_small.yaml', help='root path of the task and data yaml file')
        add('--subsample_pct', type=str, default=None, help='subsample percent')

        # device settings
        add('--device', type=str, default='cuda:0', help='device')

        # ddp settings
        add('--ddp', type=self._str2bool, default=False, help='whether to use ddp')
        add('--local-rank', type=int, help='local rank')
        add("--dist_url", default="env://", type=str, help='url used to set up distributed training')
        add('--num_workers', type=int, default=8, help='data loader num workers')
        add("--memory_check", action="store_true", default=False)
        # NOTE: store_true with default=True means this option is always True;
        # kept unchanged so existing command lines behave the same.
        add("--large_model", action="store_true", default=True)

        # optimization settings
        add('--itr', type=int, default=1, help='experiments times')
        add('--train_epochs', type=int, default=10, help='train epochs')
        add("--prompt_tune_epoch", type=int, default=0)
        add('--warmup_epochs', type=int, default=0, help='warmup epochs')
        add('--batch_size', type=int, default=32, help='batch size of train input data')
        add('--acc_it', type=int, default=1, help='acc iteration to enlarge batch size')
        add('--learning_rate', type=float, default=0.0003, help='optimizer learning rate')
        add('--min_lr', type=float, default=None, help='optimizer min learning rate')
        add('--weight_decay', type=float, default=0.0, help='optimizer weight decay')
        add('--layer_decay', type=float, default=None, help='optimizer layer decay')
        add('--des', type=str, default='test', help='exp description')
        add('--lradj', type=str, default='prompt_tuning', help='adjust learning rate')
        add('--clip_grad', type=float, default=5.0, help='Clip gradient norm')
        add('--dropout', type=float, default=0.1, help='dropout')
        add('--checkpoints', type=str, default='./checkpoints/', help='save location of model checkpoints')
        add('--pretrained_weight', type=str, default='None')
        add('--debug', type=str, default='disabled', help='debug mode')
        add('--project_name', type=str, default='RmGPT-multitask', help='wandb project name')

        # model settings
        add('--d_model', type=int, default=512, help='dimension of model')
        add('--n_heads', type=int, default=8, help='num of heads')
        add('--e_layers', type=int, default=4, help='num of encoder layers')
        add("--share_embedding", action="store_true", default=False)
        add("--patch_len", type=int, default=256)
        add("--stride", type=int, default=256)
        add("--prompt_num", type=int, default=10)
        add('--fix_seed', type=int, default=2024, help='seed')
        add("--input_len", type=int, default=2048)
        add('--mode_debug', type=self._str2bool, default=False, help='whether to debug')

        # task related settings
        add('--inverse', action='store_true', default=False, help='inverse output data')
        add('--mask_rate', type=float, default=0.25, help='mask ratio')
        # argparse %-formats help strings, so a literal percent sign must be written as %%.
        add('--anomaly_ratio', type=float, default=1.0, help='prior anomaly ratio (%%)')
        add("--offset", type=int, default=0)
        add("--max_offset", type=int, default=0)
        add('--zero_shot_forecasting_new_length', type=str, default=None, help='unify')

    def parse(self, args=None):
        """Parse ``args`` and return the resulting namespace.

        Args:
            args: list of argument strings. ``None`` falls back to
                ``sys.argv[1:]``; in Jupyter pass ``[]`` to get the defaults
                without touching the kernel's own command line.

        Returns:
            argparse.Namespace: the parsed options.
        """
        if args is None:
            args = sys.argv[1:]
        return self.parser.parse_args(args)

# Notebook usage: build the parser and take every default value.
config = TrainingConfig()
# An explicit empty list keeps argparse away from the kernel's own command line.
args = config.parse(args=[])

# Print one option as a sanity check.
print(args.task_name)

ALL_task


# 导入CWRU数据集

In [26]:
# Build the experiment, fetch the training loaders and pull one batch
# from the loader at index 1.
Exp = Exp_All_Task_SUP(args)
_, data_loader_list = Exp._get_data(flag='train')
data_loader = data_loader_list[1]
batch = next(iter(data_loader))
(input, label) = batch

device id cuda:0
Non-cross condition setting when [train stage]
[SMU] start_percentage: 0.0, end_percentage: 0.8
PHM_SMU 114
PHM_Challenge2024 12240


In [4]:
# Display the shape of the sampled batch.
input.size()

torch.Size([5, 2048, 21])

# 设置导入模型

In [5]:
# Profile the RmGPT model.
args.model = 'RmGPT'
torch.cuda.empty_cache()  # release cached GPU memory before building the model
Exp = Exp_All_Task_SUP(args)
model = Exp.model
# Loss function (first entry returned for the task list) and optimizer come from the experiment.
criterion = Exp._select_criterion(Exp.task_data_config_list)[0]
optimizer = Exp._select_optimizer()
evaluate_model_efficiency(model, input, criterion, optimizer)

device id cuda:0
base lr: 1.92e-03
actual lr: 4.69e-05
accumulate grad iterations: 1
effective batch size: 5
Forward pass time: 0.152402 seconds
Forward and backward pass time: 0.361839 seconds
FLOPs: 51.097429 GFLOPs


In [6]:
# Profile the RmGPT_woAtten model; `criterion` is reused from the earlier cell.
args.model = 'RmGPT_woAtten'
torch.cuda.empty_cache()  # release cached GPU memory before building the model
Exp = Exp_All_Task_SUP(args)
model = Exp.model
optimizer = Exp._select_optimizer()  # fresh optimizer for the new model
evaluate_model_efficiency(model, input, criterion, optimizer)

device id cuda:0
base lr: 1.92e-03
actual lr: 4.69e-05
accumulate grad iterations: 1
effective batch size: 5
Forward pass time: 0.092807 seconds
Forward and backward pass time: 0.377280 seconds
FLOPs: 51.106890 GFLOPs


In [7]:
# Profile the RmGPT_woPatch model; `criterion` is reused from the earlier cell.
args.model = 'RmGPT_woPatch'
torch.cuda.empty_cache()  # release cached GPU memory before building the model
Exp = Exp_All_Task_SUP(args)
model = Exp.model
optimizer = Exp._select_optimizer()  # fresh optimizer for the new model
evaluate_model_efficiency(model, input, criterion, optimizer)

device id cuda:0
base lr: 1.92e-03
actual lr: 4.69e-05
accumulate grad iterations: 1
effective batch size: 5
Forward pass time: 10.131988 seconds
Forward and backward pass time: 30.976710 seconds
FLOPs: 3813.998093 GFLOPs


In [30]:
# Profile the RmGPT model again with the current `args`; `criterion` is reused.
args.model = 'RmGPT'
torch.cuda.empty_cache()  # release cached GPU memory before building the model
Exp = Exp_All_Task_SUP(args)
model = Exp.model
optimizer = Exp._select_optimizer()  # fresh optimizer for the new model
evaluate_model_efficiency(model, input, criterion, optimizer)

device id cuda:0
base lr: 3.00e-04
actual lr: 3.00e-04
accumulate grad iterations: 1
effective batch size: 32
Forward pass time: 0.049698 seconds
Forward and backward pass time: 0.220226 seconds
FLOPs: 654.047087 GFLOPs
GPU Memory Usage: 60.33 GB / 192.00 GB


In [39]:
torch.cuda.empty_cache()

# Only parameters whose name contains one of these fragments stay trainable;
# everything else is frozen.
task_param_keys = (
    'prompt_token',
    'mask_prompt',
    'cls_prompt',
    'mask_token',
    'cls_token',
    'category_token',
)
for name, param in model.named_parameters():
    is_task_param = any(key in name for key in task_param_keys)
    param.requires_grad = is_task_param
    if is_task_param:
        print("trainable:", name)

# Rebuild the optimizer after changing requires_grad, then profile again.
optimizer = Exp._select_optimizer()
evaluate_model_efficiency(model, input, criterion, optimizer)

trainable: prompt_tokens.SMU
trainable: prompt_tokens.PHM_Challenge2024
trainable: cls_tokens.PHM_SMU
trainable: cls_tokens.PHM_Challenge2024
trainable: category_tokens.PHM_SMU
trainable: category_tokens.PHM_Challenge2024
trainable: cls_token_head.weight
trainable: cls_token_head.bias
base lr: 3.00e-04
actual lr: 3.00e-04
accumulate grad iterations: 1
effective batch size: 32
Forward pass time: 0.034220 seconds
Forward and backward pass time: 0.067653 seconds
FLOPs: 654.047087 GFLOPs
GPU Memory Usage: 57.17 GB / 192.00 GB


In [41]:
# Name fragments that mark the task-specific parameters kept trainable.
task_param_keys = (
    'prompt_token',
    'mask_prompt',
    'cls_prompt',
    'mask_token',
    'cls_token',
    'category_token',
)

trainable_params = 0
non_trainable_params = 0

for name, param in model.named_parameters():
    # Freeze and count everything that is not a task-specific parameter.
    if not any(key in name for key in task_param_keys):
        param.requires_grad = False
        non_trainable_params += param.numel()
        continue
    param.requires_grad = True
    trainable_params += param.numel()
    print("trainable:", name)

# Both totals are printed divided by 1e6.
print(f"Total trainable parameters: {trainable_params/1e6}")
print(f"Total non-trainable parameters: {non_trainable_params/1e6}")

trainable: prompt_tokens.SMU
trainable: prompt_tokens.PHM_Challenge2024
trainable: cls_tokens.PHM_SMU
trainable: cls_tokens.PHM_Challenge2024
trainable: category_tokens.PHM_SMU
trainable: category_tokens.PHM_Challenge2024
trainable: cls_token_head.weight
trainable: cls_token_head.bias
Total trainable parameters: 3.037696
Total non-trainable parameters: 66.048257
