In [1]:
import importlib
import argparse
import os
import sys
import time
import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

import random
import numpy as np
import yaml
from box import Box
from pprint import pprint
import wandb
from tqdm import tqdm
from datetime import timedelta
from collections import defaultdict

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# Absolute path of the YAML file that configures this experiment.
# NOTE(review): machine-specific absolute path; consider making it configurable.
CONFIG_FILENAME = '/home/liranc6/ecg_forecasting/liran_project/mrdiff/src/config_ecg.yml'

assert CONFIG_FILENAME.endswith('.yml'), f"expected a .yml config file, got: {CONFIG_FILENAME}"

# Explicit encoding so parsing does not depend on the machine's locale.
with open(CONFIG_FILENAME, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

# Add the project root (taken from the config) to sys.path so that the
# `liran_project` imports that follow can be resolved.
ProjectPath = config['project_path']
if ProjectPath not in sys.path:  # keep the cell idempotent when it is re-run
    sys.path.append(ProjectPath)

from liran_project.mrdiff.src.parser import parse_args
from liran_project.utils.dataset_loader import SingleLeadECGDatasetCrops_mrDiff as DataSet
from liran_project.utils.util import ecg_signal_difference
import liran_project.mrdiff.exp_main
# from liran_project.mrdiff.exp_main import Exp_Main

# Add the directory containing the exp module to the sys.path
exp_module_path = os.path.join(ProjectPath, 'mrDiff')
if exp_module_path not in sys.path:  # avoid duplicate entries when the cell is re-run
    sys.path.append(exp_module_path)

# from mrDiff.exp.exp_main import Exp_Main
from mrDiff.data_process.etth_dataloader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_Wind, Dataset_Caiso, Dataset_Production, Dataset_Caiso_M, Dataset_Production_M
from mrDiff.data_process.financial_dataloader import DatasetH
from mrDiff.data_process.forecast_dataloader import ForecastDataset
from mrDiff.exp.exp_basic import Exp_Basic
from mrDiff.models_diffusion import DDPM
from mrDiff.utils.tools import EarlyStopping, adjust_learning_rate, visual
from mrDiff.utils.metrics import metric

from liran_project.mrdiff.src.parser import Args

In [2]:
# Build the experiment arguments object from the same YAML file that was
# loaded in the first cell.
args = Args(CONFIG_FILENAME)

# Now you can use args as needed
# Pretty-print every attribute stored on `args` for inspection.
pprint(vars(args))

{'config': Box({'project_path': '/home/liranc6/ecg_forecasting', 'wandb': {'entity': 'liranc6', 'mode': 'disabled', 'project': 'mrdiff', 'resume': 'None', 'run_name': 'first_run_ever', 'id': 'None', 'save_code': True, 'resume_from': 'None'}, 'general': {'random_seed': 42, 'evaluate': False, 'tag': None, 'dataset': 'icentia11k', 'features': 'S', 'training_mode': 'ONE', 'interval': 1000}, 'optimization': {'learning_rate': 0.001, 'batch_size': 16, 'test_batch_size': 8, 'patience': 10, 'weight_decay': 1e-05, 'lradj': '3', 'pct_start': 0.3}, 'hardware': {'print_gpu_memory_usage': True, 'num_workers': 0, 'use_gpu': True, 'gpu': 0, 'use_multi_gpu': False, 'devices': '0', 'device_ids': [0]}, 'paths': {'train_data': '/home/liranc6/data/with_R_beats/icentia11k-continuous-ecg_normal_sinus_subset_npArrays_splits/10minutes/train/p0_to_p32.h5', 'validation_data': '/home/liranc6/data/with_R_beats/icentia11k-continuous-ecg_normal_sinus_subset_npArrays_splits/10minutes/val/p33_to_p39.h5', 'test_data': 

In [3]:
# Plain-dict snapshot of the Box configuration; everything below uses key lookups.
config_dict = args.config.to_dict()

# --- general ---
general_section = config_dict['general']
random_seed, tag, dataset, features = (
    general_section[name] for name in ('random_seed', 'tag', 'dataset', 'features')
)

# --- optimization ---
optimization_section = config_dict['optimization']
learning_rate = optimization_section['learning_rate']
batch_size = optimization_section['batch_size']

# --- training ---
training_section = config_dict['training']
sequence_section = training_section['sequence']
context_len = sequence_section['context_len']
label_len = sequence_section['label_len']
model = training_section['model_info']['model']
pred_len = sequence_section['pred_len']
iterations = training_section['iterations']['itr']

# --- data ---
inverse = config_dict['data']['inverse']

# cuDNN switches used for this notebook.
cudnn_backend = torch.backends.cudnn
cudnn_backend.benchmark = False
cudnn_backend.deterministic = True  # Can change it to False --> default: False
cudnn_backend.enabled = True


In [4]:

# wandb: keyword arguments later unpacked into wandb.init
wandb_init_config = dict(
    mode=args.wandb.mode,
    project=args.wandb.project,
    save_code=args.wandb.save_code,
)
wandb_init_config

{'mode': 'disabled', 'project': 'mrdiff', 'save_code': True}

In [5]:
def log_config_diffs(old_config, new_config, step):
    """Append a note to the active wandb run listing config keys that changed.

    Args:
        old_config: mapping holding the configuration before the update.
        new_config: mapping holding the configuration after the update.
        step: label written into the note to identify when the change happened.

    A key is reported when it is new or when its value differs from the old one.
    Nothing is written when there are no differences.
    """
    diffs = {
        key: {'old': old_config.get(key), 'new': new_value}
        for key, new_value in new_config.items()
        if key not in old_config or old_config[key] != new_value
    }
    if not diffs:
        return

    note = f"Config changes at step {step}:\n"
    for key, value in diffs.items():
        note += f"{key}: {value['old']} -> {value['new']}\n"
    wandb.run.notes = (wandb.run.notes or "") + note + "\n\nAdditional information added later:\n"


# The string "None" (not the None object) is the config's marker for "unset".
if args.wandb.resume != "None":
    # Prefer the dedicated run id from the config. When it is unset, fall back
    # to the resume value, which is what this cell always used before.
    configured_id = getattr(args.wandb, "id", "None")
    run_id = args.wandb.resume if configured_id in ("None", None) else configured_id
    wandb_init_config.update({
                            "id": run_id,
                            "resume": args.wandb.resume
                            })

    if args.wandb.resume_from != "None":
        # NOTE(review): `resume_from` is handed to wandb.init as `config`;
        # confirm this is intended rather than wandb's own `resume_from` argument.
        wandb_init_config["config"] = args.wandb.resume_from

    run = wandb.init(**wandb_init_config)
    print(f"Resuming wandb run id: {wandb.run.id}")

    # Snapshot through the mapping protocol (keys() + item access) rather than
    # relying on a `.copy()` method of the wandb config object.
    old_config = dict(wandb.config)
    # Values of a resumed run are expected to differ from the stored ones, so
    # explicitly allow existing keys to be overwritten.
    wandb.config.update(args, allow_val_change=True)
    new_config = dict(wandb.config)
    log_config_diffs(old_config, new_config, step="update_args")

else:
    # Bind `run` here as well so later cells can rely on it in both branches.
    run = wandb.init(**wandb_init_config, config=args)
    print(f"New wandb run id: {wandb.run.id}")
    
    


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


New wandb run id: n5j4orjo


In [6]:
# Seed Python, PyTorch and NumPy (in that order) with the configured value.
fix_seed = random_seed
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(fix_seed)

iteration = 1
# setting: underscore-joined description of this run
setting_fields = (
    f"{model}",
    f"{dataset}",
    f"ft{features}",
    f"sl{context_len}",
    f"ll{label_len}",
    f"pl{pred_len}",
    f"lr{learning_rate}",
    f"bs{batch_size}",
    f"inv{inverse}",
    f"itr{iteration}",
)
setting = "_".join(setting_fields)

# An optional tag goes at the very end.
setting = setting if tag is None else setting + f"_{tag}"


In [7]:
# Reload the module
# Re-executes liran_project.mrdiff.exp_main so that the Exp_Main used by the
# following cells comes from the current source file.
importlib.reload(liran_project.mrdiff.exp_main)

<module 'liran_project.mrdiff.exp_main' from '/home/liranc6/ecg_forecasting/liran_project/mrdiff/exp_main.py'>

In [8]:
# Instantiate the experiment through the module attribute (Exp_Main is not
# imported by name in this notebook), so the reloaded class is the one used.
exp = liran_project.mrdiff.exp_main.Exp_Main(args)

Use GPU: cuda:0


In [9]:
# Read the three data splits into the experiment object, one call per split.
# NOTE(review): presumably each call stores its data on `exp` for train/test to use — confirm in exp_main.
exp.read_data('train')
exp.read_data('val')
exp.read_data('test')

Reading 00002: 100%|██████████| 3/3 [00:03<00:00,  1.19s/it]
Reading 00002: 100%|██████████| 3/3 [00:04<00:00,  1.38s/it]
Reading 00041: 100%|██████████| 2/2 [00:01<00:00,  1.42it/s]


In [11]:
# Reload the module
# Picks up edits made to exp_main.py without recreating `exp`.
importlib.reload(liran_project.mrdiff.exp_main)

# Assuming `exp` is an existing instance of `Exp_Main`
# Point the live instance at the class object from the reloaded module; the
# instance's existing attributes are left untouched by this assignment.
exp.__class__ = liran_project.mrdiff.exp_main.Exp_Main

In [12]:
print(f'>>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>')

# Only CUDA activity is recorded; the CPU activity is left switched off.
profiled_activities = [
    # torch.profiler.ProfilerActivity.CPU,
    torch.profiler.ProfilerActivity.CUDA,
]
profiler_options = dict(record_shapes=True, profile_memory=True, with_stack=True)

# `prof` stays available afterwards for the profiler-inspection cells below.
with torch.profiler.profile(activities=profiled_activities, **profiler_options) as prof:
    exp.train(setting)

>>>>>>>start training : DDPM_icentia11k_ftS_sl1_ll15000_pl15000_lr0.001_bs16_invFalse_itr1>>>>>>>>>>>>>>>>>>>>>>>>>
Saving model to /home/liranc6/ecg_forecasting/liran_project/results/icentia11k/mrDiff/DDPM_icentia11k_ftS_sl1_ll15000_pl15000_lr0.001_bs16_invFalse_itr1/03_10_2024_0742


STAGE:2024-10-03 07:42:40 2032846:2032846 ActivityProfilerController.cpp:314] Completed Stage: Warm Up


epochs_pbar:   0%|          | 0/2 [00:00<?, ?it/s]

train_loader_pbar:   0%|          | 0/20 [00:00<?, ?it/s]

GPU cuda:0 (NVIDIA A100-SXM4-80GB) Memory Usage:
  Total Memory: 79.15 GB
  Reserved Memory: 0.04 GB
  Allocated Memory: 0.04 GB
  Free Memory: 0.00 GB
epoch: 0, i: 0{'gpu_name': 'NVIDIA A100-SXM4-80GB', 'total_memory_gb': 79.1510009765625, 'reserved_memory_gb': 0.044921875, 'allocated_memory_gb': 0.04173755645751953, 'free_memory_gb': 0.0031843185424804688}
GPU cuda:0 (NVIDIA A100-SXM4-80GB) Memory Usage:
  Total Memory: 79.15 GB
  Reserved Memory: 0.05 GB
  Allocated Memory: 0.04 GB
  Free Memory: 0.00 GB
gpu_prints=0
check_gpu_memory_usage(self.device):
{'gpu_name': 'NVIDIA A100-SXM4-80GB', 'total_memory_gb': 79.1510009765625, 'reserved_memory_gb': 0.046875, 'allocated_memory_gb': 0.04352569580078125, 'free_memory_gb': 0.00334930419921875}
GPU cuda:0 (NVIDIA A100-SXM4-80GB) Memory Usage:
  Total Memory: 79.15 GB
  Reserved Memory: 0.06 GB
  Allocated Memory: 0.05 GB
  Free Memory: 0.01 GB
gpu_prints=1
check_gpu_memory_usage(self.device):
{'gpu_name': 'NVIDIA A100-SXM4-80GB', 'total_

vali_loader_pbar:   0%|          | 0/20 [00:00<?, ?it/s]

STAGE:2024-10-03 07:54:22 2032846:2032846 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-10-03 07:54:24 2032846:2032846 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [1]:
import torch

# Assuming `prof` is your profiler object
# (`prof` is created by the profiled training cell, which must have been run
# in the current kernel before this cell.)
key_averages = prof.key_averages()

# Filter to include only CUDA operations
cuda_operations = [item for item in key_averages if 'cuda' in item.key]

# Print the ten filtered entries with the largest self CUDA memory usage.
# `table()` belongs to the object returned by `prof.key_averages()` and takes
# no list of events, so the filtered entries are sorted and printed directly.
top_cuda_operations = sorted(
    cuda_operations,
    key=lambda item: item.self_cuda_memory_usage,
    reverse=True,
)[:10]
for item in top_cuda_operations:
    print(f"{item.key:<60} {item.self_cuda_memory_usage:>20,d} bytes")

NameError: name 'prof' is not defined

In [13]:
import torch
import torch.profiler

# Averaged profiler events collected by the `prof` object.
key_averages = prof.key_averages()

# Add up the self CUDA memory figure of every averaged event.
total_cuda_memory_usage = 0
for averaged_event in key_averages:
    total_cuda_memory_usage += averaged_event.self_cuda_memory_usage
print(f"Total CUDA Memory Usage: {total_cuda_memory_usage} bytes")

# Ten entries ranked by self CUDA memory usage.
memory_table = key_averages.table(sort_by="self_cuda_memory_usage", row_limit=10)
print(memory_table)

Total CUDA Memory Usage: -138416653312 bytes
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                       cudaLaunchKernel        10.46%       14.072s        10.46%       14.072s       7.432us       0.000us         0.00%       0.000us       0.000us     -13.8

In [14]:
# Profiler summary table: ten rows, ranked by self CUDA memory usage.
table_options = {"sort_by": "self_cuda_memory_usage", "row_limit": 10}
print(prof.key_averages().table(**table_options))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                       cudaLaunchKernel        10.46%       14.072s        10.46%       14.072s       7.432us       0.000us         0.00%       0.000us       0.000us     -13.89 Mb     -13.89 Mb    7722.14 Gb    7718.10 G

In [15]:
# Print PyTorch's CUDA memory summary report.
cuda_memory_report = torch.cuda.memory_summary()
print(cuda_memory_report)

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      | 103107 KiB |  61048 MiB |  61782 GiB |  61782 GiB |
|       from large pool |  16640 KiB |  60893 MiB |  61708 GiB |  61708 GiB |
|       from small pool |  86467 KiB |    212 MiB |     74 GiB |     74 GiB |
|---------------------------------------------------------------------------|
| Active memory         | 103107 KiB |  61048 MiB |  61782 GiB |  61782 GiB |
|       from large pool |  16640 KiB |  60893 MiB |  61708 GiB |  61708 GiB |
|       from small pool |  86467 KiB |    212 MiB |     74 GiB |     74 GiB |
|---------------------------------------------------------------

In [16]:
# exp.model_start_training_time = "02_10_2024_135344"

In [17]:
# Evaluate the experiment for the current `setting`.
# NOTE(review): in the recorded run this failed with FileNotFoundError for a
# checkpoint under ".../02_10_2024_135344/", while the training cell reported
# saving under ".../03_10_2024_0742" — presumably `exp.model_start_training_time`
# had been overridden earlier in the kernel session (see the commented-out
# assignment in the previous cell); confirm before relying on this cell.
print(f'>>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
exp.test(setting, test=1)

>>>>>>>testing : DDPM_icentia11k_ftS_sl1_ll15000_pl15000_lr0.001_bs16_invFalse_itr1<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
loading model


FileNotFoundError: [Errno 2] No such file or directory: '/home/liranc6/ecg_forecasting/liran_project/results/icentia11k/mrDiff/DDPM_icentia11k_ftS_sl1_ll15000_pl15000_lr0.001_bs16_invFalse_itr1/02_10_2024_135344/checkpoint.pth'

In [7]:
# Train and test one fresh experiment per configured iteration.
for iteration in range(iterations):
    # setting record of experiments

    # random seed
    # NOTE(review): no seeding is done per iteration here; confirm that is intended.

    # setting
    setting = f"{model}_{dataset}_ft{features}_sl{context_len}_ll{label_len}_pl{pred_len}_lr{learning_rate}_bs{batch_size}_inv{inverse}_itr{iteration}"

    if tag is not None:
        setting += f"_{tag}"

    # `Exp_Main` is never imported by name in this notebook (both direct
    # imports are commented out), so the class is reached through the module,
    # exactly as the single-run cell does.
    exp = liran_project.mrdiff.exp_main.Exp_Main(args)
    # NOTE(review): the single-run flow calls exp.read_data(...) for each split
    # before training — confirm whether train() still loads data on its own.

    print(f'>>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>')
    exp.train(setting)

    print(f'>>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
    exp.test(setting, test=1)

    # Release cached CUDA memory before the next iteration builds a new experiment.
    torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : DDPM_icentia11k_ftS_sl1_ll1500_pl1500_lr0.001_bs8_invFalse_itr0>>>>>>>>>>>>>>>>>>>>>>>>>


Reading 00001: 100%|██████████| 2/2 [00:24<00:00, 12.28s/it]
Reading 00001: 100%|██████████| 2/2 [00:03<00:00,  1.90s/it]
Reading 00042: 100%|██████████| 3/3 [00:04<00:00,  1.58s/it]


KeyboardInterrupt: 

In [None]:
# Display the configured random seed via attribute access on the Box config.
args.config.general.random_seed

In [None]:
def run_experiments(iterations, random_seed, model, dataset, features, seq_len,
                    label_len, pred_len, learning_rate, batch_size, inverse, tag, args,
                    exp_factory=None):
    """Train and test one experiment per iteration and summarise the test metrics.

    Args:
        iterations: number of train/test rounds to run.
        random_seed: seed used when only a single iteration is run; with more
            than one iteration the iteration index is used as the seed instead.
        model, dataset, features, seq_len, label_len, pred_len, learning_rate,
        batch_size, inverse: values embedded in the ``setting`` string that
            names each run.
        tag: optional suffix appended to the ``setting`` string (skipped if None).
        args: argument object handed to the experiment constructor.
        exp_factory: callable building an experiment from ``args``. The result
            must provide ``train(setting)`` and ``test(setting, test=1)``, the
            latter returning the 8 metrics
            ``(mae, mse, rmse, mape, mspe, rse, corr, nrmse)``.
            Defaults to ``liran_project.mrdiff.exp_main.Exp_Main``.

    Returns:
        dict mapping each metric name to a ``(mean, std)`` tuple over all iterations.
    """
    if exp_factory is None:
        # Resolved at call time so the most recently reloaded class is used;
        # a bare `Exp_Main` name is not imported anywhere in this notebook.
        exp_factory = liran_project.mrdiff.exp_main.Exp_Main

    metric_names = ('mae', 'mse', 'rmse', 'mape', 'mspe', 'rse', 'corr', 'nrmse')
    collected = {name: [] for name in metric_names}

    for iteration in range(iterations):
        # setting record of experiments

        # random seed
        fix_seed = iteration if iterations > 1 else random_seed

        random.seed(fix_seed)
        torch.manual_seed(fix_seed)
        np.random.seed(fix_seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True  # Can change it to False --> default: False
        torch.backends.cudnn.enabled = True

        setting = f"{model}_{dataset}_ft{features}_sl{seq_len}_ll{label_len}_pl{pred_len}_lr{learning_rate}_bs{batch_size}_inv{inverse}_itr{iteration}"
        if tag is not None:
            setting += f"_{tag}"

        exp = exp_factory(args)

        print(f'>>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>>')
        exp.train(setting)

        print(f'>>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
        # Explicit unpacking keeps the failure loud if test() returns a different number of metrics.
        mae, mse, rmse, mape, mspe, rse, corr, nrmse = exp.test(setting, test=1)
        for name, score in zip(metric_names, (mae, mse, rmse, mape, mspe, rse, corr, nrmse)):
            collected[name].append(score)

        torch.cuda.empty_cache()

    # mspe used to be collected but then dropped from both the printout and the
    # returned dict; it is reported alongside the other metrics now.
    summary = {name: (np.mean(scores), np.std(scores)) for name, scores in collected.items()}

    print('Final mean normed: ')
    for name, (mean_score, std_score) in summary.items():
        print('> {}:{:.4f}, std:{:.4f}'.format(name, mean_score, std_score))

    return summary
    
# The notebook defines `context_len`, not `seq_len`; it is the value used for
# the `sl` field of the setting string elsewhere in this notebook, which is the
# role the `seq_len` parameter plays inside run_experiments.
results = run_experiments(iterations, random_seed, model, dataset, features, context_len,
                            label_len, pred_len, learning_rate, batch_size, inverse, tag, args)