In [1]:
import importlib
import argparse
import os
import sys
import time
import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

import random
import numpy as np
import yaml
from box import Box
from pprint import pprint
import wandb
from tqdm import tqdm
from datetime import timedelta
from collections import defaultdict

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# Path of the YAML file that drives this notebook.
CONFIG_FILENAME = '/home/liranc6/ecg_forecasting/liran_project/mrdiff/src/config_ecg.yml'

# Fail early if the path does not point at a .yml file.
assert CONFIG_FILENAME.endswith('.yml')

with open(CONFIG_FILENAME, 'r') as file:
    config = yaml.safe_load(file)

# Make the project root importable for the `liran_project` imports below.
# The membership check keeps sys.path free of duplicate entries when this
# cell is executed more than once in the same kernel.
ProjectPath = config['project_path']
if ProjectPath not in sys.path:
    sys.path.append(ProjectPath)

from liran_project.mrdiff.src.parser import parse_args
from liran_project.utils.dataset_loader import SingleLeadECGDatasetCrops_mrDiff as DataSet
from liran_project.utils.util import ecg_signal_difference
import liran_project.mrdiff.exp_main
# from liran_project.mrdiff.exp_main import Exp_Main
from liran_project.utils.common import *

# Add the directory containing the exp module to the sys.path.
# Guarded so that re-running the cell does not append the same entry again.
exp_module_path = os.path.join(ProjectPath, 'mrDiff')
if exp_module_path not in sys.path:
    sys.path.append(exp_module_path)

# from mrDiff.exp.exp_main import Exp_Main
from mrDiff.data_process.etth_dataloader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_Wind, Dataset_Caiso, Dataset_Production, Dataset_Caiso_M, Dataset_Production_M
from mrDiff.data_process.financial_dataloader import DatasetH
from mrDiff.data_process.forecast_dataloader import ForecastDataset
from mrDiff.exp.exp_basic import Exp_Basic
from mrDiff.models_diffusion import DDPM
from mrDiff.utils.tools import EarlyStopping, adjust_learning_rate, visual
from mrDiff.utils.metrics import metric

from liran_project.mrdiff.src.parser import Args

In [2]:
# Build the run arguments from the YAML config file via the project's Args class.
args = Args(CONFIG_FILENAME)

# Dump every instance attribute of `args` for inspection (the output is long).
pprint(vars(args))

{'config': Box({'project_path': '/home/liranc6/ecg_forecasting', 'tqdm': 'terminal', 'resume': {'resume': True, 'resume_from': False, 'resume_optimizer': True, 'resume_epoch': 'None', 'loss_and_metrics': True, 'resume_scheduler': True, 'resume_configuration': True, 'specific_chpt_path': '/home/liranc6/ecg_forecasting/liran_project/results/icentia11k/mrDiff/DDPM_icentia11k_ftS_sl10_ll105_pl30_lr0.001_bs8_invFalse_itr0/09_10_2024_1143/best_checkpoint.pth', 'was_resumed': False}, 'wandb': {'entity': 'liranc6', 'mode': 'disabled', 'project': 'mrdiff', 'resume': 'None', 'run_name': 'None', 'id': 'None', 'save_code': True, 'resume_from': 'None'}, 'general': {'random_seed': 42, 'evaluate': False, 'tag': None, 'dataset': 'icentia11k', 'features': 'S', 'training_mode': 'ONE', 'interval': 1000}, 'optimization': {'learning_rate': 0.001, 'batch_size': 8, 'test_batch_size': 8, 'patience': 10, 'weight_decay': 1e-05, 'lradj': '3', 'pct_start': 0.3}, 'hardware': {'print_gpu_memory_usage': False, 'num_

In [3]:
# Turn the Box-backed configuration into a plain nested dictionary.
config_dict = args.configs.to_dict()

# --- general section ---
general_cfg = config_dict['general']
random_seed = general_cfg['random_seed']
tag = general_cfg['tag']
dataset = general_cfg['dataset']
features = general_cfg['features']

# --- optimization section ---
optimization_cfg = config_dict['optimization']
learning_rate = optimization_cfg['learning_rate']
batch_size = optimization_cfg['batch_size']

# --- training section (sequence lengths, model name, number of iterations) ---
training_cfg = config_dict['training']
sequence_cfg = training_cfg['sequence']
context_len = sequence_cfg['context_len']
label_len = sequence_cfg['label_len']
model = training_cfg['model_info']['model']
pred_len = sequence_cfg['pred_len']
iterations = training_cfg['iterations']['itr']

# --- data section ---
inverse = config_dict['data']['inverse']

# cuDNN flags: no autotuner, deterministic kernels, cuDNN itself left enabled.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True  # Can change it to False --> default: False
torch.backends.cudnn.enabled = True


In [4]:
# wandb: keyword arguments for wandb.init, copied from the `wandb` section of args
wandb_init_config = {
    option: getattr(args.wandb, option)
    for option in ("entity", "mode", "project", "save_code")
}
wandb_init_config

{'entity': 'liranc6',
 'mode': 'disabled',
 'project': 'mrdiff',
 'save_code': True}

In [5]:
# The config encodes "unset" as the literal string "None"; map that sentinel
# to a real None (or to "online" for the mode) before calling wandb.init.
wandb_project_name = args.wandb.project
wandb_id = None if args.wandb.id == "None" else args.wandb.id
wandb_mode = "online" if args.wandb.mode == "None" else args.wandb.mode
wandb_resume = None if args.wandb.resume == "None" else args.wandb.resume
wandb.init(
    project=wandb_project_name,
    id=wandb_id,
    resume=wandb_resume,
    mode=wandb_mode,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




In [6]:
def log_config_diffs(old_config, new_config, step):
    """Append a note to the active wandb run listing the config keys that changed.

    Args:
        old_config: mapping holding the config values before the update
            (must support ``in``, indexing and ``.get``).
        new_config: mapping holding the config values after the update.
        step: label written into the header line of the note.

    A key is reported when it is missing from ``old_config`` or its value differs.
    Nothing is written when there are no differences.
    """
    diffs = {}
    for key in new_config:
        if key not in old_config or old_config[key] != new_config[key]:
            diffs[key] = {'old': old_config.get(key), 'new': new_config[key]}

    if diffs:
        note = f"Config changes at step {step}:\n"
        for key, value in diffs.items():
            note += f"{key}: {value['old']} -> {value['new']}\n"
        wandb.run.notes = (wandb.run.notes or "") + note + "\n\nAdditional information added later:\n"


if args.wandb.resume != "None":
    # The config uses the string "None" for "unset". Translate it for the run id
    # so wandb never receives a run id that is literally "None".
    resume_run_id = args.wandb.id if args.wandb.id != "None" else None
    wandb_init_config.update({
                            "id": resume_run_id,
                            "resume": args.wandb.resume
                            })

    if args.wandb.resume_from != "None":
        wandb_init_config["config"] = args.wandb.resume_from

    run = wandb.init(**wandb_init_config)
    print(f"Resuming wandb run id: {wandb.run.id}")

    # Record which config entries changed relative to the resumed run.
    old_config = wandb.config.copy()
    wandb.config.update(args)
    new_config = wandb.config.copy()
    log_config_diffs(old_config, new_config, step="update_args")
else:
    # Fresh run: `run` is bound here too, so both branches leave the same names defined.
    run = wandb.init(**wandb_init_config, config=args)
    print(f"New wandb run id: {wandb.run.id}")


New wandb run id: 70w9g6ch


In [7]:
# Seed Python's, PyTorch's and NumPy's random number generators with the configured seed.
fix_seed = random_seed
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(fix_seed)

# Experiment identifier: encodes model, dataset and the main hyper-parameters.
iteration = 1
setting = f"{model}_{dataset}_ft{features}_sl{context_len}_ll{label_len}_pl{pred_len}_lr{learning_rate}_bs{batch_size}_inv{inverse}_itr{iteration}"

# An optional tag is appended as a suffix.
if tag is not None:
    setting += f"_{tag}"

setting

'DDPM_icentia11k_ftS_sl10_ll105_pl30_lr0.001_bs8_invFalse_itr1'

In [8]:
# Build the experiment object from `args`. The class is reached through its full
# module path (presumably so it can be re-bound after importlib.reload of that module).
exp = liran_project.mrdiff.exp_main.Exp_Main(args)

Use GPU: cuda:0


In [11]:
# Inspect which config file the experiment's args were built from.
exp.args.configs_filename

'/home/liranc6/ecg_forecasting/liran_project/mrdiff/src/config_ecg.yml'

In [10]:
# Show the experiment's arguments as a nested dictionary.
exp.args.to_dict()

{'project_path': '/home/liranc6/ecg_forecasting',
 'tqdm': 'terminal',
 'resume': {'resume': True,
  'resume_from': False,
  'resume_optimizer': True,
  'resume_epoch': 'None',
  'loss_and_metrics': True,
  'resume_scheduler': True,
  'resume_configuration': True,
  'specific_chpt_path': '/home/liranc6/ecg_forecasting/liran_project/results/icentia11k/mrDiff/DDPM_icentia11k_ftS_sl10_ll105_pl30_lr0.001_bs8_invFalse_itr0/09_10_2024_1143/best_checkpoint.pth',
  'was_resumed': False},
 'wandb': {'entity': 'liranc6',
  'mode': 'disabled',
  'project': 'mrdiff',
  'resume': 'None',
  'run_name': 'None',
  'id': 'None',
  'save_code': True,
  'resume_from': 'None'},
 'general': {'random_seed': 42,
  'evaluate': False,
  'tag': None,
  'dataset': 'icentia11k',
  'features': 'S',
  'training_mode': 'ONE',
  'interval': 1000},
 'optimization': {'learning_rate': 0.001,
  'batch_size': 8,
  'test_batch_size': 8,
  'patience': 10,
  'weight_decay': 1e-05,
  'lradj': '3',
  'pct_start': 0.3},
 'hardw

In [12]:
# Call exp.read_data once per split, in the order train -> val -> test.
# NOTE(review): each call shows a "Reading ..." progress bar and can take a while.
exp.read_data('train')
exp.read_data('val')
exp.read_data('test')

Reading 00000:   0%|          | 0/2 [00:01<?, ?it/s]


KeyboardInterrupt: 

In [10]:
# Reload the module so that edits to its source take effect without restarting the kernel
importlib.reload(liran_project.mrdiff.exp_main)

# Assuming `exp` is an existing instance of `Exp_Main`:
# re-point the instance at the reloaded class; its existing attributes are not rebuilt.
exp.__class__ = liran_project.mrdiff.exp_main.Exp_Main

# Ask PyTorch to release its cached CUDA memory, then show GPU usage (IPython shell escape).
torch.cuda.empty_cache()
! gpustat

[1m[37mnlp-2080-1                   [m  Wed Oct  9 13:23:09 2024  [1m[30m535.146.02[m
[36m[0][m [34mNVIDIA GeForce RTX 2080 Ti[m |[31m 26°C[m, [32m  0 %[m | [36m[1m[33m  456[m / [33m11264[m MB | [1m[30mliranc6[m([33m200M[m)


In [13]:
# Print the experiment object's attributes (the output lists args, device, model, ...).
exp.print_attributes()

args: <liran_project.mrdiff.src.parser.Args object at 0x7fe3d01aab00>
device: cuda:0
model: Model(
  (base_models): ModuleList(
    (0-3): 4 x BaseMapping(
      (Linear_Trend): ModuleList(
        (0): Linear(in_features=10, out_features=30, bias=True)
      )
      (rev): RevIN()
    )
  )
  (decompsitions): ModuleList(
    (0): series_decomp(
      (moving_avg): moving_avg(
        (avg): AvgPool1d(kernel_size=(5,), stride=(1,), padding=(0,))
      )
    )
    (1): series_decomp(
      (moving_avg): moving_avg(
        (avg): AvgPool1d(kernel_size=(25,), stride=(1,), padding=(0,))
      )
    )
    (2): series_decomp(
      (moving_avg): moving_avg(
        (avg): AvgPool1d(kernel_size=(51,), stride=(1,), padding=(0,))
      )
    )
  )
  (u_nets): ModuleList(
    (0-2): 3 x My_DiffusionUnet_v0(
      (diffusion_embedding): DiffusionEmbedding(
        (projection1): Linear(in_features=256, out_features=256, bias=True)
        (projection2): Linear(in_features=256, out_features=256, 

In [15]:
import traceback

print(f'>>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>')
try:
    # Profile the CUDA activity of the whole training call, recording input
    # shapes, memory usage and Python stacks for the inspection cells below.
    with torch.profiler.profile(
        activities=[
            # torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ],
        record_shapes=True,
        profile_memory=True,
        with_stack=True
    ) as prof:
        exp.train(setting)
except Exception as e:
    # Deliberately not re-raised so the following cells can still run, but the
    # full traceback is printed: the message alone does not say where it failed.
    print(f'An error occurred during training: {e}')
    traceback.print_exc()

>>>>>>>start training : DDPM_icentia11k_ftS_sl10_ll105_pl30_lr0.001_bs8_invFalse_itr1>>>>>>>>>>>>>>>>>>>>>>>>>
An error occurred during training: PytorchStreamReader failed locating file data.pkl: file not found


STAGE:2024-10-08 20:08:25 1504372:1504372 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2024-10-08 20:08:25 1504372:1504372 ActivityProfilerController.cpp:320] Completed Stage: Collection
STAGE:2024-10-08 20:08:25 1504372:1504372 ActivityProfilerController.cpp:324] Completed Stage: Post Processing


In [12]:
import torch

# Assuming `prof` is your profiler object
key_averages = prof.key_averages()

# Collect the averaged events whose key contains the substring 'cuda'.
# NOTE(review): `cuda_operations` is not used anywhere below in this cell.
cuda_operations = [item for item in key_averages if 'cuda' in item.key]

# Print the full (unfiltered) table: top 10 rows sorted by self CUDA memory usage
print(key_averages.table(sort_by="self_cuda_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               [memory]         0.00%       0.000us         0.00%       0.000us       0.000us       0.000us         0.00%       0.000us       0.000us      18.31 Mb      18.31 Mb      78.08 Gb      78.08 G

In [None]:
import torch
import torch.profiler

# Assuming `prof` is your profiler object
key_averages = prof.key_averages()

# Total CUDA memory usage: sum of `self_cuda_memory_usage` over all averaged events
total_cuda_memory_usage = sum(item.self_cuda_memory_usage for item in key_averages)
print(f"Total CUDA Memory Usage: {total_cuda_memory_usage} bytes")

# Memory usage by function: top 10 rows sorted by self CUDA memory usage
print(key_averages.table(sort_by="self_cuda_memory_usage", row_limit=10))

Total CUDA Memory Usage: 83842590208 bytes
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               [memory]         0.00%       0.000us         0.00%       0.000us       0.000us       0.000us         0.00%       0.000us       0.000us      18.31 

In [None]:
# Top 10 averaged profiler events, sorted by self CUDA memory usage.
print(prof.key_averages().table(sort_by="self_cuda_memory_usage", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                               [memory]         0.00%       0.000us         0.00%       0.000us       0.000us       0.000us         0.00%       0.000us       0.000us      18.31 Mb      18.31 Mb      78.08 Gb      78.08 G

In [15]:
# Print PyTorch's CUDA memory allocator summary (current/peak usage, OOM count, ...).
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 25           |        cudaMalloc retries: 26        |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |  66618 KiB |  80015 MiB | 104888 MiB | 104823 MiB |
|       from large pool |  22384 KiB |  79971 MiB | 104827 MiB | 104805 MiB |
|       from small pool |  44234 KiB |     44 MiB |     60 MiB |     17 MiB |
|---------------------------------------------------------------------------|
| Active memory         |  66618 KiB |  80015 MiB | 104888 MiB | 104823 MiB |
|       from large pool |  22384 KiB |  79971 MiB | 104827 MiB | 104805 MiB |
|       from small pool |  44234 KiB |     44 MiB |     60 MiB |     17 MiB |
|---------------------------------------------------------------

In [16]:
# exp.model_start_training_time = "02_10_2024_135344"

In [17]:
# Run the test phase for the current `setting`.
# NOTE(review): with test=1 the run printed "loading model" and then looked for a
# checkpoint.pth under a results directory named after `setting` — confirm in Exp_Main.test.
print(f'>>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
exp.test(setting, test=1)

>>>>>>>testing : DDPM_icentia11k_ftS_sl10_ll105000_pl45000_lr0.001_bs8_invFalse_itr1<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
loading model


FileNotFoundError: [Errno 2] No such file or directory: '/home/liranc6/ecg_forecasting/liran_project/results/icentia11k/mrDiff/DDPM_icentia11k_ftS_sl10_ll105000_pl45000_lr0.001_bs8_invFalse_itr1/05_10_2024_0030/checkpoint.pth'

In [7]:
# Train and test one experiment per iteration.
for iteration in range(iterations):
    # setting record of experiments
    # NOTE(review): no re-seeding happens per iteration in this loop.

    # setting: experiment identifier, unique per iteration through the `_itr` suffix
    setting = f"{model}_{dataset}_ft{features}_sl{context_len}_ll{label_len}_pl{pred_len}_lr{learning_rate}_bs{batch_size}_inv{inverse}_itr{iteration}"

    if tag is not None:
        setting += f"_{tag}"

    # `Exp_Main` is not imported by name in this notebook (those imports are
    # commented out), so reference it through the imported module, as the
    # single-run cell does.
    exp = liran_project.mrdiff.exp_main.Exp_Main(args)

    print(f'>>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>')
    exp.train(setting)

    print(f'>>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
    exp.test(setting, test=1)

    # Release cached CUDA memory before the next iteration builds a new experiment.
    torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : DDPM_icentia11k_ftS_sl1_ll1500_pl1500_lr0.001_bs8_invFalse_itr0>>>>>>>>>>>>>>>>>>>>>>>>>


Reading 00001: 100%|██████████| 2/2 [00:24<00:00, 12.28s/it]
Reading 00001: 100%|██████████| 2/2 [00:03<00:00,  1.90s/it]
Reading 00042: 100%|██████████| 3/3 [00:04<00:00,  1.58s/it]


KeyboardInterrupt: 

In [None]:
# Inspect the random seed stored in the configuration.
args.configs.general.random_seed

In [None]:
# Scan every key of an HDF5 file, accumulating the total number of samples and the
# global min/max value, with a progress bar that shows the elapsed time.
# NOTE(review): this cell is not runnable as it stands in this notebook — `self`,
# `filename`, `h5py`, `max_val` and `min_val` are not defined in any visible cell.
# It looks like it was lifted from a method body; confirm the source before using it.
total_num_samples = 0

with h5py.File(filename, 'r') as h5_file:
    num_keys = len(self.keys)
    pbar_keys = tqdm(self.keys, total=num_keys, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]')
    
    start_time = time.time()
    pbar_keys.set_description(f"creating_stats")
    for key in pbar_keys:
        # When self.data_with_RR is truthy, keep only index 0 along the second axis.
        data = h5_file[key][()][:, 0, :] if self.data_with_RR else h5_file[key][()]
            
        # The first axis is counted as the number of samples.
        curr_num_samples = data.shape[0]
        total_num_samples += curr_num_samples

        # Running extrema over all keys; max_val/min_val must be initialised beforehand.
        max_val = max(max_val, np.max(data))
        min_val = min(min_val, np.min(data))
        
        # Calculate elapsed time
        elapsed_time = time.time() - start_time
        
        # Update the postfix
        pbar_keys.set_postfix({"time_elapsed": str(timedelta(seconds=int(elapsed_time)))})
        
        

In [None]:
def run_experiments(iterations, random_seed, model, dataset, features, seq_len,
                    label_len, pred_len, learning_rate, batch_size, inverse, tag, args):
    """Train and test the experiment `iterations` times and summarise the test metrics.

    Args:
        iterations: number of train/test repetitions.
        random_seed: seed used when there is a single iteration; with more than
            one iteration the iteration index is used as the seed instead.
        model, dataset, features, seq_len, label_len, pred_len, learning_rate,
        batch_size, inverse: values that are only embedded in the experiment name.
        tag: optional suffix for the experiment name (skipped when None).
        args: argument object handed to ``Exp_Main``.

    Returns:
        dict mapping each metric name ('mae', 'mse', 'rmse', 'mape', 'mspe',
        'rse', 'corr', 'nrmse') to a ``(mean, std)`` tuple over all iterations.
        With zero iterations the values are NaN.
    """
    metric_names = ('mae', 'mse', 'rmse', 'mape', 'mspe', 'rse', 'corr', 'nrmse')
    history = {name: [] for name in metric_names}

    for itr_idx in range(iterations):
        # setting record of experiments

        # random seed: one distinct seed per repetition, or the configured seed for a single run
        fix_seed = itr_idx if iterations > 1 else random_seed

        random.seed(fix_seed)
        torch.manual_seed(fix_seed)
        np.random.seed(fix_seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True  # Can change it to False --> default: False
        torch.backends.cudnn.enabled = True

        setting = f"{model}_{dataset}_ft{features}_sl{seq_len}_ll{label_len}_pl{pred_len}_lr{learning_rate}_bs{batch_size}_inv{inverse}_itr{itr_idx}"
        if tag is not None:
            setting += f"_{tag}"

        # Referenced through the imported module: the notebook has no active
        # `from ... import Exp_Main`.
        exp = liran_project.mrdiff.exp_main.Exp_Main(args)

        print(f'>>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>>')
        exp.train(setting)

        print(f'>>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
        # Explicit unpacking keeps the check that exactly eight metrics come back.
        mae, mse, rmse, mape, mspe, rse, corr, nrmse = exp.test(setting, test=1)
        for name, value in zip(metric_names, (mae, mse, rmse, mape, mspe, rse, corr, nrmse)):
            history[name].append(value)

        torch.cuda.empty_cache()

    # Every collected metric is summarised, including mspe.
    summary = {name: (np.mean(values), np.std(values)) for name, values in history.items()}

    print('Final mean normed: ')
    for name, (mean_value, std_value) in summary.items():
        print('> {}:{:.4f}, std:{:.4f}'.format(name, mean_value, std_value))

    return summary
    
# `seq_len` is never assigned in this notebook; the input-window length read from
# the config is `context_len` (it also fills the `sl` field of the other setting strings).
results = run_experiments(iterations, random_seed, model, dataset, features, context_len,
                          label_len, pred_len, learning_rate, batch_size, inverse, tag, args)