In [1]:
import argparse
import os
import sys
from typing import Any, Dict
from accelerate import Accelerator
from accelerate import DistributedType, DistributedDataParallelKwargs
import pytorch_lightning as pl
import torch
from lightning.pytorch.loggers import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.strategies.ddp import DDPStrategy
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, WeightedRandomSampler
import pickle
import pandas as pd
from transformers import LongformerTokenizer
from torch.utils.data import Dataset
from peft import get_peft_model, LoraConfig, TaskType
from collections import Counter
import math
from datasets import EHR_Longformer_Dataset
from models.longformernormal import LongformerPretrainNormal, LongformerFinetune

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader
import wandb
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
from accelerate import Accelerator
from accelerate import DistributedType
import os
from utils.utils import seed_everything
from transformers import LongformerTokenizer
from datasets import EHR_Longformer_Dataset
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm
import torch.nn.functional as F
from models.longformernormal import LongformerPretrainNormal
from torch.optim.lr_scheduler import LinearLR, SequentialLR, ExponentialLR, LambdaLR, CosineAnnealingWarmRestarts
from pretrain_train import train
import logging
import sys
from torch.utils.data.distributed import DistributedSampler
from torch.optim.lr_scheduler import _LRScheduler

In [9]:
parser = argparse.ArgumentParser()
    
# Required parameters
parser.add_argument("--exp_name", type=str, default="mortality90_CE_loss_reducelr_dropout")
parser.add_argument("--save_path", type=str, default="./results")
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--checkpoint_dir", type=str, default="./checkpoints")

# Model parameters
parser.add_argument("--mode", type=str, default="mortality90")
parser.add_argument("--vocab_size", type=int, default=50265)
parser.add_argument("--itemid_size", type=int, default=4016)
parser.add_argument("--unit_size", type=int, default=60)
parser.add_argument("--gender_size", type=int, default=2)
parser.add_argument("--continuous_size", type=int, default=3)
parser.add_argument("--task_size", type=int, default=5)
parser.add_argument("--max_position_embeddings", type=int, default=5000)
parser.add_argument("--max_age", type=int, default=100)
parser.add_argument("--batch_size", type=int, default=8)
parser.add_argument("--resume", type=bool, default=False)
parser.add_argument("--pin_memory", type=bool, default=True)
parser.add_argument("--nodes", type=int, default=1)
parser.add_argument("--gpus", type=int, default=2)
parser.add_argument("--start_epoch", type=int, default=0)
parser.add_argument("--epochs", type=int, default=200)
parser.add_argument("--log_every_n_steps", type=int, default=100)
parser.add_argument("--acc", type=int, default=1)
parser.add_argument("--resume_checkpoint", type=str, default=None)
parser.add_argument("--num_workers", type=int, default=0)
parser.add_argument("--embedding_size", type=int, default=768)
parser.add_argument("--num_hidden_layers", type=int, default=12)
parser.add_argument("--num_attention_heads", type=int, default=6)
parser.add_argument("--intermediate_size", type=int, default=3072)
parser.add_argument("--learning_rate", type=float, default=1e-5)
parser.add_argument("--dropout_prob", type=float, default=0.3)
parser.add_argument("--lora_dropout", type=float, default=0.2)
parser.add_argument("--classifier_dropout", type=float, default=0.4)
parser.add_argument("--device", type=str, default="cuda")
parser.add_argument("--gpu_mixed_precision", type=bool, default=True)
parser.add_argument("--patience", type=int, default=10)
parser.add_argument("--num_labels", type=int, default=2)
parser.add_argument("--use_lora", type=bool, default=True)
parser.add_argument("--gamma", type=float, default=2.0)
parser.add_argument("--beta", type=float, default=0.99)
parser.add_argument('--lora_weight', type=int, default=10)
parser.add_argument('--classifier_weight', type=int, default=50)
parser.add_argument("--loss", type=str, default="cross_entropy")
parser.add_argument("--pretrain", action='store_true', default=True)
parser.add_argument("--clip_interval", type=int, default=10)
parser.add_argument("--pretrain_path", type=str, default="best_pretrain_model_after_masking_35epoch.pth")


args = parser.parse_args({})
args.attention_window = [512] * args.num_hidden_layers


In [10]:
kwargs_handlers = [DistributedDataParallelKwargs(find_unused_parameters=True)]
accelerator = Accelerator(mixed_precision="fp16" if args.gpu_mixed_precision else "no", kwargs_handlers=kwargs_handlers)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [11]:



class CosineAnnealingWarmupRestarts(_LRScheduler):
    """
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: -1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """
    
    def __init__(self,
                 optimizer : torch.optim.Optimizer,
                 first_cycle_steps : int,
                 cycle_mult : float = 1.,
                 max_lr : float = 0.1,
                 min_lr : float = 0.001,
                 warmup_steps : int = 0,
                 gamma : float = 1.,
                 last_epoch : int = -1
        ):
        assert warmup_steps < first_cycle_steps
        
        self.first_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle_mult = cycle_mult # cycle steps magnification
        self.base_max_lr = max_lr # first max learning rate
        self.max_lr = max_lr # max learning rate in the current cycle
        self.min_lr = min_lr # min learning rate
        self.warmup_steps = warmup_steps # warmup step size
        self.gamma = gamma # decrease rate of max learning rate by cycle
        
        self.cur_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle = 0 # cycle count
        self.step_in_cycle = last_epoch # step size of the current cycle
        
        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)
        
        # set learning rate min_lr
        self.init_lr()
    
    def init_lr(self):
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)
    
    def get_lr(self):
        if self.step_in_cycle == -1:
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            return [(self.max_lr - base_lr)*self.step_in_cycle / self.warmup_steps + base_lr for base_lr in self.base_lrs]
        else:
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle-self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch
                
        self.max_lr = self.base_max_lr * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr


def configure_optimizers(model, args):
    optimizer = optim.AdamW(model.parameters(), lr=args.learning_rate)
    
    for param_group in optimizer.param_groups:
        param_group['initial_lr'] = args.learning_rate
    
    # n_warmup_steps = int(n_steps * 0.1)
    # n_decay_steps = n_steps - n_warmup_steps
    
    # warmup = LinearLR(optimizer, 
    #                     start_factor=0.01,
    #                     end_factor=1.0,
    #                     total_iters=n_warmup_steps)
    
    # decay = LinearLR(optimizer,
    #                     start_factor=1.0,
    #                     end_factor=0.01,
    #                     total_iters=n_decay_steps)
    
    # scheduler = SequentialLR(optimizer, 
    #                             schedulers=[warmup, decay],
    #                             milestones=[n_warmup_steps])
    # scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=500, T_mult=2)
    if args.resume:
        scheduler = CosineAnnealingWarmupRestarts(optimizer,
                                                first_cycle_steps=2690,
                                                cycle_mult=1.5,
                                                max_lr=0.0001,
                                                min_lr=0.000001,
                                                warmup_steps=269,
                                                gamma=0.9,
                                                last_epoch=args.resume_epoch
                                                )
    else:
        scheduler = CosineAnnealingWarmupRestarts(optimizer,
                                                first_cycle_steps=2690,
                                                cycle_mult=1.5,
                                                max_lr=0.0001,
                                                min_lr=0.000001,
                                                warmup_steps=269,
                                                gamma=0.9,
                                                )

    return optimizer, {"scheduler": scheduler, "interval": "step"}

In [23]:
model = LongformerPretrainNormal(
        vocab_size=args.vocab_size,
        itemid_size=args.itemid_size,
        max_position_embeddings=args.max_position_embeddings,
        unit_size=args.unit_size,
        continuous_size=args.continuous_size,
        task_size=args.task_size,
        max_age=args.max_age,
        gender_size=args.gender_size,
        embedding_size=args.embedding_size,
        num_hidden_layers=args.num_hidden_layers,
        num_attention_heads=args.num_attention_heads,
        intermediate_size=args.intermediate_size,
        learning_rate=args.learning_rate,
        dropout_prob=args.dropout_prob,
        gpu_mixed_precision=args.gpu_mixed_precision,
    ).to(args.device)

optimizer, scheduler = configure_optimizers(model, args,)

model_path = "./results/best_pretrain_model.pth"
checkpoint_path = f"./results/best_pretrain_model_after_masking_{args.resume_epoch}epoch.pth"
logging.info(f"Loading checkpoint from {checkpoint_path}")
checkpoint = torch.load(checkpoint_path, map_location=args.device, weights_only=True)

new_state_dict = {}
for k, v in checkpoint['model_state_dict'].items():
    if k.startswith('module.module.'):
        new_state_dict[k[14:]] = v  
    elif k.startswith('module.'):
        new_state_dict[k[7:]] = v 
    else:
        new_state_dict[k] = v  
filtered_state_dict = {k: v for k, v in new_state_dict.items()}

# model.load_state_dict(filtered_state_dict)


model_keys = set(model.state_dict().keys())

# 체크포인트의 state_dict 키
checkpoint_keys = set(filtered_state_dict.keys())

# 모델과 체크포인트의 키가 일치하는지 확인
if model_keys == checkpoint_keys:
    print("Model state_dict and checkpoint state_dict keys match.")
else:
    print("Model and checkpoint have mismatching keys.")
    print("Keys in model but not in checkpoint:", model_keys - checkpoint_keys)
    print("Keys in checkpoint but not in model:", checkpoint_keys - model_keys)

# model.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# print("Resume model loaded successfully.")

# args.start_epoch = args.resume_epoch + 1
# logging.info(f"Resuming from epoch {args.start_epoch}")

# lr = scheduler["scheduler"].get_lr()
# for param_group, lr_val in zip(optimizer.param_groups, lr):
#     param_group['lr'] = lr_val
    
# logging.info(f"Resumed learning rates: {lr}")
        

Model and checkpoint have mismatching keys.
Keys in model but not in checkpoint: set()
Keys in checkpoint but not in model: {'model.longformer.encoder.layer.8.attention.output.LayerNorm.bias', 'model.longformer.encoder.layer.6.attention.self.value.bias', 'model.longformer.encoder.layer.6.intermediate.dense.weight', 'model.longformer.encoder.layer.3.output.dense.weight', 'model.longformer.encoder.layer.6.attention.self.query_global.bias', 'model.longformer.encoder.layer.7.attention.self.value_global.weight', 'model.longformer.encoder.layer.11.attention.self.query.weight', 'model.longformer.encoder.layer.5.intermediate.dense.weight', 'model.longformer.encoder.layer.1.attention.self.value.bias', 'model.longformer.encoder.layer.1.attention.self.key_global.bias', 'model.longformer.encoder.layer.7.attention.self.value_global.bias', 'model.longformer.encoder.layer.5.intermediate.dense.bias', 'model.longformer.encoder.layer.2.attention.output.LayerNorm.bias', 'model.longformer.encoder.layer.9.

In [6]:
model = LongformerPretrainNormal(
        vocab_size=args.vocab_size,
        itemid_size=args.itemid_size,
        max_position_embeddings=args.max_position_embeddings,
        unit_size=args.unit_size,
        continuous_size=args.continuous_size,
        task_size=args.task_size,
        max_age=args.max_age,
        gender_size=args.gender_size,
        embedding_size=args.embedding_size,
        num_hidden_layers=args.num_hidden_layers,
        num_attention_heads=args.num_attention_heads,
        intermediate_size=args.intermediate_size,
        learning_rate=args.learning_rate,
        dropout_prob=args.dropout_prob,
        gpu_mixed_precision=args.gpu_mixed_precision,
    ).to(args.device)

checkpoint = torch.load(checkpoint_path, map_location=args.device, weights_only=True)

# print(checkpoint['model_state_dict'].keys())
new_state_dict = {}
for k, v in checkpoint['model_state_dict'].items():
    if k.startswith('module.module.'):
        new_state_dict[k[14:]] = v

print(set(model.state_dict().keys()) - set(new_state_dict.keys()))
if set(model.state_dict().keys()) == set(new_state_dict.keys()):
    print("Model state_dict and checkpoint state_dict keys match.")
else:
    print("Model and checkpoint have mismatching keys.")
    print("Keys in model but not in checkpoint:", set(model.state_dict().keys()) - set(new_state_dict.keys()))
    print("Keys in checkpoint but not in model:", set(new_state_dict.keys()) - set(model.state_dict().keys()))
        

NameError: name 'checkpoint_path' is not defined

In [6]:
def count_trainable_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


peft_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,  # Assuming this is for sequence classification
        inference_mode=False,  # Set to True if using for inference only
        r=8,  # Rank of the low-rank matrices
        lora_alpha=32,  # Scaling factor for the low-rank matrices
        lora_dropout=0.1,  # Dropout probability for LoRA layers
        target_modules=["query", "value"], # Target attention layers
    )
before_memory = torch.cuda.memory_allocated()
trainable_params_before_lora = count_trainable_parameters(pretrained_model)
print(f"LoRA 적용 전 학습 가능한 파라미터 수: {trainable_params_before_lora}")
model_with_lora = get_peft_model(pretrained_model, peft_config)
after_memory = torch.cuda.memory_allocated()
trainable_params_after_lora = count_trainable_parameters(model_with_lora)
print(f"LoRA 적용 후 학습 가능한 파라미터 수: {trainable_params_after_lora}")
# 파라미터 수 감소 비율 계산
reduction_ratio = (trainable_params_before_lora - trainable_params_after_lora) / trainable_params_before_lora * 100
print(f"학습 가능한 파라미터 수 감소 비율: {reduction_ratio:.2f}%")

print(f"메모리 사용량: {after_memory - before_memory}")

LoRA 적용 전 학습 가능한 파라미터 수: 31680176
LoRA 적용 후 학습 가능한 파라미터 수: 49152
학습 가능한 파라미터 수 감소 비율: 99.84%
메모리 사용량: 196608


In [11]:
for name, p in finetune_model.named_parameters():
    print(name)

embeddings.concept_embedding.procedure_embedding.weight
embeddings.concept_embedding.medication_embedding.weight
embeddings.concept_embedding.chart_embedding.weight
embeddings.position_embedding.position_embeddings.weight
embeddings.time_embedding.w
embeddings.time_embedding.b
embeddings.time_embedding.freqs
embeddings.time_embedding.projection.weight
embeddings.time_embedding.projection.bias
embeddings.value_embedding.value_embedding.0.weight
embeddings.value_embedding.value_embedding.0.bias
embeddings.value_embedding.value_embedding.2.weight
embeddings.value_embedding.value_embedding.2.bias
embeddings.unit_embedding.unit_embedding.weight
embeddings.continuous_embedding.embedding.weight
embeddings.age_embedding.age_embedding.weight
embeddings.gender_embedding.gender_embedding.weight
embeddings.task_embedding.task_embedding.weight
embeddings.LayerNorm.weight
embeddings.LayerNorm.bias
model.embeddings.word_embeddings.weight
model.embeddings.token_type_embeddings.weight
model.embeddings.

In [7]:


finetune_model = LongformerFinetune(
    pretrained_model=model_with_lora,
    problem_type="single_label_classification",
    num_labels=2,
    learning_rate=5e-5,
    classifier_dropout=0.1,
    use_lora=True
)

In [8]:
pretrained_params = [p for name, p in finetune_model.named_parameters() if 'lora' not in name]
lora_params = list(finetune_model.lora_parameters())
classifier_params = list(finetune_model.classifier_parameters())

print("Pretrained Parameters:")
for param in pretrained_params:
    print(param)

print("\nLoRA Parameters:")
for param in lora_params:
    print(param)

print("\nClassifier Parameters:")
for param in classifier_params:
    print(param)


Pretrained Parameters:
Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.2003, -0.8854, -2.0977,  ...,  0.8637, -1.0634,  0.6415],
        [ 0.1859, -0.1252,  2.1717,  ...,  1.8789, -1.4072,  1.0232],
        ...,
        [ 1.2455, -0.2159, -0.3655,  ...,  2.3189,  0.3499, -0.0739],
        [-1.6800,  0.5436, -0.6656,  ..., -1.4946, -0.7375,  0.3359],
        [ 0.0520,  0.7949, -0.9065,  ..., -0.3273, -1.7657,  1.3392]],
       device='cuda:0')
Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.3414, -0.0902, -1.5275,  ..., -0.1391,  0.7844, -1.3369],
        [ 0.0913, -2.1859, -0.9842,  ...,  0.6543,  0.1514,  0.7890],
        ...,
        [ 0.4680, -1.4265,  0.1359,  ..., -1.3171,  0.4589, -0.2161],
        [ 0.5415,  1.4428,  0.6588,  ...,  0.5114,  0.2453,  1.0027],
        [ 0.0988,  1.5918, -1.1827,  ..., -0.6837, -1.6430, -1.1480]],
       device='cuda:0')
Parameter contain

In [None]:
def configure_optimizers(model, args, n_steps):
    base_lr = args.learning_rate  
    lora_lr = args.learning_rate * args.lora_weight
    classifier_lr = args.learning_rate * args.lora_weight
    
    pretrained_params = [p for name, p in model.named_parameters() if 'lora' not in name]
    lora_params = list(model.lora_parameters())
    classifier_params = list(model.classifier_parameters())

    optimizer = optim.AdamW([
        {'params': pretrained_params, 'lr': base_lr},  
        {'params': lora_params, 'lr': lora_lr},  
        {'params': classifier_params, 'lr': classifier_lr}, 
    ])

    n_warmup_steps = int(n_steps * 0.1)
    n_decay_steps = n_steps - n_warmup_steps

    warmup = LinearLR(optimizer, 
                        start_factor=0.01,
                        end_factor=1.0,
                        total_iters=n_warmup_steps)
    
    decay = LinearLR(optimizer,
                        start_factor=1.0,
                        end_factor=0.01,
                        total_iters=n_decay_steps)
    
    scheduler = SequentialLR(optimizer, 
                                schedulers=[warmup, decay],
                                milestones=[n_warmup_steps])

    return optimizer, {"scheduler": scheduler, "interval": "step"}

In [25]:
print(list(finetune_model.pretrained_parameters()))

[Parameter containing:
tensor([[ 0.0271,  0.0132, -0.0369,  ...,  0.0185,  0.0606, -0.0114],
        [-0.0268,  0.0478, -0.0033,  ..., -0.0049, -0.0060, -0.0140],
        [ 0.0206, -0.0008, -0.0173,  ..., -0.0130, -0.0201,  0.0082],
        ...,
        [-0.0036, -0.0187,  0.0137,  ...,  0.0163,  0.0063, -0.0082],
        [ 0.0373,  0.0002,  0.0166,  ...,  0.0053,  0.0242, -0.0200],
        [ 0.0268,  0.0062, -0.0194,  ..., -0.0015,  0.0178, -0.0536]],
       device='cuda:0'), Parameter containing:
tensor([[ 0.0071, -0.0145, -0.0272,  ..., -0.0159, -0.0029,  0.0082],
        [-0.0306, -0.0028, -0.0202,  ..., -0.0249,  0.0230,  0.0229]],
       device='cuda:0'), Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 

In [15]:
count_trainable_parameters(finetune_model)

641282

In [11]:
# 사전학습된 모델과 파인튜닝 모델의 동일한 레이어의 가중치 비교
pretrained_weight = pretrained_model.embeddings.concept_embedding.procedure_embedding.weight
finetune_weight = finetune_model.embeddings.concept_embedding.procedure_embedding.weight

# 두 가중치가 동일한지 확인
if torch.allclose(pretrained_weight, finetune_weight):
    print("Weights are identical, pretrained weights have been correctly loaded.")
else:
    print("Weights are different, there may be an issue with loading pretrained weights.")


Weights are identical, pretrained weights have been correctly loaded.


In [13]:
from torch.utils.data import Sampler
import traceback
import random
import torch.nn.init as init
# seed_everything(args.seed)
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
    
itemid2idx = pd.read_pickle("datasets/entire_itemid2idx.pkl")
unit2idx = pd.read_pickle("datasets/unit2idx.pkl")

def configure_optimizers(model, args, n_steps):
    base_lr = args.learning_rate  
    lora_lr = args.learning_rate * args.lora_weight
    classifier_lr = args.learning_rate * args.classifier_weight
    
    pretrained_params = model.pretrained_parameters()
    lora_params = model.lora_parameters()
    classifier_params = model.classifier_parameters()

    optimizer = optim.AdamW([
        {'params': pretrained_params, 'lr': base_lr, 'weight_decay': 0.01},
        {'params': lora_params, 'lr': lora_lr, 'weight_decay': 0.001},
        {'params': classifier_params, 'lr': classifier_lr, 'weight_decay': 0.001}
    ])


    # CosineAnnealingWarmRestarts 
    # scheduler = CosineAnnealingWarmupRestarts(optimizer,
    #                                           first_cycle_steps=538,
    #                                           cycle_mult=2,
    #                                           max_lr=0.0001,
    #                                           min_lr=0.000001,
    #                                           warmup_steps=54,
    #                                           gamma=0.5
    #                                           )
    # scheduler = CosineAnnealingWarmRestarts(optimizer,
    #                                     T_0=538,       
    #                                     T_mult=2,     
    #                                    )
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.5,
        patience=3,
        min_lr=1e-6
    )

    return optimizer, {"scheduler": scheduler, "interval": "step"}

def calculate_alpha(dataset, num_classes):
    """
    Calculate the alpha values based on the class distribution in the dataset.
    
    Parameters:
    - dataset: The dataset to calculate class distribution from.
    - num_classes: The total number of classes in the dataset.
    
    Returns:
    - alpha: Tensor containing the alpha values for each class.
    """
    # Get all targets in the dataset
    all_targets = [label.item() for *_, label in dataset]

    # Count the occurrences of each class
    class_counts = Counter(all_targets)

    # Calculate the total number of samples
    total_count = sum(class_counts.values())

    # Calculate alpha as the inverse of the class frequency
    alpha = torch.zeros(num_classes)
    for cls, count in class_counts.items():
        alpha[cls] = total_count / (num_classes * count)
    
    # Normalize alpha to sum to 1
    alpha = alpha / alpha.sum()

    return alpha
class CustomSampler(Sampler):
    def __init__(self, dataset, batch_size):
        self.dataset = dataset
        self.batch_size = batch_size
        

        self.labels = [self.dataset[idx][-1].item() for idx in range(len(self.dataset))]
        
   
        self.class_indices = {label: np.where(np.array(self.labels) == label)[0].tolist() for label in np.unique(self.labels)}
        
 
        self.class_probs = {label: len(self.class_indices[label]) / len(self.dataset) for label in np.unique(self.labels)}
        
    
        self.min_class_1_per_batch = 1
        self.class_1_indices = self.class_indices[1]
        self.other_class_indices_template = {label: self.class_indices[label] for label in self.class_indices if label != 1}

   
        self.indices = []

    def _generate_indices(self):
        indices = []
        num_batches = len(self.dataset) // self.batch_size
        
      
        class_1_per_batch = np.array_split(self.class_1_indices, num_batches)

        other_class_indices = {label: indices_list[:] for label, indices_list in self.other_class_indices_template.items()}
        
        for batch_num in range(num_batches):
            batch_indices = []
            
        
            batch_indices.extend(class_1_per_batch[batch_num])
            
      
            remaining_batch_size = self.batch_size - len(batch_indices)
            other_samples = []
            
            for label, indices_list in other_class_indices.items():
                if len(indices_list) > 0:
                
                    if remaining_batch_size > len(indices_list):
                        selected_indices = indices_list
                    else:
                        selected_indices = np.random.choice(indices_list, remaining_batch_size, replace=False).tolist()
                    
                    other_samples.extend(selected_indices)
                    
                  
                    other_class_indices[label] = [idx for idx in indices_list if idx not in selected_indices]
            
            np.random.shuffle(other_samples)
            batch_indices.extend(other_samples[:remaining_batch_size])
        
            np.random.shuffle(batch_indices)
            indices.extend(batch_indices)
        
        return indices
    
    def __iter__(self):
   
        self.indices = self._generate_indices()
        return iter(self.indices)

    def __len__(self):
        return len(self.indices)
    
pretrained_model = LongformerPretrainNormal(
        vocab_size=args.vocab_size,
        itemid_size=args.itemid_size,
        max_position_embeddings=args.max_position_embeddings,
        unit_size=args.unit_size,
        continuous_size=args.continuous_size,
        task_size=args.task_size,
        max_age=args.max_age,
        gender_size=args.gender_size,
        embedding_size=args.embedding_size,
        num_hidden_layers=args.num_hidden_layers,
        num_attention_heads=args.num_attention_heads,
        intermediate_size=args.intermediate_size,
        learning_rate=args.learning_rate,
        dropout_prob=args.dropout_prob,
        gpu_mixed_precision=args.gpu_mixed_precision,
    ).to(args.device)
pretrain_path = os.path.join("./results/", args.pretrain_path)
checkpoint = torch.load(pretrain_path, map_location=args.device, weights_only=True)
state_dict = checkpoint['model_state_dict']

new_state_dict = {}
for k, v in state_dict.items():
    if k.startswith('module.module.'):
        new_state_dict[k[14:]] = v  
    elif k.startswith('module.'):
        new_state_dict[k[7:]] = v 
    else:
        new_state_dict[k] = v  
filtered_state_dict = {k: v for k, v in new_state_dict.items() if 'task_embedding' not in k}

pretrained_model.load_state_dict(filtered_state_dict, strict=False)
print("Pre-trained model loaded successfully.")
def initialize_weights(module):
    if isinstance(module, torch.nn.Embedding):
        init.xavier_uniform_(module.weight.data) 
        

pretrained_model.embeddings.task_embedding.apply(initialize_weights)
peft_config = LoraConfig(
            task_type=TaskType.SEQ_CLS,  # Assuming this is for sequence classification
            inference_mode=False,  # Set to True if using for inference only
            r=8,  # Rank of the low-rank matrices
            lora_alpha=16,  # Scaling factor for the low-rank matrices
            lora_dropout=args.lora_dropout,  # Dropout probability for LoRA layers
            target_modules=["query", "value"], # Target attention layers
)
pretrained_model = get_peft_model(pretrained_model, peft_config)
print("Applying LoRA")

model = LongformerFinetune(
        pretrained_model=pretrained_model,
        problem_type="single_label_classification",
        num_labels=2,
        learning_rate=args.learning_rate,
        classifier_dropout=args.classifier_dropout,
        use_lora=args.use_lora,
    ).to(args.device)
print(f"Train Parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
# 데이터셋 생성
train_dataset = EHR_Longformer_Dataset(Path("./datasets"), "train", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)
valid_dataset = EHR_Longformer_Dataset(Path("./datasets"), "valid", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)
custom_sampler = CustomSampler(train_dataset, batch_size=args.batch_size)
train_loader = DataLoader(train_dataset, 
                            batch_size=args.batch_size,
                            sampler=custom_sampler,  # shuffle should be False if using DistributedSampler
                            # shuffle=True,
                            pin_memory=args.pin_memory, 
                            num_workers=args.num_workers,
                            )

valid_loader = DataLoader(valid_dataset, 
                            batch_size=args.batch_size, 
                            shuffle=False,  # Validation should not be shuffled
                            pin_memory=args.pin_memory, 
                            num_workers=args.num_workers,
                            )

class_weights = calculate_alpha(train_dataset, args.num_labels)

# 샘플링에 사용할 레이블의 인덱스를 찾기 위해 데이터셋의 레이블이 포함된 인덱스 확인
labels = [train_dataset[i][-1].item() for i in range(len(train_dataset))]

# 질병 샘플(1)의 비율을 정하거나, 일정 수로 지정
num_samples_1 = sum(labels)  # 예시: 질병 샘플의 수로 설정
target = 1  # 질병 레이블 (1)


# DataLoader 설정
train_loader = DataLoader(train_dataset, 
                          batch_size=16,
                        sampler = CustomSampler(train_dataset, batch_size=16), 
                        # shuffle=True,
                          pin_memory=args.pin_memory, 
                          num_workers=args.num_workers)
print(len(train_loader))
criterion = nn.CrossEntropyLoss(weight=class_weights.to(args.device))

n_steps = (len(train_dataset) // args.batch_size) * args.epochs
optimizer, scheduler = configure_optimizers(model, args, n_steps)

model, optimizer, train_loader, valid_loader, scheduler = accelerator.prepare(
        model, optimizer, train_loader, valid_loader, scheduler['scheduler']
    )
# 배치 처리
# ids_0 = 0
# ids_1 = 0
# ids_2 = 0
# ids_3 = 0
# for epoch in tqdm(range(0, 30)):
#     for step, batch in tqdm(enumerate(train_loader), desc="Steps", total=len(train_loader)):
#         batch = tuple(t.to(args.device) if isinstance(t, torch.Tensor) else t for t in batch)
#         input_ids, attention_mask, age_ids, gender_ids, value_ids, unit_ids, time_ids, continuous_ids, position_ids, token_type_ids, task_token, labels = batch
#         # print(torch.sum(labels).item())
#         if step == 0:
#             print(labels)
#         if torch.sum(labels).item() == 0:
#             ids_0 += 1
#         elif torch.sum(labels).item() == 1:
#             ids_1 += 1
#         elif torch.sum(labels).item() == 2:
#             ids_2 += 1
#         elif torch.sum(labels).item() == 3:
#             ids_3 += 1  
        

# print(ids_0, ids_1, ids_2, ids_3)



Pre-trained model loaded successfully.
Applying LoRA
Train Parameters: 887042
0


In [7]:
def calculate_class_weights(dataset):
    labels = [label.item() for *_, label in dataset]
    

    class_counts = Counter(labels)
    total_samples = len(dataset)
    
    class_weights = {}
    for class_idx, count in class_counts.items():
        class_weights[class_idx] = total_samples / (len(class_counts) * count)
    
    return class_weights
class_weights = calculate_class_weights(train_dataset)
class_weights

{0: 0.5644674711437565, 1: 4.377924720244151}

In [11]:
for step, batch in train_loader:
    print(batch)
    break

In [1]:
def calculate_class_weights(dataset):
    labels = [label.item() for *_, label in dataset]
    

    class_counts = Counter(labels)
    total_samples = len(dataset)
    
    class_weights = {}
    for class_idx, count in class_counts.items():
        class_weights[class_idx] = total_samples / (len(class_counts) * count)
    
    return class_weights
def normalize_class_weights(class_weights):
    total = sum(class_weights.values())
    return {k: v / total for k, v in class_weights.items()}

class_weights = calculate_class_weights(train_dataset)
print(class_weights)
print([class_weights[0], class_weights[1]])
normalized_class_weights = normalize_class_weights(class_weights)

# Focal Loss의 alpha로 사용하기 위해 리스트로 변환
alpha = [normalized_class_weights[0], normalized_class_weights[1]]



print(f"Normalized class weights: {alpha}")

NameError: name 'train_dataset' is not defined

In [6]:
train_dataset[0][-1]

tensor([0])

In [81]:
logits

tensor([[ 0.0866,  0.0131],
        [-0.2563, -0.1538],
        [ 0.2018,  0.0390],
        [-0.0540,  0.0140],
        [-0.0110,  0.3871],
        [ 0.0810,  0.2139],
        [-0.2117,  0.3357],
        [-0.1501, -0.4084]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [82]:
labels

tensor([[1],
        [0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [0]], device='cuda:0')

In [87]:
preds

tensor([0, 1, 0, 1, 1, 1, 1, 0], device='cuda:0')

In [102]:
def calculate_metrics(predictions, labels): 
    probs = F.softmax(predictions, dim=1)
    preds = torch.argmax(probs, dim=1)
    
    labels = labels.view(-1) 
    
    preds_np = preds.cpu().detach().numpy()
    labels_np = labels.cpu().detach().numpy()
    probs_np = probs[:, 1].cpu().detach().numpy()
    
    # Debugging: Check prediction and label distribution
    print("Predicted labels:", np.unique(preds_np, return_counts=True))
    print("True labels:", np.unique(labels_np, return_counts=True))
    
    precision = precision_score(labels_np, preds_np, zero_division=0)
    recall = recall_score(labels_np, preds_np, zero_division=0)
    f1 = f1_score(labels_np, preds_np, zero_division=0)
    
    if len(np.unique(labels_np)) > 1:
        auroc = roc_auc_score(labels_np, probs_np)
        auprc = average_precision_score(labels_np, probs_np)
    else:
        # If only one class is present, return default values
        auroc = 0.5  # Equivalent to random guessing
        auprc = 0  # For imbalanced data, AUPRC is often similar to precision

    metrics = {
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'auroc': auroc,
        'auprc': auprc
    }
    return metrics

In [103]:
calculate_metrics(logits.view(-1, 2), labels.view(-1))

Predicted labels: (array([0, 1]), array([3, 5]))
True labels: (array([0, 1]), array([4, 3]))


ValueError: Found input variables with inconsistent numbers of samples: [7, 8]

In [101]:
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

# DataLoader에 샘플러 적용
train_loader = DataLoader(train_dataset, 
                          batch_size=args.batch_size,
                          sampler=sampler,  # shuffle=False, shuffle is controlled by sampler
                          pin_memory=args.pin_memory, 
                          num_workers=args.num_workers,
                          )


for step, batch in tqdm(enumerate(train_loader), desc="Steps", total=len(train_loader)):
            
    batch = tuple(t.to(args.device) if isinstance(t, torch.Tensor) else t for t in batch)
    input_ids, attention_mask, age_ids, gender_ids, value_ids, unit_ids, time_ids, continuous_ids, position_ids, token_type_ids, task_token, labels = batch

    if sum(labels).item() == 0:
        print("0")
    

  

Steps:   5%|▍         | 51/1076 [00:00<00:06, 162.73it/s]

0


Steps:  40%|███▉      | 429/1076 [00:04<00:07, 92.25it/s] 

0


Steps: 100%|██████████| 1076/1076 [00:11<00:00, 91.64it/s] 


In [None]:
def calculate_class_weights(dataset):
    labels = [label.item() for *_, label in dataset]
    class_counts = Counter(labels)
    total_samples = len(dataset)
    
    sample_weights = {cls: count / total_samples for cls, count in class_counts.items()}
    sample_weights = [sample_weights[label.item()] for *_, label in dataset]
    sample_weights = torch.tensor(sample_weights, dtype=torch.float32)
    
    class_weights = {}
    for class_idx, count in class_counts.items():
        class_weights[class_idx] = total_samples / (len(class_counts) * count)
    
    return class_weights, sample_weights

In [23]:
def calculate_class_weights(dataset):
    from collections import Counter
    
    labels = [label.item() for *_, label in dataset]
    class_counts = Counter(labels)
    total_samples = len(dataset)
    
    class_weights = {}
    for class_idx, count in class_counts.items():
        class_weights[class_idx] = total_samples / (len(class_counts) * count)
    
    # Normalize class weights so that the maximum weight is 1
    max_weight = max(class_weights.values())
    normalized_class_weights = {k: v / max_weight for k, v in class_weights.items()}
    
    return normalized_class_weights

# Example usage
train_dataset = EHR_Longformer_Dataset(Path("./datasets"), "train", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)
class_weights = calculate_class_weights(train_dataset)
print(class_weights)
weights = torch.tensor([class_weights[i] for i in range(len(class_weights))], dtype=torch.float32).to(args.device)
print(weights)

{0: 0.23984442523768368, 1: 1.0}
tensor([0.2398, 1.0000], device='cuda:0')


In [None]:
def calculate_class_weights(dataset):
    # 레이블 추출
    labels = [label.item() for *_, label in dataset]
    class_counts = Counter(labels)
    total_samples = len(dataset)
    
    # 클래스 가중치 계산
    class_weights = {}
    for class_idx, count in class_counts.items():
        class_weights[class_idx] = total_samples / (len(class_counts) * count)
    
    # 클래스 가중치의 반대 비율로 샘플 가중치 설정
    max_weight = max(class_weights.values())
    inverse_class_weights = {cls: max_weight / class_weights[cls] for cls in class_weights}
    
    # 각 샘플의 가중치 설정
    sample_weights = [inverse_class_weights[label.item()] for *_, label in dataset]
    sample_weights = torch.tensor(sample_weights, dtype=torch.float32)
    
    return class_weights, sample_weights


In [165]:
train_labels = [label.item() for *_, label in train_dataset]
train_class_counts = torch.bincount(torch.tensor(train_labels))
print(train_class_counts)
train_class_weights = 1.0 / train_class_counts.float()
print(train_class_weights)
train_sample_weights = train_class_weights[torch.tensor(train_labels)]
print(train_sample_weights)

tensor([6942, 1665])
tensor([0.0001, 0.0006])
tensor([0.0001, 0.0001, 0.0001,  ..., 0.0001, 0.0001, 0.0001])


In [151]:
class_weights, sample_weights = calculate_class_weights(train_dataset)
print(class_weights)
print(sample_weights)

{0: 0.6199222126188418, 1: 2.5846846846846847}
tensor([4.1694, 4.1694, 4.1694,  ..., 4.1694, 4.1694, 4.1694])


In [152]:
sample_weights[:5]

tensor([4.1694, 4.1694, 4.1694, 4.1694, 1.0000])

In [17]:
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
    
itemid2idx = pd.read_pickle("datasets/entire_itemid2idx.pkl")
unit2idx = pd.read_pickle("datasets/unit2idx.pkl")


train_dataset = EHR_Longformer_Dataset(Path("./datasets"), "train", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)

# 각 클래스의 빈도 계산
train_labels = [label.item() for *_, label in train_dataset]
class_counts = Counter(train_labels)

# 클래스 가중치 계산
total_samples = len(train_dataset)
class_weights = {class_idx: total_samples / (len(class_counts) * count) for class_idx, count in class_counts.items()}
weights = [class_weights[label.item()] for *_, label in train_dataset]

# WeightedRandomSampler 생성
sampler = WeightedRandomSampler(weights, len(weights), replacement=False)

# DataLoader 생성
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=sampler, pin_memory=args.pin_memory, num_workers=args.num_workers)



In [5]:
import torch
from torch.utils.data import Sampler, WeightedRandomSampler
import numpy as np
from collections import Counter

class CustomSampler(Sampler):
    def __init__(self, dataset, batch_size):
        self.dataset = dataset
        self.batch_size = batch_size
        
        # 각 데이터의 라벨을 가져옵니다.
        self.labels = [self.dataset[idx][-1].item() for idx in range(len(self.dataset))]
        
        # 클래스별로 인덱스를 저장합니다.
        self.class_indices = {label: np.where(np.array(self.labels) == label)[0].tolist() for label in np.unique(self.labels)}
        
        # 클래스별 비율을 계산합니다.
        self.class_probs = {label: len(self.class_indices[label]) / len(self.dataset) for label in np.unique(self.labels)}
        
        # 배치당 각 클래스의 샘플 수를 계산합니다.
        total_ratio = sum(self.class_probs.values())
        self.batch_class_counts = {label: int(round(self.class_probs[label] * self.batch_size / total_ratio)) for label in np.unique(self.labels)}
        
        # 인덱스를 생성합니다.
        self.indices = self._generate_indices()
        
    def _generate_indices(self):
        indices = []
        class_counters = {label: 0 for label in self.class_indices.keys()}
        num_batches = len(self.dataset) // self.batch_size
        
        for _ in range(num_batches):
            batch_indices = []
            for label, count in self.batch_class_counts.items():
                remaining_samples = len(self.class_indices[label]) - class_counters[label]
                
                if remaining_samples >= count:
                    selected_indices = self.class_indices[label][class_counters[label]:class_counters[label] + count]
                else:
                    selected_indices = self.class_indices[label][class_counters[label]:]
                
                batch_indices.extend(selected_indices)
                class_counters[label] += len(selected_indices)
            
            np.random.shuffle(batch_indices)
            indices.extend(batch_indices)
        
        # 남은 샘플들을 마지막에 추가 (만약 남는 경우가 있다면)
        remaining_indices = []
        for label, count in self.batch_class_counts.items():
            remaining_indices.extend(self.class_indices[label][class_counters[label]:])
        
        if remaining_indices:
            np.random.shuffle(remaining_indices)
            indices.extend(remaining_indices[:self.batch_size])

        return indices
    
    def __iter__(self):
        return iter(self.indices)

    def __len__(self):
        return len(self.indices)

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
    
itemid2idx = pd.read_pickle("datasets/entire_itemid2idx.pkl")
unit2idx = pd.read_pickle("datasets/unit2idx.pkl")


# Usage Example
train_dataset = EHR_Longformer_Dataset(Path("./datasets"), "train", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)

# Calculate class weights
labels = [label.item() for *_, label in train_dataset]
class_counts = Counter(labels)
total_samples = len(train_dataset)
class_weights = {class_idx: total_samples / (len(class_counts) * count) for class_idx, count in class_counts.items()}
weights = [class_weights[label] for label in labels]

# Create the custom sampler
balanced_sampler = CustomSampler(weights, labels, batch_size=args.batch_size)

# DataLoader with custom sampler
train_loader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=balanced_sampler, pin_memory=args.pin_memory, num_workers=args.num_workers)




In [5]:
for step, batch in tqdm(enumerate(train_loader), desc="Steps", total=len(train_loader)):
    print(batch)
    break

Steps:   0%|          | 0/135 [00:00<?, ?it/s]


Unexpected exception formatting exception. Falling back to standard exception
Unexpected exception formatting exception. Falling back to standard exception
Unexpected exception formatting exception. Falling back to standard exception


In [26]:
try:
    for step, batch in tqdm(enumerate(train_loader), desc="Steps", total=len(train_loader)):
            
        batch = tuple(t.to(args.device) if isinstance(t, torch.Tensor) else t for t in batch)
        input_ids, attention_mask, age_ids, gender_ids, value_ids, unit_ids, time_ids, continuous_ids, position_ids, token_type_ids, task_token, labels = batch
        print(labels)
        if step == 20:
            break
except Exception as e:
    print(f"Exception occurred: {e}")

Steps:   0%|          | 0/135 [00:00<?, ?it/s]

Exception occurred: list indices must be integers or slices, not list





In [83]:
weights = torch.tensor([class_weights[i] for i in range(len(class_weights))], dtype=torch.float32)
weights

tensor([0.6199, 2.5847])

In [87]:
nn.CrossEntropyLoss(weight=weights)

CrossEntropyLoss()

In [62]:
logits

tensor([[-0.3848,  0.2786],
        [-0.0680, -0.0041],
        [-0.6473,  0.1456],
        [-0.2334,  0.2933],
        [-0.2564,  0.1482],
        [-0.3317,  0.3650],
        [-0.5295,  0.4612],
        [-0.3901,  0.1189]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [70]:
a = torch.tensor([0, 1, 1, 1, 1, 1, 1, 1])

class_counts = np.bincount(a)
class_weights = 1. / class_counts
class_weights = torch.tensor(class_weights, dtype=torch.float32)
print(class_weights)
loss_fct = nn.CrossEntropyLoss(weight=class_weights)

loss_fct(a, logits.view(-1, 2))

tensor([1.0000, 0.1429])


RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Long'

In [42]:
def calculate_metrics(predictions, labels): 
    probs = F.softmax(predictions, dim=1)
    preds = torch.argmax(probs, dim=1)
    
    labels = labels.view(-1) 
    
    preds_np = preds.cpu().detach().numpy()
    labels_np = labels.cpu().detach().numpy()
    probs_np = probs[:, 1].cpu().detach().numpy()
    
    precision = precision_score(labels_np, preds_np, zero_division=0)
    recall = recall_score(labels_np, preds_np, zero_division=0)
    f1 = f1_score(labels_np, preds_np, zero_division=0)
    
    if len(np.unique(labels_np)) > 1:
        auroc = roc_auc_score(labels_np, probs_np)
        auprc = average_precision_score(labels_np, probs_np)
    else:
        # If only one class is present, return default values
        auroc = 0.5  # Equivalent to random guessing
        auprc = precision  # For imbalanced data, AUPRC is often similar to precision

    metrics = {
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'auroc': auroc,
        'auprc': auprc
    }
    return metrics

In [44]:
torch.sigmoid(logits)

tensor([[0.4050, 0.5692],
        [0.4830, 0.4990],
        [0.3436, 0.5363],
        [0.4419, 0.5728],
        [0.4362, 0.5370],
        [0.4178, 0.5903],
        [0.3706, 0.6133],
        [0.4037, 0.5297]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [43]:
calculate_metrics(logits, labels)

{'precision': 0.125,
 'recall': 1.0,
 'f1_score': 0.2222222222222222,
 'auroc': 0.2857142857142857,
 'auprc': 0.16666666666666666}

array([1, 1, 1, 1, 1, 1, 1, 1])

In [34]:
torch.argmax(F.softmax(logits, dim=1)).cpu().detach().numpy(), dim=1

SyntaxError: cannot assign to function call (1300456600.py, line 1)

In [None]:
total_loss = sum(train_loss)  # 배치 크기를 곱한 손실의 총합
total_samples = len(data_loader.dataset)  # 총 샘플 수
epoch_loss = total_loss / total_samples

In [7]:
torch.sigmoid(logits[:, 1])

tensor([0.4585, 0.4122, 0.4537, 0.4502, 0.4237, 0.4980, 0.5277, 0.4858],
       device='cuda:0', grad_fn=<SigmoidBackward0>)

In [23]:
torch.sigmoid(logits)[:, 1]


tensor([0.6112, 0.4998, 0.5627, 0.5789, 0.4968, 0.4744, 0.4868, 0.4722],
       device='cuda:0', grad_fn=<SelectBackward0>)

In [24]:
preds_np = torch.argmax(logits, dim=1) .cpu().detach().numpy()
labels_np = labels.cpu().detach().numpy()
print(preds_np, labels_np)

probs_positive_np = torch.sigmoid(logits)[:, 1].cpu().detach().numpy()
print(probs_positive_np)

[1 0 1 1 1 0 1 0] [1 0 1 0 1 1 1 0]
[0.6111798  0.49978867 0.56268066 0.57892156 0.49677992 0.47443974
 0.486832   0.47216398]


In [25]:
print(precision_score(labels_np, preds_np, zero_division=0, average='binary'))
print(recall_score(labels_np, preds_np, zero_division=0, average='binary'))
print(f1_score(labels_np, preds_np, zero_division=0, average='binary'))
print(roc_auc_score(labels_np, probs_positive_np))
print(average_precision_score(labels_np, probs_positive_np))

0.8
0.8
0.8
0.5333333333333334
0.7295238095238095


In [7]:
for step, batch in tqdm(enumerate(valid_loader), desc="Steps", total=len(valid_loader)):
            
    batch = tuple(t.to(args.device) if isinstance(t, torch.Tensor) else t for t in batch)
    input_ids, attention_mask, age_ids, gender_ids, value_ids, unit_ids, time_ids, continuous_ids, position_ids, token_type_ids, task_token, labels = batch
    print(task_token)
    break

Steps:   0%|          | 0/135 [00:00<?, ?it/s]

tensor([[1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1]], device='cuda:0')





In [10]:
valid_dataset = EHR_Longformer_Dataset(Path("./datasets"), "valid", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode='readmission')
valid_loader = DataLoader(valid_dataset, 
                        batch_size=args.batch_size, 
                        shuffle=False,  # Validation should not be shuffled
                        pin_memory=args.pin_memory, 
                        num_workers=args.num_workers,
                        )
for step, batch in tqdm(enumerate(valid_loader), desc="Steps", total=len(valid_loader)):
            
    batch = tuple(t.to(args.device) if isinstance(t, torch.Tensor) else t for t in batch)
    input_ids, attention_mask, age_ids, gender_ids, value_ids, unit_ids, time_ids, continuous_ids, position_ids, token_type_ids, task_token, labels = batch
    print(task_token)
    break


Steps:   0%|          | 0/135 [00:00<?, ?it/s]

tensor([[3],
        [3],
        [3],
        [3],
        [3],
        [3],
        [3],
        [3]], device='cuda:0')





In [31]:
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
    
itemid2idx = pd.read_pickle("datasets/entire_itemid2idx.pkl")
unit2idx = pd.read_pickle("datasets/unit2idx.pkl")


train_dataset = EHR_Longformer_Dataset(Path("./datasets"), "train", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)
valid_dataset = EHR_Longformer_Dataset(Path("./datasets"), "valid", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode=args.mode)



train_loader = DataLoader(train_dataset, 
                            batch_size=args.batch_size,
                            shuffle=True,
                            pin_memory=args.pin_memory, 
                            num_workers=args.num_workers,
                            )

valid_loader = DataLoader(valid_dataset, 
                        batch_size=args.batch_size, 
                        shuffle=False,  # Validation should not be shuffled
                        pin_memory=args.pin_memory, 
                        num_workers=args.num_workers,
                        )
finetune_model = finetune_model.to(args.device)

for step, batch in tqdm(enumerate(train_loader), desc="Steps", total=len(train_loader)):
            
    batch = tuple(t.to(args.device) if isinstance(t, torch.Tensor) else t for t in batch)
    input_ids, attention_mask, age_ids, gender_ids, value_ids, unit_ids, time_ids, continuous_ids, position_ids, token_type_ids, task_token, labels = batch
    
    # outputs = finetune_model(
    #     input_ids = input_ids,
    #     value_ids = value_ids,
    #     unit_ids = unit_ids,
    #     time_ids = time_ids,                
    #     continuous_ids = continuous_ids,
    #     position_ids = position_ids,
    #     token_type_ids = token_type_ids,
    #     age_ids = age_ids,
    #     gender_ids = gender_ids,
    #     task_token = task_token,
    #     attention_mask=attention_mask,
    #     global_attention_mask=None,
    #     labels=labels,
    #     return_dict=True,
    # )
    print(labels)
    if step == 10:
        break

Steps:   1%|          | 10/1076 [00:00<00:16, 64.06it/s]

tensor([[0],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]], device='cuda:0')
tensor([[0],
        [0],
        [0],
        [0],
        [1],
        [0],
        [1],
        [0]], device='cuda:0')
tensor([[0],
        [0]




In [13]:
len(train_loader)

1076

In [14]:
data = pd.read_pickle("datasets/mortality30_train_token.pkl")

In [15]:
len(data)

8607

In [None]:
parser = argparse.ArgumentParser()
    
# Required parameters
parser.add_argument("--exp_name", type=str, default="pretrain")
parser.add_argument("--save_path", type=str, default="./results")
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--checkpoint_dir", type=str, default="./checkpoints")

# Model parameters
parser.add_argument("--vocab_size", type=int, default=50265)
parser.add_argument("--itemid_size", type=int, default=600)
parser.add_argument("--unit_size", type=int, default=60)
parser.add_argument("--continuous_size", type=int, default=3)
parser.add_argument("--task_size", type=int, default=4)
parser.add_argument("--max_position_embeddings", type=int, default=4093)
parser.add_argument("--max_age", type=int, default=100)
parser.add_argument("--batch_size", type=int, default=2)
parser.add_argument("--pin_memory", type=bool, default=True)
parser.add_argument("--nodes", type=int, default=1)
parser.add_argument("--gpus", type=int, default=1)
parser.add_argument("--max_epochs", type=int, default=200)
parser.add_argument("--log_every_n_steps", type=int, default=100)
parser.add_argument("--acc", type=int, default=1)
parser.add_argument("--resume_checkpoint", type=str, default=None)

In [None]:
torch.cuda.is_available()

In [5]:
# -*- coding: utf-8 -*-
import argparse
import os
import sys
from typing import Any, Dict

import pytorch_lightning as pl
import torch
from lightning.pytorch.loggers import WandbLogger
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.strategies.ddp import DDPStrategy
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import pickle
import pandas as pd
from transformers import LongformerTokenizer

from datasets import EHR_Longformer_Dataset
from models.model import LongformerPretrain

from utils.utils import seed_everything

from pathlib import Path
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
itemid2idx = pd.read_pickle("datasets/itemid2idx.pkl")
unit2idx = pd.read_pickle("datasets/unit2idx.pkl")
    

In [6]:
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
train_dataset = EHR_Longformer_Dataset(Path("./datasets"), "train", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode='icu_los')
valid_dataset = EHR_Longformer_Dataset(Path("./datasets"), "valid", tokenizer, itemid2idx, unit2idx, use_itemid=True, mode='icu_los')
    



Unexpected exception formatting exception. Falling back to standard exception
Unexpected exception formatting exception. Falling back to standard exception
Unexpected exception formatting exception. Falling back to standard exception


In [None]:
for idx in tqdm(range(len(train_dataset))):
    try:
        print(train_dataset[idx])
    except Exception as e:
        print(f"Error at index {idx}: {e}")
    if idx == 4:
        break

In [None]:
max_num = 0
for idx in tqdm(range(len(train_dataset))):
    
    if max_num < max(train_dataset[idx][0]):
        max_num = max(train_dataset[idx][0])
    

for idx in tqdm(range(len(valid_dataset))):
    
    if max_num < max(valid_dataset[idx][0]):
        max_num = max(valid_dataset[idx][0])
        
print(max_num)
    

In [None]:
import pytorch_lightning as pl 
print(pl.__version__)

In [None]:
import torch

In [None]:
torch.__version__

In [None]:
torch.cuda.set_device(5)

In [6]:
import pandas as pd
icu_los_train = pd.read_pickle("datasets/icu_los_train_token.pkl")
admission_los_train = pd.read_pickle("datasets/admission_los_train_token.pkl")

In [5]:
label_0 = 0
label_1 = 0
for key in icu_los_train.keys():
    # print(icu_los_train[key]['label'])
    if icu_los_train[key]['label'] == 0:
        label_0 += 1
    elif icu_los_train[key]['label'] == 1:
        label_1 += 1
        
print(label_0, label_1)

6274 2332


In [7]:
label_0 = 0
label_1 = 0
for key in admission_los_train.keys():
    # print(icu_los_train[key]['label'])
    if admission_los_train[key]['label'] == 0:
        label_0 += 1
    elif admission_los_train[key]['label'] == 1:
        label_1 += 1
        
print(label_0, label_1)

5292 3314


In [4]:
tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
    
itemid2idx = pd.read_pickle("datasets/entire_itemid2idx.pkl")
unit2idx = pd.read_pickle("datasets/unit2idx.pkl")


test_dataset = EHR_Longformer_Dataset(Path("./datasets"), "test", tokenizer, itemid2idx, unit2idx, use_itemid=True)




In [7]:
test_loader = DataLoader(test_dataset, args.batch_size, shuffle=True, num_workers=0)

In [8]:
for batch in test_loader:
    print(batch)
    break

[tensor([[   4,   30,   38,  ...,    0,    0,    0],
        [   4,   30,    4,  ...,    0,    0,    0],
        [1325,    4,   30,  ...,    0,    0,    0],
        ...,
        [  26,   27,   28,  ...,    0,    0,    0],
        [   4,   27,   28,  ...,    0,    0,    0],
        [-150,    4,   30,  ...,    0,    0,    0]]), tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]]), tensor([[48],
        [26],
        [57],
        [77],
        [60],
        [88],
        [68],
        [63]]), tensor([[1],
        [0],
        [1],
        [0],
        [1],
        [1],
        [1],
        [0]]), tensor([[ 128.,   32.,   92.,  ...,    0.,    0.,    0.],
        [ 106.,   23.,  103.,  ...,    0.,    0.,    0.],
        [   1.,   74.,   15.,  ...,    0.,    0.,    0.],
        ...,
        [ 110.,   49.,   62.,  ...,    0.,    

In [None]:
for batch in test_loader:
    print(batch)
    break

In [None]:
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader
import wandb
from sklearn.metrics import precision_score
from accelerate import Accelerator
from accelerate import DistributedType
import os
from utils.utils import seed_everything
from transformers import LongformerTokenizer
from datasets import EHR_Longformer_Dataset
import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm
import torch.nn.functional as F
from models.longformernormal import LongformerPretrainNormal
from torch.optim.lr_scheduler import LinearLR, SequentialLR, ExponentialLR, LambdaLR, CosineAnnealingWarmRestarts
from pretrain_train import train
import logging
import sys
from torch.utils.data.distributed import DistributedSampler

model = LongformerPretrainNormal(
        vocab_size=args.vocab_size,
        itemid_size=args.itemid_size,
        max_position_embeddings=args.max_position_embeddings,
        unit_size=args.unit_size,
        continuous_size=args.continuous_size,
        task_size=args.task_size,
        max_age=args.max_age,
        gender_size=args.gender_size,
        embedding_size=args.embedding_size,
        num_hidden_layers=args.num_hidden_layers,
        num_attention_heads=args.num_attention_heads,
        intermediate_size=args.intermediate_size,
        learning_rate=args.learning_rate,
        dropout_prob=args.dropout_prob,
        gpu_mixed_precision=args.gpu_mixed_precision,
    ).to(device)