In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

import torch, torchvision
import torch.nn.functional as F
from torch import nn, optim
from torchvision import transforms, datasets
import os
import copy, os
from tqdm import trange

### 세팅 

In [None]:
# Root directory for everything this grid-search run writes; later cells
# derive their checkpoint / CSV / evaluation paths from it.
DIR = './results/gridsearch16'

# Pick the compute device: use the GPU when a CUDA-enabled PyTorch build can
# see one, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print("Using Device:", DEVICE)

Using Device: cuda


In [3]:
# Fashion-MNIST train / test splits.
# Both splits share the same storage root, are downloaded on first use and
# are converted to tensors by ToTensor().
_fmnist_kwargs = dict(
    root='./.data/',
    download=True,
    transform=transforms.ToTensor(),
)
trainset = datasets.FashionMNIST(train=True, **_fmnist_kwargs)
testset = datasets.FashionMNIST(train=False, **_fmnist_kwargs)

In [4]:
# The "normal" class: only samples with this label are kept for training.
SELECT_NORMAL = 2
normal_mask = trainset.targets == SELECT_NORMAL
trainset.data = trainset.data[normal_mask]
trainset.targets = trainset.targets[normal_mask]

# Labels that are kept in the test split (the normal class plus two others).
test_label = [2,4,6]
actual_testdata = torch.isin(testset.targets, torch.tensor(test_label))
testset.data = testset.data[actual_testdata]
testset.targets = testset.targets[actual_testdata]

# One sample per batch, in dataset order.
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
)

train_data_size = len(trainset)
test_data_size = len(testset)

print("Train data size:", train_data_size, "Test data size:", test_data_size)

Train data size: 6000 Test data size: 3000


#### 데이터 증강 기법 사용 class 

In [5]:
class GaussianNoise(nn.Module):
    """Adds zero-mean Gaussian noise to its input while in training mode.

    Args:
        std: Standard deviation of the noise.

    In eval mode (``self.training`` is False) the input is returned unchanged.
    """

    def __init__(self, std=0.1):
        super().__init__()
        self.std = std

    def forward(self, x):
        # Guard clause: no perturbation outside training mode.
        if not self.training:
            return x
        # Fresh tensor with x's dtype/device and shape, filled in place with
        # N(0, std) samples.
        perturbation = x.new_empty(x.size()).normal_(0, self.std)
        return x + perturbation

In [6]:
# Augmentation pipeline handed to AugmentedDataset for the extra
# (non-original) copies of each training sample.
_augmentation_steps = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(15),         # 20 degrees felt too large; 15 or less is recommended
    transforms.RandomCrop(28, padding=2),  # acts as a small random shift
    GaussianNoise(0.1),                    # additive Gaussian noise
]
transform = transforms.Compose(_augmentation_steps)

class AugmentedDataset(torch.utils.data.Dataset):
    """Virtually enlarges a map-style dataset by an integer factor.

    With ``N = len(dataset)``:
      * indices ``[0, N)`` return the original samples untouched;
      * indices ``[N, N * augmentation_factor)`` return sample ``idx % N``
        passed through ``transform`` (if one was given).
    """

    def __init__(self, dataset, transform=None, augmentation_factor=1):
        '''
        dataset: the source dataset (must support len() and integer indexing
            returning an ``(x, y)`` pair)
        transform: callable applied to ``x`` for the augmented copies
        augmentation_factor: total size multiplier, the original pass included
            (1 means "no augmentation")

        Raises:
            ValueError: if ``augmentation_factor`` is smaller than 1.
        '''
        if augmentation_factor < 1:
            raise ValueError(
                f"augmentation_factor must be >= 1, got {augmentation_factor}"
            )
        self.dataset = dataset
        self.transform = transform
        self.augmentation_factor = augmentation_factor
        self.original_length = len(dataset)

    def __len__(self):
        # Total number of samples = original size * factor.
        return self.original_length * self.augmentation_factor

    def __getitem__(self, idx):
        total = len(self)
        # Support Python-style negative indices relative to the enlarged length.
        if idx < 0:
            idx += total
        # Without this check an out-of-range index would silently wrap around
        # through the modulo below, and plain iteration (which relies on
        # IndexError to stop) would never terminate.
        if not 0 <= idx < total:
            raise IndexError(
                f"index {idx} is out of range for AugmentedDataset of length {total}"
            )

        # Map the virtual index back onto the source dataset.
        original_idx = idx % self.original_length
        x, y = self.dataset[original_idx]

        # The first pass over the data is returned as-is; every later pass
        # goes through the augmentation transform.
        if idx >= self.original_length and self.transform is not None:
            x = self.transform(x)

        return x, y


In [7]:
# Split first, augment second: the validation subset is carved out of the
# training data before augmentation, so only the training part is augmented.
BATCH_SIZE = 1024
n_val = int(len(trainset) * 0.2)
n_train = len(trainset) - n_val

split_rng = torch.Generator().manual_seed(2025)  # fixed seed -> same split every run
train_subset, valset = torch.utils.data.random_split(
    trainset, [n_train, n_val], generator=split_rng
)

# 10x enlargement: the original pass plus augmented passes.
augset = AugmentedDataset(train_subset, transform=transform, augmentation_factor=10)
# valset is left un-augmented.

train_loader = torch.utils.data.DataLoader(
    augset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)

val_loader = torch.utils.data.DataLoader(
    valset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

# Sanity check of the resulting sizes.
print("Train data size:", len(augset),"Val data size:", len(valset),"Test data size:", len(testset))

Train data size: 48000 Val data size: 1200 Test data size: 3000


### 모델 및 Training Setting 

In [8]:
class EarlyStopping():
    """Stops training when the validation loss has not improved for a while.

    Call the instance once per epoch with the current validation loss and the
    model. The model's ``state_dict`` is saved every time the loss improves,
    and ``early_stop`` becomes True after ``patience`` consecutive calls
    without improvement.
    """

    def __init__(self, patience=10, verbose=False, delta=0, path='checkpoint.pt'):
        '''
        patience (int): number of non-improving calls to tolerate before stopping
        verbose (bool): if True, print a message on every counter increment / checkpoint
        delta (float): minimum change in the loss that counts as an improvement
        path (str): file the best model's state_dict is written to
        '''
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss  # lower validation loss == higher score

        # NaN compares False against everything, so it would otherwise fall
        # into the "improved" branch, overwrite the checkpoint with a diverged
        # model and poison best_score for all later comparisons. `x != x` is
        # True only for NaN.
        if score != score:
            self._no_improvement()
            return

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self._no_improvement()
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def _no_improvement(self):
        '''Count one non-improving epoch and raise the stop flag once patience is used up.'''
        self.counter += 1
        if self.verbose:
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, val_loss, model):
        '''Save the model's state_dict; called whenever the validation loss decreases.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

### 모델 및 loss 불러오기 

In [9]:
import torch.nn as nn
import model

def get_model_classes():
    """
    Collect the nn.Module subclasses exposed by the `model` package.

    Returns a dict mapping attribute name -> class, restricted to classes
    whose ``__module__`` starts with ``'model.'``.
    """
    def _is_package_module_class(candidate):
        if not isinstance(candidate, type):
            return False
        if not issubclass(candidate, nn.Module):
            return False
        return candidate.__module__.startswith('model.')

    # dir() yields names in sorted order, which fixes the dict's key order.
    members = ((attr, getattr(model, attr)) for attr in dir(model))
    return {attr: obj for attr, obj in members if _is_package_module_class(obj)}

# Instantiate every discovered architecture once with its default constructor.
# NOTE: despite the name, the values of `model_classes` are model *instances*.
model_classes = dict((name, cls()) for name, cls in get_model_classes().items())
print("Available models:", model_classes.keys())

Available models: dict_keys(['Autoencoder', 'CAE', 'CVAE', 'DeepCAE', 'DeepCVAE', 'DeepVAE', 'DenoisingAutoencoder', 'DiffusionUNet', 'GANomaly', 'HybridCAE', 'RobustAutoencoder', 'SimpleDDPM', 'SkipConnectionAutoencoder', 'TransformerAnomalyDetector', 'VAE'])


In [10]:
# Loss registry for the grid search.
# Recommended loss combinations, keyed by a display name.
from loss.losses import FlexibleLoss, FlexibleDiffusionLoss

# Candidates stored under the "reconstruction" family.
# NOTE(review): the gradient term's weight is passed as `gamma=` here but as
# `alpha=` in `diffusion_loss` below - confirm against the FlexibleLoss /
# FlexibleDiffusionLoss signatures that both are intended.
reconstruction_loss = {
    "MSE": FlexibleLoss(mode="mse"),
    "MSE+Gradient": FlexibleLoss(mode="mse+gradient", beta=1.0, gamma=0.1),
    "MSE+MS-SSIM": FlexibleLoss(mode="mse+ms-ssim", beta=1.0, alpha=0.3),
    "Charbonnier+MS-SSIM": FlexibleLoss(mode="charbonnier+ms-ssim", beta=1.0, alpha=0.5),
    "Charbonnier+Gradient": FlexibleLoss(mode="charbonnier+gradient", beta=1.0, gamma=0.1),
}

# Candidates stored under the "diffusion" family.
diffusion_loss = {
    "MSE": FlexibleDiffusionLoss(mode="mse"),
    "MSE+Gradient": FlexibleDiffusionLoss(mode="mse+gradient", beta=1.0, alpha=0.1),
    "Charbonnier": FlexibleDiffusionLoss(mode="charbonnier", beta=1.0),
    "Charbonnier+Gradient": FlexibleDiffusionLoss(mode="charbonnier+gradient", beta=1.0, alpha=0.1)
}
# Two-level registry: family name -> {display name -> loss object}.
# This is the object passed to the trainer and to model_delete.
loss_functions = {
    "reconstruction": reconstruction_loss,
    "diffusion": diffusion_loss,
}

print("Available reconstruction loss functions:", reconstruction_loss.keys())
print("Available diffusion loss functions:", diffusion_loss.keys())

Available reconstruction loss functions: dict_keys(['MSE', 'MSE+Gradient', 'MSE+MS-SSIM', 'Charbonnier+MS-SSIM', 'Charbonnier+Gradient'])
Available diffusion loss functions: dict_keys(['MSE', 'MSE+Gradient', 'Charbonnier', 'Charbonnier+Gradient'])


In [None]:
def model_delete(models, loss_functions, checkpoint_dir):
    '''
    Remove already-trained models from `models` (in place) and return it.

    A model counts as trained when, for every loss name of its family, the
    file checkpoint_dir/{model_name}_{loss_name}.pth exists ('+' in the loss
    name is replaced by 'and'). Models that have a `T` attribute use the
    'diffusion' family of `loss_functions`; all others use 'reconstruction'.
    '''
    finished = []

    for model_name, instance in list(models.items()):
        # Loss family this model is trained with.
        family = 'diffusion' if hasattr(instance, 'T') else 'reconstruction'

        # One expected checkpoint file per loss of that family.
        expected_paths = [
            os.path.join(checkpoint_dir, f"{model_name}_{loss_name.replace('+', 'and')}.pth")
            for loss_name in loss_functions[family].keys()
        ]
        nothing_missing = not any(not os.path.exists(p) for p in expected_paths)

        if not nothing_missing:
            print(f"❌ {model_name} has missing checkpoints. Keeping in list.")
            continue
        print(f"✅ {model_name} all checkpoints exist. Removing from list.")
        finished.append(model_name)

    # Actual removal happens after the scan.
    for model_name in finished:
        del models[model_name]

    return models
# Drop every model whose checkpoints are already complete so that a re-run
# resumes instead of retraining.
# The trainer and the Evaluator in this notebook are both configured with
# f'{DIR}/checkpoints', so that directory (not DIR itself) is the one to scan.
model_classes = model_delete(model_classes, loss_functions, f'{DIR}/checkpoints')
print("Remaining models:", model_classes.keys())

❌ Autoencoder has missing checkpoints. Keeping in list.
❌ CAE has missing checkpoints. Keeping in list.
❌ CVAE has missing checkpoints. Keeping in list.
❌ DeepCAE has missing checkpoints. Keeping in list.
❌ DeepCVAE has missing checkpoints. Keeping in list.
❌ DeepVAE has missing checkpoints. Keeping in list.
❌ DenoisingAutoencoder has missing checkpoints. Keeping in list.
❌ DiffusionUNet has missing checkpoints. Keeping in list.
❌ GANomaly has missing checkpoints. Keeping in list.
❌ HybridCAE has missing checkpoints. Keeping in list.
❌ RobustAutoencoder has missing checkpoints. Keeping in list.
❌ SimpleDDPM has missing checkpoints. Keeping in list.
❌ SkipConnectionAutoencoder has missing checkpoints. Keeping in list.
❌ TransformerAnomalyDetector has missing checkpoints. Keeping in list.
❌ VAE has missing checkpoints. Keeping in list.
Remaining models: dict_keys(['Autoencoder', 'CAE', 'CVAE', 'DeepCAE', 'DeepCVAE', 'DeepVAE', 'DenoisingAutoencoder', 'DiffusionUNet', 'GANomaly', 'HybridC

### Trainer 실행 

In [12]:
# Pull one batch from each loader and report the tensor shapes as a sanity check.
train_images, train_labels = next(iter(train_loader))
test_images, test_labels = next(iter(test_loader))

for split_name, batch_images, batch_labels in (
    ("Train", train_images, train_labels),
    ("Test", test_images, test_labels),
):
    print(f"{split_name} batch image shape:", batch_images.shape)
    print(f"{split_name} batch label shape:", batch_labels.shape)

Train batch image shape: torch.Size([1024, 1, 28, 28])
Train batch label shape: torch.Size([1024])
Test batch image shape: torch.Size([1, 1, 28, 28])
Test batch label shape: torch.Size([1])


In [None]:
# Grid-search training over every (model, loss) combination.
EPOCHS = 200    # passed to the trainer as n_epochs
PATIENCE = 20   # passed to the trainer as patience
# Mixed-precision (FP16) variant of the grid-search trainer.
from trainer import GridSearchTrainerFP16, GridSearchTrainer
trainer = GridSearchTrainerFP16(
    models=model_classes,
    criterions_dict=loss_functions,
    train_loader=train_loader,
    val_loader=val_loader,
    n_epochs=EPOCHS,
    patience=PATIENCE,
    save_dir=f'{DIR}/checkpoints',
    verbose=False,
    device=DEVICE,
    lr=1e-3 * BATCH_SIZE / 256  # base LR 1e-3 scaled linearly with batch size (reference batch: 256)
)
results = trainer.run()

results_df = pd.DataFrame(results)
# Save the results to a CSV file. Make sure the target directory exists first
# so that a missing folder cannot make the write fail after the long training run.
os.makedirs(DIR, exist_ok=True)
results_df.to_csv(f'{DIR}/training.csv', index=False)

Total Models: 15
Reconstruction Losses: 5 Diffusion Losses: 4
Total Combinations: 74
▶ Training [Autoencoder] with [MSE] (FP16)


Autoencoder | MSE (FP16):  11%|█         | 22/200 [05:01<40:24, 13.62s/it]

In [None]:
# Display the per-combination training summary built from trainer.run().
results_df

Unnamed: 0,model,loss,best_val_loss,save_path
0,Autoencoder,MSE,0.049954,./checkpoints/Autoencoder_MSE.pth
1,CAE,MSE,0.012909,./checkpoints/CAE_MSE.pth
2,CVAE,MSE,0.001859,./checkpoints/CVAE_MSE.pth
3,DeepCAE,MSE,0.037144,./checkpoints/DeepCAE_MSE.pth
4,DeepCVAE,MSE,0.000254,./checkpoints/DeepCVAE_MSE.pth
5,DenoisingAutoencoder,MSE,0.009787,./checkpoints/DenoisingAutoencoder_MSE.pth
6,DiffusionUNet,MSE,0.033451,./checkpoints/DiffusionUNet_MSE.pth
7,GANomaly,MSE,17.12288,./checkpoints/GANomaly_MSE.pth
8,HybridCAE,MSE,0.046011,./checkpoints/HybridCAE_MSE.pth
9,RobustAutoencoder,MSE,0.048929,./checkpoints/RobustAutoencoder_MSE.pth


### eval

In [None]:
import os
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, f1_score, accuracy_score
from tqdm import tqdm

class Evaluator:
    """Scores every checkpoint in a directory as an anomaly detector.

    For each ``<model>_<loss>.pth`` file the matching model is rebuilt, its
    weights are loaded, a decision threshold is taken as a percentile of the
    validation scores, and ROC-AUC / PR-AUC / F1 / accuracy are computed on
    the test loader.

    Args:
        checkpoint_dir: directory containing the ``.pth`` state dicts.
        val_loader: loader used to calibrate the threshold.
        test_loader: loader yielding ``(x, label)`` batches to evaluate on.
        device: torch device; defaults to CUDA when available, else CPU.
        percentile: fraction in [0, 1]; the threshold is this percentile of
            the validation scores.
        save_dir: directory created on construction for evaluation outputs.
        normal_class: label treated as "normal" (0); every other label is
            treated as an anomaly (1).
    """

    def __init__(self, checkpoint_dir, val_loader, test_loader, device=None, percentile=0.95,
                 save_dir='./eval_results', normal_class=2):
        self.checkpoint_dir = checkpoint_dir
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device if device else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.percentile = percentile
        self.save_dir = save_dir
        self.normal_class = normal_class
        os.makedirs(self.save_dir, exist_ok=True)

    def load_model(self, model_name):
        """Instantiate the model class registered under ``model_name`` on ``self.device``.

        Raises:
            KeyError: if ``model_name`` is not returned by ``get_model_classes()``.
        """
        model_class = get_model_classes()[model_name]
        model = model_class().to(self.device)
        return model

    def compute_score(self, model, x):
        """Return one anomaly score per sample of the batch ``x``.

        * Models with a ``T`` attribute are called as ``model(x, t)`` with a
          random timestep per sample; the score is the per-sample MSE between
          the two tensors they return. Because ``t`` is drawn at random, these
          scores vary between calls unless the RNG is seeded.
        * All other models are called as ``model(x)``; if a tuple is returned
          its first element is used. The score is the per-sample MSE against
          the input.
        """
        if hasattr(model, 'T'):
            t = torch.randint(0, model.T, (x.size(0),), device=x.device)
            noise_pred, noise = model(x, t)
            return F.mse_loss(noise_pred, noise, reduction='none').view(x.size(0), -1).mean(dim=1)
        output = model(x)
        if isinstance(output, tuple):
            output = output[0]
        return F.mse_loss(output, x, reduction='none').view(x.size(0), -1).mean(dim=1)

    def get_scores(self, loader, model):
        """Run ``model`` in eval mode over ``loader``; return ``(scores, labels)`` as numpy arrays."""
        model.eval()
        scores, labels = [], []
        with torch.no_grad():
            for x, y in tqdm(loader, desc="Scoring"):
                x = x.to(self.device)
                score = self.compute_score(model, x)
                scores.append(score.cpu())
                labels.append(y.cpu())
        return torch.cat(scores).numpy(), torch.cat(labels).numpy()

    def run(self):
        """Evaluate every ``.pth`` checkpoint and return the metrics as a DataFrame.

        Columns: checkpoint, threshold, roc_auc, pr_auc, f1, acc.
        """
        # os.listdir order is arbitrary; sort so the result table is reproducible.
        checkpoint_files = sorted(f for f in os.listdir(self.checkpoint_dir) if f.endswith(".pth"))
        results = []

        for ckpt in checkpoint_files:
            # File names are expected to look like '<model>_<loss>.pth'.
            model_name, sep, _ = ckpt[:-4].partition("_")
            if not sep:
                print(f"\n▶ Skipping {ckpt}: expected '<model>_<loss>.pth'")
                continue

            print(f"\n▶ Evaluating {ckpt}")

            model = self.load_model(model_name)
            # map_location lets checkpoints saved on a GPU be loaded on a CPU-only machine.
            state_dict = torch.load(os.path.join(self.checkpoint_dir, ckpt), map_location=self.device)
            model.load_state_dict(state_dict)

            # Threshold = chosen percentile of the validation scores.
            val_scores, _ = self.get_scores(self.val_loader, model)
            threshold = np.percentile(val_scores, self.percentile * 100)
            print(f" >> Threshold ({self.percentile*100:.0f}%) = {threshold:.4f}")

            test_scores, test_labels = self.get_scores(self.test_loader, model)
            # Binarise: normal class -> 0, everything else -> 1 (anomaly).
            test_labels = (test_labels != self.normal_class).astype(int)
            preds = (test_scores > threshold).astype(int)

            auc_score = roc_auc_score(test_labels, test_scores)
            precision, recall, _ = precision_recall_curve(test_labels, test_scores)
            pr_auc = auc(recall, precision)
            f1 = f1_score(test_labels, preds)
            acc = accuracy_score(test_labels, preds)

            print(f"ROC-AUC: {auc_score:.4f} | PR-AUC: {pr_auc:.4f} | F1: {f1:.4f} | ACC: {acc:.4f}")

            results.append({
                "checkpoint": ckpt,
                "threshold": threshold,
                "roc_auc": auc_score,
                "pr_auc": pr_auc,
                "f1": f1,
                "acc": acc
            })

        df = pd.DataFrame(results)
        return df


In [None]:
# Evaluate every checkpoint written by the trainer and persist the metrics table.
evaluator = Evaluator(
    checkpoint_dir=f'{DIR}/checkpoints',
    val_loader=val_loader,
    test_loader=test_loader,
    device=DEVICE,
    percentile=0.95,
    save_dir=f'{DIR}/eval_results'  # output directory (created by Evaluator.__init__)
)
eval_results = evaluator.run()
# The path must be an f-string; without the prefix the file would be written
# under a literal '{DIR}' folder instead of the run directory.
eval_results.to_csv(f'{DIR}/eval_results/eval_results.csv', index=False)


▶ Evaluating Autoencoder_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 40.00it/s]


 >> Threshold (@95%) = 0.0886


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 720.66it/s] 


ROC-AUC: 0.5332 | PR-AUC: 0.6799 | F1: 0.1028

▶ Evaluating CAE_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 21.05it/s]


 >> Threshold (@95%) = 0.0189


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 632.50it/s] 


ROC-AUC: 0.4171 | PR-AUC: 0.6255 | F1: 0.1143

▶ Evaluating CVAE_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 22.22it/s]


 >> Threshold (@95%) = 0.1240


Scoring: 100%|██████████| 3000/3000 [00:05<00:00, 585.12it/s] 


ROC-AUC: 0.5134 | PR-AUC: 0.6640 | F1: 0.0460

▶ Evaluating DeepCAE_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 17.86it/s]


 >> Threshold (@95%) = 0.0672


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 638.66it/s] 


ROC-AUC: 0.4286 | PR-AUC: 0.6079 | F1: 0.0400

▶ Evaluating DeepCVAE_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 27.78it/s]


 >> Threshold (@95%) = 0.1207


Scoring: 100%|██████████| 3000/3000 [00:05<00:00, 590.45it/s] 


ROC-AUC: 0.5078 | PR-AUC: 0.6662 | F1: 0.0664

▶ Evaluating DenoisingAutoencoder_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 19.42it/s]


 >> Threshold (@95%) = 0.0152


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 664.46it/s] 


ROC-AUC: 0.4219 | PR-AUC: 0.6304 | F1: 0.1182

▶ Evaluating DiffusionUNet_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00,  5.04it/s]


 >> Threshold (@95%) = 0.0500


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 636.69it/s] 


ROC-AUC: 0.4081 | PR-AUC: 0.6024 | F1: 0.0576

▶ Evaluating GANomaly_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 33.90it/s]


 >> Threshold (@95%) = 0.0289


Scoring: 100%|██████████| 3000/3000 [00:05<00:00, 591.80it/s] 


ROC-AUC: 0.3761 | PR-AUC: 0.5971 | F1: 0.0839

▶ Evaluating HybridCAE_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 32.26it/s]


 >> Threshold (@95%) = 0.0840


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 610.29it/s] 


ROC-AUC: 0.5142 | PR-AUC: 0.6663 | F1: 0.0769

▶ Evaluating RobustAutoencoder_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 42.56it/s]


 >> Threshold (@95%) = 0.0821


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 667.74it/s] 


ROC-AUC: 0.5418 | PR-AUC: 0.6996 | F1: 0.1296

▶ Evaluating SimpleDDPM_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00,  4.51it/s]


 >> Threshold (@95%) = 0.6907


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 656.53it/s] 


ROC-AUC: 0.5171 | PR-AUC: 0.6787 | F1: 0.0863

▶ Evaluating SkipConnectionAutoencoder_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00,  7.54it/s]


 >> Threshold (@95%) = 0.0036


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 624.66it/s] 


ROC-AUC: 0.4336 | PR-AUC: 0.6287 | F1: 0.0803

▶ Evaluating TransformerAnomalyDetector_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00,  4.34it/s]


 >> Threshold (@95%) = 0.4695


Scoring: 100%|██████████| 3000/3000 [00:06<00:00, 481.29it/s]


ROC-AUC: 0.4930 | PR-AUC: 0.6419 | F1: 0.0355

▶ Evaluating VAE_MSE.pth


Scoring: 100%|██████████| 2/2 [00:00<00:00, 44.44it/s]


 >> Threshold (@95%) = 0.1198


Scoring: 100%|██████████| 3000/3000 [00:04<00:00, 680.33it/s] 

ROC-AUC: 0.5112 | PR-AUC: 0.6633 | F1: 0.0544





Unnamed: 0,checkpoint,threshold,roc_auc,pr_auc,f1
0,Autoencoder_MSE.pth,0.088597,0.533179,0.679935,0.102825
1,CAE_MSE.pth,0.018927,0.417074,0.625503,0.114312
2,CVAE_MSE.pth,0.123979,0.513384,0.663999,0.046043
3,DeepCAE_MSE.pth,0.067239,0.428583,0.607922,0.039981
4,DeepCVAE_MSE.pth,0.120728,0.507833,0.666234,0.066382
5,DenoisingAutoencoder_MSE.pth,0.015244,0.42191,0.63039,0.118182
6,DiffusionUNet_MSE.pth,0.050046,0.408063,0.60237,0.057574
7,GANomaly_MSE.pth,0.028873,0.376113,0.597055,0.083877
8,HybridCAE_MSE.pth,0.083981,0.514245,0.666335,0.076887
9,RobustAutoencoder_MSE.pth,0.082134,0.541758,0.699636,0.129562


In [None]:
# Display the per-checkpoint evaluation metrics returned by evaluator.run().
eval_results