## 1. 라이브러리 불러오기

In [1]:
import sys
import glob
import cv2
import numpy as np

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms, utils, datasets, models
from torch.nn.modules.loss import BCEWithLogitsLoss
from torch.optim import lr_scheduler

from torch.autograd import Variable

from matplotlib import pyplot as plt
from time import time

import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold

import wandb

from augraphy import *

In [2]:

train_path = 'data/dataset_50000/final_df.csv'
submission_path = 'data/sample_submission.csv'



In [3]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

## 2. Custom Dataset

In [4]:
class ImageDataset(Dataset):
    def __init__(self, csv, path, album_transform=None, augraphy_transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path 
        self.album_transform = album_transform
        self.augraphy_transform = augraphy_transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        name, _, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        
        if self.augraphy_transform:
            img = self.augraphy_transform(img)

        if self.album_transform:
            img = self.album_transform(image=img)['image']
        
        return img, target

In [5]:
class ImageDataset1(Dataset):
    def __init__(self, csv, path, album_transform=None, augraphy_transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path 
        self.album_transform = album_transform
        self.augraphy_transform = augraphy_transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        
        if self.augraphy_transform:
            img = self.augraphy_transform(img)

        if self.album_transform:
            img = self.album_transform(image=img)['image']
        
        return img, target

## 3. Training Pipeline

In [6]:
def training(model, dataloader, dataset, device, criterion, optimizer, epoch, num_epochs):
    model.train()
    train_loss = 0.0
    preds_list = []
    targets_list = []

    tbar = tqdm(dataloader)
    for images, labels in tbar:
        images = images.type(torch.cuda.FloatTensor)
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(outputs.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(labels.detach().cpu().numpy())

        tbar.set_description(f"Epoch [{epoch+1}/{num_epochs}], Train Loss : {loss.item():.4f}")

    train_loss = train_loss / (len(dataloader))
    train_acc = accuracy_score(preds_list, targets_list)
    train_f1 = f1_score(preds_list, targets_list, average='macro')

    metrics = {
        'train_loss' : train_loss,
        'train_acc' : train_acc,
        'train_f1' : train_f1
    }

    return model, metrics

def evaluation(model, dataloader, dataset, device, criterion, epoch, num_epochs):
    model.eval()
    valid_loss = 0.0
    preds_list = []
    targets_list = []

    with torch.no_grad():
        tbar = tqdm(dataloader)
        for images, labels in tbar:
            images = images.type(torch.cuda.FloatTensor)
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            valid_loss += loss.item()
            preds_list.extend(outputs.argmax(dim=1).detach().cpu().numpy())
            targets_list.extend(labels.detach().cpu().numpy())

            tbar.set_description(f"Epcoh [{epoch+1}/{num_epochs}] Valid Loss : {loss:.4f}")

    valid_loss /= len(dataloader)
    valid_acc = accuracy_score(preds_list, targets_list)
    valid_f1 = f1_score(preds_list, targets_list, average='macro')

    metrics = {
        'valid_loss' : valid_loss,
        'valid_acc' : valid_acc,
        'valid_f1' : valid_f1
    }

    return model, metrics

def training_loop(model, train_dataloader, valid_dataloader, train_dataset, valid_dataset, criterion, optimizer, device, num_epochs, model_path, model_name, patience, run):

    best_valid_loss = float('inf')
    valid_max_accuracy = -1
    valid_max_f1 = -1
    early_stop_counter = 0

    for epoch in range(num_epochs):
        model, train_metrics = training(model, train_dataloader, train_dataset, device, criterion, optimizer, epoch, num_epochs)
        model, valid_metrics = evaluation(model, valid_dataloader, valid_dataset, device, criterion, epoch, num_epochs)

        monitoring_value = {
            'train_loss' : train_metrics['train_loss'],
            'train_accuracy' : train_metrics['train_acc'],
            'train_f1' : train_metrics['train_f1'],
            'valid_loss' : valid_metrics['valid_loss'],
            'valid_accuracy' : valid_metrics['valid_acc'],
            'valid_f1' : valid_metrics['valid_f1']
        }
        run.log(monitoring_value, step=epoch)

        if valid_max_accuracy < valid_metrics['valid_acc']:
            valid_max_accuracy = valid_metrics['valid_acc']

            run.summary['best_train_acc'] = train_metrics['train_acc']
            run.summary['best_valid_acc'] = valid_metrics['valid_acc']
        
        if valid_max_f1 < valid_metrics['valid_f1']:
            valid_max_f1 = valid_metrics['valid_f1']
            torch.save(model.state_dict(), f"./model_{model_name}.pt")

            run.summary['best_train_f1'] = train_metrics['train_f1']
            run.summary['best_valid_f1'] = valid_metrics['valid_f1']

        if best_valid_loss > valid_metrics['valid_loss']:
            best_valid_loss = valid_metrics['valid_loss']
            early_stop_counter = 0
            run.summary['best_train_loss'] = train_metrics['train_loss']
            run.summary['best_valid_loss'] = valid_metrics['valid_loss']
        else:
            early_stop_counter += 1
            
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss : {train_metrics['train_loss']:.4f}, Train Acc : {train_metrics['train_acc']:.4f}, 'Train F1 : {train_metrics['train_f1']:.4f}, Valid Loss : {valid_metrics['valid_loss']:.4f}, Valid Acc : {valid_metrics['valid_acc']:.4f}, Valid F1 : {valid_metrics['valid_f1']}")

        if early_stop_counter >= patience:
            print('Early Stopping!')        
            break

    return model, valid_max_accuracy, valid_max_f1


## 4. Data Load

In [7]:
img_csv_path = 'data/dataset_50000/final_df.csv'
df_img = pd.read_csv(img_csv_path)
df_img.head()

Unnamed: 0,ID,target,target2
0,augmented_0_a1ab865095b2d312_ljh.jpg,2,2
1,augmented_1_a1ab865095b2d312_ljh.jpg,2,2
2,augmented_2_a1ab865095b2d312_ljh.jpg,2,2
3,augmented_3_a1ab865095b2d312_ljh.jpg,2,2
4,augmented_4_a1ab865095b2d312_ljh.jpg,2,2


In [8]:
img_path = 'data/dataset_50000/aug_2'
test_img_path = 'data/test/'
totensor_transform = A.Compose([A.Resize(380, 380), ToTensorV2()])
test_transform = A.Compose([
    A.Resize(380, 380),
    ToTensorV2()
])

train_dataset = ImageDataset(img_csv_path, img_path, album_transform=totensor_transform, augraphy_transform=None)
test_dataset = ImageDataset(submission_path, test_img_path, album_transform=test_transform, augraphy_transform=None)

print(len(train_dataset), len(test_dataset))

52608 3140


In [11]:
test_dataset = ImageDataset1(submission_path, test_img_path, album_transform=test_transform, augraphy_transform=None)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [12]:
train_num = int(len(train_dataset) * 0.8)
valid_num = len(train_dataset) - train_num
train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, [train_num, valid_num])

print(len(train_dataset), len(valid_dataset))

33668 8418


In [14]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [4]:
print(model)

EfficientNet(
  (conv_stem): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
    

## 5. Train Model

In [3]:
import timm
model = timm.create_model('efficientnet_b4', pretrained=True)
in_features = model.classifier.in_features
classifier = nn.Sequential(
    nn.Linear(in_features, 1024),
    nn.BatchNorm1d(1024),
    nn.SiLU(),
    nn.Dropout(p=0.2),
    nn.Linear(1024, 512),
    nn.BatchNorm1d(512),
    nn.SiLU(),
    nn.Dropout(p=0.2),
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.SiLU(),
    nn.Dropout(p=0.2),
    nn.Linear(256, 17),
) 

model.classifier = classifier

NameError: name 'nn' is not defined

### Hyper Parameter 정의

In [11]:
class Cfg():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    num_epochs = 100
    batch_size=32
    model_path = ''

In [16]:
wandb.finish()

VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [21]:
run = wandb.init(project='aistages-OCR', name='effb4_add_fc1_50000')

device = Cfg.device
model = Cfg.model
criterion = Cfg.criterion
optimizer = Cfg.optimizer 
num_epochs = Cfg.num_epochs
model_name = 'effb4-add_fc_50000dataset'
model_path = Cfg.model_path
model1 = model
# run.watch(model1, criterion, log='all', log_graph=True)

model, valid_max_accuracy, valid_max_f1 = training_loop(model, train_dataloader, valid_dataloader, train_dataset, valid_dataset, criterion, optimizer, device, num_epochs, model_path, model_name, 20, run)

run.finish()

VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_accuracy,▁█
train_f1,▁█
train_loss,█▁
valid_accuracy,▁█
valid_f1,▁█
valid_loss,█▁

0,1
best_train_acc,0.98917
best_train_f1,0.98921
best_train_loss,0.03841
best_valid_acc,0.98926
best_valid_f1,0.9892
best_valid_loss,0.03491
train_accuracy,0.98917
train_f1,0.98921
train_loss,0.03841
valid_accuracy,0.98926


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114635173645285, max=1.0…

Epoch [1/100], Train Loss : 0.1208: 100%|██████████| 1316/1316 [09:15<00:00,  2.37it/s]
Epcoh [1/100] Valid Loss : 0.0026: 100%|██████████| 329/329 [01:44<00:00,  3.13it/s]


Epoch [1/100], Train Loss : 0.0304, Train Acc : 0.9902, 'Train F1 : 0.9902, Valid Loss : 0.0338, Valid Acc : 0.9905, Valid F1 : 0.9904273950007491


Epoch [2/100], Train Loss : 0.6793: 100%|██████████| 1316/1316 [09:13<00:00,  2.38it/s]
Epcoh [2/100] Valid Loss : 0.0007: 100%|██████████| 329/329 [01:46<00:00,  3.10it/s]


Epoch [2/100], Train Loss : 0.0282, Train Acc : 0.9918, 'Train F1 : 0.9919, Valid Loss : 0.0592, Valid Acc : 0.9829, Valid F1 : 0.982676661734145


Epoch [3/100], Train Loss : 0.8553: 100%|██████████| 1316/1316 [09:14<00:00,  2.37it/s]
Epcoh [3/100] Valid Loss : 0.0015: 100%|██████████| 329/329 [01:47<00:00,  3.05it/s]


Epoch [3/100], Train Loss : 0.0248, Train Acc : 0.9927, 'Train F1 : 0.9928, Valid Loss : 0.0430, Valid Acc : 0.9878, Valid F1 : 0.9877564238916312


Epoch [4/100], Train Loss : 0.3041: 100%|██████████| 1316/1316 [09:12<00:00,  2.38it/s]
Epcoh [4/100] Valid Loss : 0.0004: 100%|██████████| 329/329 [01:45<00:00,  3.12it/s]


Epoch [4/100], Train Loss : 0.0280, Train Acc : 0.9922, 'Train F1 : 0.9922, Valid Loss : 0.0436, Valid Acc : 0.9903, Valid F1 : 0.9902489850247006


Epoch [5/100], Train Loss : 0.0259: 100%|██████████| 1316/1316 [09:14<00:00,  2.37it/s]
Epcoh [5/100] Valid Loss : 0.0008: 100%|██████████| 329/329 [01:48<00:00,  3.03it/s]


Epoch [5/100], Train Loss : 0.0216, Train Acc : 0.9942, 'Train F1 : 0.9942, Valid Loss : 0.0303, Valid Acc : 0.9912, Valid F1 : 0.9910926206600552


Epoch [6/100], Train Loss : 0.0237: 100%|██████████| 1316/1316 [09:22<00:00,  2.34it/s]
Epcoh [6/100] Valid Loss : 0.0017: 100%|██████████| 329/329 [01:47<00:00,  3.05it/s]


Epoch [6/100], Train Loss : 0.0191, Train Acc : 0.9945, 'Train F1 : 0.9945, Valid Loss : 0.0337, Valid Acc : 0.9910, Valid F1 : 0.9909085023022205


Epoch [7/100], Train Loss : 0.0006: 100%|██████████| 1316/1316 [09:31<00:00,  2.30it/s]
Epcoh [7/100] Valid Loss : 0.0000: 100%|██████████| 329/329 [01:50<00:00,  2.97it/s]


Epoch [7/100], Train Loss : 0.0176, Train Acc : 0.9946, 'Train F1 : 0.9946, Valid Loss : 0.0406, Valid Acc : 0.9880, Valid F1 : 0.9879134135344434


Epoch [8/100], Train Loss : 0.0071: 100%|██████████| 1316/1316 [09:29<00:00,  2.31it/s]
Epcoh [8/100] Valid Loss : 0.0002: 100%|██████████| 329/329 [01:48<00:00,  3.03it/s]


Epoch [8/100], Train Loss : 0.0183, Train Acc : 0.9948, 'Train F1 : 0.9948, Valid Loss : 0.0244, Valid Acc : 0.9931, Valid F1 : 0.9930151008311725


Epoch [9/100], Train Loss : 0.0595: 100%|██████████| 1316/1316 [09:29<00:00,  2.31it/s]
Epcoh [9/100] Valid Loss : 0.0009: 100%|██████████| 329/329 [01:48<00:00,  3.03it/s]


Epoch [9/100], Train Loss : 0.0147, Train Acc : 0.9955, 'Train F1 : 0.9955, Valid Loss : 0.0271, Valid Acc : 0.9938, Valid F1 : 0.9937696722034327


Epoch [10/100], Train Loss : 0.0533: 100%|██████████| 1316/1316 [09:31<00:00,  2.30it/s]
Epcoh [10/100] Valid Loss : 0.0008: 100%|██████████| 329/329 [01:48<00:00,  3.05it/s]


Epoch [10/100], Train Loss : 0.0185, Train Acc : 0.9946, 'Train F1 : 0.9946, Valid Loss : 0.0467, Valid Acc : 0.9888, Valid F1 : 0.988558354438753


Epoch [11/100], Train Loss : 0.0002: 100%|██████████| 1316/1316 [09:30<00:00,  2.31it/s]
Epcoh [11/100] Valid Loss : 0.0029: 100%|██████████| 329/329 [01:48<00:00,  3.03it/s]


Epoch [11/100], Train Loss : 0.0124, Train Acc : 0.9964, 'Train F1 : 0.9964, Valid Loss : 0.0338, Valid Acc : 0.9915, Valid F1 : 0.9914965659373542


Epoch [12/100], Train Loss : 2.3352: 100%|██████████| 1316/1316 [09:31<00:00,  2.30it/s]
Epcoh [12/100] Valid Loss : 0.0001: 100%|██████████| 329/329 [01:48<00:00,  3.03it/s]


Epoch [12/100], Train Loss : 0.0156, Train Acc : 0.9966, 'Train F1 : 0.9966, Valid Loss : 0.0394, Valid Acc : 0.9891, Valid F1 : 0.989006894638645


Epoch [13/100], Train Loss : 0.0159:  43%|████▎     | 569/1316 [04:07<05:24,  2.30it/s]


KeyboardInterrupt: 

In [13]:
# run = wandb.init(project='AIStage-CV', name='effb4_add_fc')

# device = Cfg.device
# model = Cfg.model
# criterion = Cfg.criterion
# optimizer = Cfg.optimizer 
# num_epochs = Cfg.num_epochs
# model_name = 'effb4-add_fc'
# model_path = Cfg.model_path

# run.watch(model, criterion, log='all', log_graph=True)

# model, valid_max_accuracy, valid_max_f1 = training_loop(model, train_dataloader, valid_dataloader, train_dataset, valid_dataset, criterion, optimizer, device, num_epochs, model_path, model_name, 20, run)

# run.finish()

In [15]:
effb4 = timm.create_model('efficientnet_b4', pretrained=True)
in_features = effb4.classifier.in_features
classifier = nn.Sequential(
    nn.Linear(in_features, 1024),
    nn.BatchNorm1d(1024),
    nn.SiLU(), # relu -> swish 변경 
    nn.Dropout(p=0.2),
    nn.Linear(1024, 512),
    nn.BatchNorm1d(512),
    nn.SiLU(),
    nn.Dropout(p=0.2),
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.SiLU(),
    nn.Dropout(p=0.2),
    nn.Linear(256, 17),
)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
effb4.classifier = classifier
effb4.load_state_dict(torch.load(f'./model_effb4-add_fc_50000dataset.pt'))
effb4 = effb4.to(device)
effb4.eval()

EfficientNet(
  (conv_stem): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
    

In [42]:
# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=0.5),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게
    ToTensorV2()
])


def apply_tta_and_predict(model, images, num_transforms=3):
    """TTA를 적용하고, 변환된 이미지들에 대한 예측 결과의 평균을 계산합니다."""
    batch_size, C, H, W = images.shape
    preds = torch.zeros((batch_size, model.num_classes), device=device)
    transform_tta = A.Compose([
                                A.Flip(p=0.5),  # 수평 뒤집기
                                A.Rotate(limit=40, p=0.5),  # 회전
                                A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게
                                ToTensorV2()  # PyTorch 모델에 사용하기 위해 Tensor로 변환
                            ])
    for _ in range(num_transforms):
        # Albumentations는 numpy 이미지를 요구하므로, PyTorch 텐서를 numpy 배열로 변환합니다.
        images_np = images.cpu().numpy().transpose(0, 2, 3, 1)  # CHW -> HWC
        augmented_images = np.zeros((batch_size, C, H, W), dtype=np.float32)
        
        for i in range(batch_size):
            augmented = transform_tta(image=images_np[i])['image']
            augmented_images[i] = augmented
        
        augmented_images = torch.from_numpy(augmented_images.transpose(0, 3, 1, 2)).to(device)  # HWC -> CHW
        with torch.no_grad():
            preds += model(augmented_images)
    
    preds /= num_transforms
    return preds.argmax(dim=1).detach().cpu().numpy()


In [40]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=0.5),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게
    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.type(torch.cuda.FloatTensor)
    images = images.to(device)
    with torch.no_grad():
        images_np = images.permute(0, 2, 3, 1).cpu().numpy() 
        transformed_img1 = transform_tta(image = images_np)['image']
        transformed_img2 = transform_tta(image= images_np)['image']    
        output = effb4(images)
        output_img1 = effb4(torch.tensor(transformed_img1).to(device))
        output_img2 = effb4(torch.tensor(transformed_img2).to(device))
        preds = (output + output_img1 + output_img2)/3
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())


  0%|          | 0/99 [00:00<?, ?it/s]


error: OpenCV(4.9.0) /io/opencv/modules/core/src/matrix_transform.cpp:782: error: (-215:Assertion failed) _src.dims() <= 2 in function 'flip'


In [45]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=0.5),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게
    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        
        # 예측 결과 평균 계산
        preds = (output + output_img1 + output_img2) / 3
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())


100%|██████████| 99/99 [01:37<00:00,  1.01it/s]


In [46]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_3tta_notnormalize.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [51]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=0.5),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        
        # 예측 결과 평균 계산
        preds = (output + output_img1 + output_img2) / 3
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())


100%|██████████| 99/99 [01:29<00:00,  1.11it/s]


In [52]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_3tta_notsharpennotnormalize.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [56]:
preds_list = []

# Albumentations 변환 정의
transform_tta_1 = A.Compose([
    A.HorizontalFlip(p=1.0),
    ToTensorV2()
])

transform_tta_2 = A.Compose([
    A.VerticalFlip(p=1.0),
    ToTensorV2()
])


transform_tta_3 = A.Compose([
    A.Rotate(limit=359, p=1.0),  # 회전
    ToTensorV2()
])



for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        transformed_imgs3 = []
        
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta_1(image=img_np)
            transformed_data2 = transform_tta_2(image=img_np)
            transformed_data3 = transform_tta_3(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            transformed_imgs3.append(transformed_data3['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        transformed_img3 = torch.tensor(np.array(transformed_imgs3), dtype=torch.float).to(device)
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        output_img3 = effb4(transformed_img3)
        
        # 예측 결과 평균 계산
        preds = (output + output_img1 + output_img2+output_img3) / 4
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())


100%|██████████| 99/99 [01:48<00:00,  1.09s/it]


In [57]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_4tta_fliprotatenotnormalize.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [95]:
preds_list = []

# Albumentations 변환 정의
transform_tta_1 = A.Compose([
    A.HorizontalFlip(p=1.0),
    ToTensorV2()
])

transform_tta_2 = A.Compose([
    A.VerticalFlip(p=1.0),
    ToTensorV2()
])


transform_tta_3 = A.Compose([
    A.Rotate(limit=359, p=1.0),  # 회전
    ToTensorV2()
])



for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        transformed_imgs3 = []
        transformed_imgs4 = []
        transformed_imgs5 = []
        
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta_1(image=img_np)
            transformed_data2 = transform_tta_2(image=img_np)
            transformed_data3 = transform_tta_3(image=img_np)
            transformed_data4 = transform_tta_4(image=img_np)
            transformed_data5 = transform_tta_5(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            transformed_imgs3.append(transformed_data3['image'].permute(1,0,2))
            transformed_imgs4.append(transformed_data4['image'].permute(1,0,2))
            transformed_imgs5.append(transformed_data5['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        transformed_img3 = torch.tensor(np.array(transformed_imgs3), dtype=torch.float).to(device)
        transformed_img4 = torch.tensor(np.array(transformed_imgs4), dtype=torch.float).to(device)        
        transformed_img5 = torch.tensor(np.array(transformed_imgs5), dtype=torch.float).to(device)        
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        output_img3 = effb4(transformed_img3)
        output_img4 = effb4(transformed_img4)
        output_img5 = effb4(transformed_img5)
        
        
        output_probs = F.softmax(output, dim=1)
        output_img1_probs = F.softmax(output_img1, dim=1)
        output_img2_probs = F.softmax(output_img2, dim=1)
        output_img3_probs = F.softmax(output_img3, dim=1)
        output_img4_probs = F.softmax(output_img4, dim=1)
        output_img5_probs = F.softmax(output_img5, dim=1)
        
        # 각 샘플별로 최대 확률 계산
        max_probs, _ = torch.max(output_probs, dim=1)
        
        # 확률이 0.95 이상인 샘플의 인덱스
        high_confidence_indices = max_probs >= 0.95
        # 확률이 0.95 미만인 샘플의 인덱스
        low_confidence_indices = max_probs < 0.95
        
        # 확률이 0.95 이상인 샘플에 대한 최종 예측
        final_preds = output_probs.argmax(dim=1)
        
        # 확률이 0.95 미만인 샘플에 대한 예측은 변환된 이미지들의 예측 확률 평균을 사용
        if low_confidence_indices.sum() > 0:
            avg_probs = (output_img1_probs + output_img2_probs + output_img3_probs )/3
            avg_final_preds = avg_probs.argmax(dim=1)
            
            # 확률이 0.95 미만인 샘플에 대해 평균 예측으로 최종 예측 업데이트
            final_preds[low_confidence_indices] = avg_final_preds[low_confidence_indices]
        
        # 최종 예측 리스트에 추가
        preds_list.extend(final_preds.cpu().numpy())
        


100%|██████████| 99/99 [02:42<00:00,  1.64s/it]


In [96]:
preds_list

[2,
 12,
 5,
 12,
 2,
 15,
 0,
 8,
 15,
 11,
 5,
 3,
 16,
 9,
 15,
 4,
 7,
 5,
 13,
 10,
 12,
 12,
 1,
 6,
 3,
 0,
 14,
 16,
 12,
 6,
 7,
 0,
 13,
 2,
 5,
 16,
 13,
 14,
 3,
 0,
 5,
 9,
 12,
 9,
 0,
 8,
 5,
 0,
 11,
 14,
 10,
 10,
 10,
 6,
 4,
 12,
 9,
 5,
 13,
 13,
 12,
 4,
 5,
 5,
 6,
 1,
 5,
 7,
 10,
 6,
 3,
 10,
 8,
 15,
 14,
 15,
 6,
 12,
 12,
 13,
 8,
 9,
 9,
 4,
 10,
 10,
 5,
 13,
 10,
 0,
 10,
 8,
 5,
 15,
 14,
 16,
 11,
 11,
 14,
 11,
 14,
 7,
 13,
 1,
 15,
 11,
 2,
 12,
 16,
 8,
 6,
 2,
 0,
 4,
 12,
 16,
 2,
 7,
 11,
 4,
 2,
 6,
 5,
 8,
 10,
 6,
 4,
 4,
 7,
 6,
 5,
 4,
 15,
 10,
 16,
 16,
 3,
 6,
 6,
 8,
 4,
 8,
 13,
 2,
 12,
 8,
 3,
 5,
 9,
 8,
 6,
 8,
 16,
 12,
 11,
 16,
 9,
 15,
 6,
 8,
 5,
 5,
 10,
 10,
 16,
 15,
 9,
 12,
 16,
 5,
 2,
 8,
 8,
 16,
 9,
 8,
 16,
 16,
 7,
 4,
 11,
 15,
 9,
 9,
 2,
 7,
 11,
 10,
 9,
 0,
 4,
 0,
 16,
 5,
 14,
 15,
 5,
 12,
 0,
 4,
 13,
 2,
 6,
 16,
 16,
 10,
 8,
 9,
 0,
 10,
 5,
 1,
 14,
 3,
 11,
 2,
 0,
 7,
 0,
 13,
 12,
 0,
 16,
 3,
 12,
 5,

In [97]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_first1_after.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [99]:
preds_list = []

# Albumentations 변환 정의
transform_tta_1 = A.Compose([
    A.HorizontalFlip(p=1.0),
    ToTensorV2()
])

transform_tta_2 = A.Compose([
    A.VerticalFlip(p=1.0),
    ToTensorV2()
])


transform_tta_3 = A.Compose([
    A.Rotate(limit=359, p=1.0),  # 회전
    ToTensorV2()
])


transform_tta_4 = A.Compose([
    A.Rotate(limit=179, p=1.0),  # 회전
    ToTensorV2()
])


transform_tta_5 = A.Compose([
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게
    ToTensorV2()
])




for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        transformed_imgs3 = []
        transformed_imgs4 = []
        transformed_imgs5 = []
        
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta_1(image=img_np)
            transformed_data2 = transform_tta_2(image=img_np)
            transformed_data3 = transform_tta_3(image=img_np)
            transformed_data4 = transform_tta_4(image=img_np)
            transformed_data5 = transform_tta_5(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            transformed_imgs3.append(transformed_data3['image'].permute(1,0,2))
            transformed_imgs4.append(transformed_data4['image'].permute(1,0,2))
            transformed_imgs5.append(transformed_data5['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        transformed_img3 = torch.tensor(np.array(transformed_imgs3), dtype=torch.float).to(device)
        transformed_img4 = torch.tensor(np.array(transformed_imgs4), dtype=torch.float).to(device)        
        transformed_img5 = torch.tensor(np.array(transformed_imgs5), dtype=torch.float).to(device)        
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        output_img3 = effb4(transformed_img3)
        output_img4 = effb4(transformed_img4)
        output_img5 = effb4(transformed_img5)
        
        
        output_probs = F.softmax(output, dim=1)
        output_img1_probs = F.softmax(output_img1, dim=1)
        output_img2_probs = F.softmax(output_img2, dim=1)
        output_img3_probs = F.softmax(output_img3, dim=1)
        output_img4_probs = F.softmax(output_img4, dim=1)
        output_img5_probs = F.softmax(output_img5, dim=1)
        
        # 각 샘플별로 최대 확률 계산
        max_probs, _ = torch.max(output_probs, dim=1)
        
        # 확률이 0.95 이상인 샘플의 인덱스
        high_confidence_indices = max_probs >= 0.98
        # 확률이 0.95 미만인 샘플의 인덱스
        low_confidence_indices = max_probs < 0.98
        
        # 확률이 0.95 이상인 샘플에 대한 최종 예측
        final_preds = output_probs.argmax(dim=1)
        
        # 확률이 0.95 미만인 샘플에 대한 예측은 변환된 이미지들의 예측 확률 평균을 사용
        if low_confidence_indices.sum() > 0:
            avg_probs = (output_img1_probs + output_img2_probs + output_img3_probs + output_img4_probs + output_img5_probs) / 5
            avg_final_preds = avg_probs.argmax(dim=1)
            
            # 확률이 0.95 미만인 샘플에 대해 평균 예측으로 최종 예측 업데이트
            final_preds[low_confidence_indices] = avg_final_preds[low_confidence_indices]
        
        # 최종 예측 리스트에 추가
        preds_list.extend(final_preds.cpu().numpy())
        


100%|██████████| 99/99 [02:39<00:00,  1.61s/it]


In [100]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_first1_after5tta_mean98cut.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [32]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=0.5),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게
    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        transformed_imgs3 = []
        transformed_imgs4 = []
        transformed_imgs5 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            transformed_data3 = transform_tta(image=img_np)
            transformed_data4 = transform_tta(image=img_np)
            transformed_data5 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            transformed_imgs3.append(transformed_data3['image'].permute(1,0,2))
            transformed_imgs4.append(transformed_data4['image'].permute(1,0,2))
            transformed_imgs5.append(transformed_data5['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device) / 255.0
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device) / 255.0
        transformed_img3 = torch.tensor(np.array(transformed_imgs3), dtype=torch.float).to(device) / 255.0
        transformed_img4 = torch.tensor(np.array(transformed_imgs4), dtype=torch.float).to(device) / 255.0
        transformed_img5 = torch.tensor(np.array(transformed_imgs5), dtype=torch.float).to(device) / 255.0
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        output_img3 = effb4(transformed_img3)
        output_img4 = effb4(transformed_img4)
        output_img5 = effb4(transformed_img5)
        
        # 예측 결과 평균 계산
        preds = (output + output_img1 + output_img2 + output_img3 + output_img4 + output_img5) / 6
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())


100%|██████████| 99/99 [03:15<00:00,  1.98s/it]


In [33]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_5tta.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,2
2,00396fbc1f6cc21d.jpg,2
3,00471f8038d9c4b6.jpg,2
4,00901f504008d884.jpg,2


100%|██████████| 99/99 [00:30<00:00,  3.23it/s]


In [49]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=0.9),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.6),  # 회전
    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        transformed_imgs3 = []
        transformed_imgs4 = []
        transformed_imgs5 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            transformed_data3 = transform_tta(image=img_np)
            transformed_data4 = transform_tta(image=img_np)
            transformed_data5 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            transformed_imgs3.append(transformed_data3['image'].permute(1,0,2))
            transformed_imgs4.append(transformed_data4['image'].permute(1,0,2))
            transformed_imgs5.append(transformed_data5['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        transformed_img3 = torch.tensor(np.array(transformed_imgs3), dtype=torch.float).to(device)
        transformed_img4 = torch.tensor(np.array(transformed_imgs4), dtype=torch.float).to(device)
        transformed_img5 = torch.tensor(np.array(transformed_imgs5), dtype=torch.float).to(device)
        
        # 모델 예측 수행
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        output_img3 = effb4(transformed_img3)
        output_img4 = effb4(transformed_img4)
        output_img5 = effb4(transformed_img5)
        
        # 예측 결과 평균 계산
        preds = (output + output_img1 + output_img2 + output_img3 + output_img4 + output_img5) / 6
        
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())


100%|██████████| 99/99 [03:15<00:00,  1.97s/it]


In [50]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_5tta_notsharpennotnormalize.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [47]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=1.0),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device) 
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device) 
        
        # 모델 예측 수행
        
                
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        
        # 예측 결과 평균 계산
        pred_indices = torch.stack([
            output.argmax(dim=1),
            output_img1.argmax(dim=1),
            output_img2.argmax(dim=1)
        ], dim=1)
        preds, _ = pred_indices.mode(dim=1)
        
    preds_list.extend(preds.detach().cpu().numpy())


100%|██████████| 99/99 [01:33<00:00,  1.06it/s]


In [48]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_3tta_softvoting_notnormalize.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2


In [None]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=1.0),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전

    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        
        # 모델 예측 수행
        
                
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        
        combined_preds = torch.cat([output.unsqueeze(2), output_img1.unsqueeze(2), output_img2.unsqueeze(2)], dim=2)
        
        # 결합된 텐서에서 각 샘플에 대해 가장 높은 확률을 가진 클래스의 인덱스 찾기
        # max 함수는 최대값과 해당 값의 인덱스를 반환합니다. 여기서는 인덱스만 필요합니다.
        max_probs, max_indices = combined_preds.max(dim=2)
        
        # 최대 확률을 가진 인덱스 중 하나를 선택 (여기서는 가장 높은 확률의 인덱스를 사용)
        final_preds = max_indices[torch.arange(max_indices.size(0)), max_probs.argmax(dim=1)]
        
    preds_list.extend(final_preds.cpu().numpy())
        


In [None]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_3tta_argmax_Notdivide255.csv', index=False)
pred_df.head()

In [53]:
preds_list = []

# Albumentations 변환 정의
transform_tta = A.Compose([
    A.Flip(p=1.0),  # 수평 뒤집기
    A.Rotate(limit=279, p=0.5),  # 회전
    A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=0.5),  # 선명하게

    ToTensorV2()
])

for images, labels in tqdm(test_dataloader):
    images = images.float().to(device)
    with torch.no_grad():
        # 이미지 배치를 CPU로 이동시키고 NumPy 배열로 변환
        images_np = images.cpu().numpy()
        #print(images_np.shape)  32,3,380,380
        # 변환된 이미지를 저장할 배열 초기화
        transformed_imgs1 = []
        transformed_imgs2 = []
        
        # 각 이미지에 대해 변환 적용
        for img_np in images_np:
            # 이미지 하나에 대해 변환 적용
            # print(img_np.shape) 3,380,380
            transformed_data1 = transform_tta(image=img_np)
            transformed_data2 = transform_tta(image=img_np)
            
            # 변환된 이미지 배열에 추가
            transformed_imgs1.append(transformed_data1['image'].permute(1,0,2))
            transformed_imgs2.append(transformed_data2['image'].permute(1,0,2))
            
        
        # NumPy 배열로 변환된 이미지들을 PyTorch 텐서로 변환
        transformed_img1 = torch.tensor(np.array(transformed_imgs1), dtype=torch.float).to(device)
        transformed_img2 = torch.tensor(np.array(transformed_imgs2), dtype=torch.float).to(device)
        
        # 모델 예측 수행
        
                
        output = effb4(images)
        output_img1 = effb4(transformed_img1)
        output_img2 = effb4(transformed_img2)
        
        combined_preds = torch.cat([output.unsqueeze(2), output_img1.unsqueeze(2), output_img2.unsqueeze(2)], dim=2)
        
        # 결합된 텐서에서 각 샘플에 대해 가장 높은 확률을 가진 클래스의 인덱스 찾기
        # max 함수는 최대값과 해당 값의 인덱스를 반환합니다. 여기서는 인덱스만 필요합니다.
        max_probs, max_indices = combined_preds.max(dim=2)
        
        # 최대 확률을 가진 인덱스 중 하나를 선택 (여기서는 가장 높은 확률의 인덱스를 사용)
        final_preds = max_indices[torch.arange(max_indices.size(0)), max_probs.argmax(dim=1)]
        
    preds_list.extend(final_preds.cpu().numpy())
        


100%|██████████| 99/99 [01:37<00:00,  1.01it/s]


In [54]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()
pred_df.to_csv('./effb4-add_50000_3tta_argmax_sharpenNotdivide255.csv', index=False)
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,0
1,00091bffdffd83de.jpg,0
2,00396fbc1f6cc21d.jpg,0
3,00471f8038d9c4b6.jpg,0
4,00901f504008d884.jpg,0


In [82]:
preds_list = []

for images, labels in tqdm(test_dataloader):
    images = images.type(torch.cuda.FloatTensor)
    images = images.to(device)

    with torch.no_grad():
        preds = effb4(images)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:29<00:00,  3.37it/s]


In [83]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

In [84]:
sample_submission_df = pd.read_csv(submission_path)
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [85]:
pred_df.to_csv('./effb4-add_50000-double-check.csv', index=False)

In [31]:
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,12
4,00901f504008d884.jpg,2
