# **Computer Vision 이상치 탐지 알고리즘 경진대회**

---

start : 220401

end : 220513


# 라이브러리 불러오기

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import torch
import os
from tqdm import tqdm
from glob import glob
import cv2
import numpy as np
import pandas as pd
import PIL 
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import torchvision as tv
from PIL import Image

# 환경설정

In [None]:
# Mount Google Drive inside the Colab VM so data and checkpoints live on Drive.
from google.colab import drive
# Sets a private debug attribute on the mount function before calling it.
# NOTE(review): presumably enables verbose mount logging — confirm against google.colab.
drive.mount._DEBUG = True
# Mount at /content/drive; force_remount=True is passed so an existing mount is redone.
drive.mount('/content/drive', force_remount=True)

In [None]:
# Switch the working directory to the project folder on Drive so that the
# relative './data/...' paths used by later cells resolve.
# The absolute path (same root as the drive mount point and the unzip targets)
# keeps this cell re-runnable: a relative './drive/...' path only resolves while
# the current directory is still /content and fails on a second execution.
os.chdir('/content/drive/MyDrive/이상치/')

In [None]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Echo the choice in the cell output.
device

'cuda'

# 데이터 로드

In [None]:
# %%time
# !unzip "./open.zip" -d "/content/drive/MyDrive/Dacon/Computer Vision 이상치 탐지 알고리즘 경진대회/data"

In [None]:
%%time
# Extract the training archive into the project's data directory on Drive.
# (%%time must stay on the first line of the cell; it reports how long the cell took.)
!unzip "./data/train.zip" -d "/content/drive/MyDrive/이상치/data"

In [None]:
%%time
# Extract the test archive into the project's data directory on Drive.
# (%%time must stay on the first line of the cell; it reports how long the cell took.)
!unzip "./data/test.zip" -d "/content/drive/MyDrive/이상치/data"

In [None]:
# Label table: one row per training image; only the "label" column is used here.
train_csv = pd.read_csv("./data/train_df.csv")
train_label = train_csv["label"]

# Image paths, sorted so the order is deterministic.
# NOTE(review): the code pairs train_x[i] with train_label[i] by position, so it
# assumes the CSV rows are in the same order as the sorted file names — confirm.
train_x = sorted(glob('./data/train/*.png'))
test = sorted(glob('./data/test/*.png'))

In [None]:
# Distinct label values in sorted order; a label's position is its class index.
label_unique = sorted(np.unique(train_label))

# Mapping: label value -> integer class index.
label_unique_dir = {name: idx for idx, name in enumerate(label_unique)}

# Integer-encoded target for every training row, in the original row order.
train_Y = list(map(label_unique_dir.__getitem__, train_label))

In [None]:
# Echo the label -> class-index mapping in the cell output for inspection.
label_unique_dir

# 데이터 증강

In [None]:
! pip install albumentations==0.4.6

In [None]:
import albumentations
import albumentations.pytorch

In [None]:
def _normalize_to_tensor():
    """Build fresh Normalize + ToTensorV2 steps (the common tail of both pipelines)."""
    return [
        albumentations.augmentations.transforms.Normalize(mean=(0.5,), std=(0.5,), p=1.0),
        albumentations.pytorch.transforms.ToTensorV2(p=1.0),
    ]


# Main training pipeline: resize to 224x224, random horizontal/vertical flips,
# then exactly one of Rotate / ShiftScaleRotate (OneOf with p=1), followed by
# normalization and conversion to a tensor.
aug = albumentations.Compose(
    [
        albumentations.Resize(224, 224),
        albumentations.HorizontalFlip(),
        albumentations.VerticalFlip(),
        albumentations.OneOf(
            [
                albumentations.Rotate(),
                albumentations.ShiftScaleRotate(),
            ],
            p=1,
        ),
        *_normalize_to_tensor(),
    ]
)

# Reduced pipeline: resize and random rotation only (no flips, no shift/scale),
# followed by the same normalization and tensor conversion.
aug2 = albumentations.Compose(
    [
        albumentations.Resize(224, 224),
        albumentations.Rotate(),
        *_normalize_to_tensor(),
    ]
)

# 데이터셋과 데이터로더

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training images for validation, stratified on the encoded
# label so both parts keep the class proportions; random_state fixes the split.
# NOTE: train_x is rebound to the training part, so re-running this cell
# splits the already-reduced list again.
train_x, val_x, train_y, val_y = train_test_split(
    train_x,
    train_Y,
    test_size=0.2,
    stratify=train_Y,
    random_state=42,
)

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Labelled image dataset backed by a list of image file paths.

    Args:
        x_dir: Sequence of image file paths.
        y_dir: Sequence of labels, index-aligned with ``x_dir``.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``'image'`` entry (albumentations style).
        transform2: Optional alternative pipeline with the same call convention.
            When given, it replaces ``transform`` for samples whose label is in
            the inclusive range 42..46.

    Each item is ``(image, label)``. When no pipeline applies, the image is the
    RGB array produced by OpenCV.
    """

    def __init__(self, x_dir, y_dir, transform=None, transform2=None):
        super().__init__()
        self.transforms = transform
        self.transforms2 = transform2
        self.x_img = x_dir
        self.y = y_dir

    def __len__(self):
        return len(self.x_img)

    def _select_transform(self, y):
        """Return the pipeline to use for label ``y`` (``None`` if there is none)."""
        if self.transforms2 is not None and 42 <= y <= 46:
            return self.transforms2
        return self.transforms

    def __getitem__(self, idx):
        path = self.x_img[idx]
        y = self.y[idx]

        # OpenCV decodes to BGR; convert to RGB before augmenting.
        x_img = cv2.imread(path)
        x_img = cv2.cvtColor(x_img, cv2.COLOR_BGR2RGB)

        # ``transform`` defaults to None, so only call a pipeline that exists
        # (previously a missing pipeline raised "'NoneType' is not callable").
        pipeline = self._select_transform(y)
        if pipeline is not None:
            x_img = pipeline(image=x_img)['image']

        return x_img, y

In [None]:
# Training samples go through the random augmentation pipelines.
train_dataset = MyDataset(train_x,train_y,aug,aug2)

# Validation drives the LR scheduler and checkpoint selection, so it must be
# deterministic: same resize/normalization as training, but no random flips,
# rotations or shifts that would make the validation loss vary between epochs.
val_aug = albumentations.Compose([
      albumentations.Resize(224, 224),
      albumentations.augmentations.transforms.Normalize(mean=(0.5,), std=(0.5,), p=1.0),
      albumentations.pytorch.transforms.ToTensorV2(p=1.0)
      ])
valid_dataset = MyDataset(val_x,val_y,val_aug)

In [None]:
from sklearn.utils.class_weight import compute_class_weight

# "balanced" weights: inversely proportional to class frequency in train_Y.
# `classes` is derived from the label vocabulary instead of a hard-coded 88 and
# passed as an ndarray, which is the type compute_class_weight documents (recent
# scikit-learn releases reject a plain list).
# NOTE(review): these weights are not passed to nn.CrossEntropyLoss where the
# criterion is created later in this notebook — confirm whether that is intended.
class_weights = torch.tensor(
    compute_class_weight(
        class_weight="balanced",
        classes=np.arange(len(label_unique)),
        y=train_Y,
    ),
    dtype=torch.float32,
).to(device)
class_weights

tensor([ 4.8602,  4.4184,  4.4184,  0.2325,  6.9432,  8.1004,  8.1004,  6.9432,
         9.7205,  0.2170,  8.1004,  9.7205,  9.7205,  4.0502,  4.4184,  0.2219,
         4.4184,  4.0502,  4.8602,  4.8602,  5.4003,  0.1736,  5.4003,  5.4003,
         4.8602,  8.1004,  8.1004,  8.1004,  0.1841,  8.1004,  8.1004,  5.4003,
         5.4003,  0.1243,  5.4003,  5.4003,  4.8602,  4.8602,  5.4003,  4.8602,
         0.1984,  5.4003,  3.7386,  4.4184,  4.0502,  0.2209,  4.0502,  3.7386,
         5.4003,  4.4184,  3.7386,  4.8602,  0.1820,  9.7205,  4.0502,  0.1519,
         4.0502,  4.0502,  3.7386,  4.0502,  4.0502,  5.4003,  5.4003,  0.2113,
         6.0753,  5.4003,  6.0753,  3.2402,  0.8100,  9.7205,  9.7205,  9.7205,
         0.2282,  9.7205, 12.1506,  8.1004,  0.1968,  9.7205,  9.7205,  4.4184,
         4.8602,  6.0753,  5.4003,  6.0753,  0.2025,  5.4003,  5.4003,  6.0753],
       device='cuda:0')

In [None]:
batch_size=56
# Training batches are reshuffled every epoch.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,num_workers=4,pin_memory =False)
# Evaluation needs no shuffling; a fixed order keeps validation runs reproducible.
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False,num_workers=4,pin_memory =False)

In [None]:
# Sanity check: pull one training batch and print the input/label shapes
# plus a single label value.
images, labels = next(iter(train_dataloader))
for shape in (images.shape, labels.shape):
    print(shape)
print(labels[1])

# 모델 정의

In [None]:
!pip install timm
import timm

In [None]:
!pip install efficientnet_pytorch
from efficientnet_pytorch import EfficientNet

In [None]:
# Inspect which timm architectures ship pretrained weights:
# echo how many there are, followed by the full list.
avail_pretrained_models = timm.list_models(pretrained=True)
model_count = len(avail_pretrained_models)
model_count, avail_pretrained_models[:]

In [None]:
# Load the model: pretrained Swin-Tiny from timm with an 88-way
# classification head and 3 input channels.
model = timm.create_model(
    'swin_tiny_patch4_window7_224',
    pretrained=True,
    num_classes=88,
    in_chans=3,
)

In [None]:
# Total number of elements across all model parameters.
sum(weight.nelement() for weight in model.parameters())

# 학습 파라미터 설정

In [None]:
from sklearn.metrics import f1_score, accuracy_score


def score_function(real, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``real``.

    Args:
        real: Ground-truth class labels.
        pred: Predicted class labels, index-aligned with ``real``.
    """
    return f1_score(real, pred, average="macro")

In [None]:
# Unweighted cross-entropy over the class logits.
# NOTE(review): the `class_weights` tensor computed earlier is not used here —
# confirm whether the loss was meant to be class-weighted.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Lower the learning rate once the monitored value (the validation loss passed
# to scheduler.step in the training loop) has stopped decreasing for 4 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=4,
)

# 학습

In [None]:
# number of epochs to train the model
n_epochs = 100
valid_loss_min = np.inf  # lowest validation loss seen so far (plain float)

# per-epoch history: mean batch loss and macro-F1 for both splits
train_loss = torch.zeros(n_epochs)
valid_loss = torch.zeros(n_epochs)

train_F1 = torch.zeros(n_epochs)
valid_F1 = torch.zeros(n_epochs)
model.to(device)

for e in range(0, n_epochs):

    ###################
    # train the model #
    ###################
    model.train()
    # Targets/predictions are collected over the whole epoch: macro-F1 computed
    # per mini-batch and then averaged is badly biased, because a single batch
    # contains only a few of the classes.
    epoch_true, epoch_pred = [], []
    for data, labels in tqdm(train_dataloader):
        # move tensors to the selected device
        data, labels = data.to(device), labels.to(device)
        # forward pass: compute predicted outputs by passing inputs to the model
        logits = model(data)
        # calculate the batch loss
        loss = criterion(logits, labels)

        optimizer.zero_grad()
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # accumulate the training loss
        train_loss[e] += loss.item()
        # remember predictions and targets for the epoch-level score
        epoch_pred.extend(logits.argmax(1).detach().cpu().tolist())
        epoch_true.extend(labels.detach().cpu().tolist())

    train_loss[e] /= len(train_dataloader)
    train_F1[e] = score_function(epoch_true, epoch_pred)

    ######################
    # validate the model #
    ######################
    epoch_true, epoch_pred = [], []
    with torch.no_grad():
        model.eval()
        for data, labels in tqdm(valid_dataloader):
            # move tensors to the selected device
            data, labels = data.to(device), labels.to(device)
            # forward pass: compute predicted outputs by passing inputs to the model
            logits = model(data)
            # calculate the batch loss
            loss = criterion(logits, labels)
            # accumulate the validation loss
            valid_loss[e] += loss.item()
            # remember predictions and targets for the epoch-level validation score
            epoch_pred.extend(logits.argmax(1).detach().cpu().tolist())
            epoch_true.extend(labels.detach().cpu().tolist())

    # calculate average loss and the epoch-level score
    valid_loss[e] /= len(valid_dataloader)
    valid_F1[e] = score_function(epoch_true, epoch_pred)

    # Work with a plain float so neither the scheduler nor the best-loss tracker
    # holds a view into the history tensor.
    epoch_valid_loss = valid_loss[e].item()
    scheduler.step(epoch_valid_loss)

    # print training/validation statistics
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        e, train_loss[e], valid_loss[e]))

    # the score is macro-F1 (see score_function), so it is labelled as such
    print('Epoch: {} \tTraining macro F1: {:.6f} \tValidation macro F1: {:.6f}'.format(
        e, train_F1[e], valid_F1[e]))

    # save model if validation loss has decreased
    if epoch_valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        epoch_valid_loss))
        torch.save(model.state_dict(), 'swin_tiny_patch4_window7_224.pt')
        valid_loss_min = epoch_valid_loss

In [None]:
# Per-epoch loss history: training curve first, validation curve second
# (no legend is set).
plt.plot(train_loss)
plt.plot(valid_loss) 

In [None]:
# Per-epoch macro-F1 history: training curve first, validation curve second
# (no legend is set).
plt.plot(train_F1)
plt.plot(valid_F1)

# 모델 로드 및 테스트 데이터셋과 데이터로더

In [None]:
# Restore the weights saved by the training loop (lowest validation loss).
# map_location remaps the stored tensors onto the currently selected device, so
# a checkpoint written on a GPU runtime also loads on a CPU-only runtime.
model.load_state_dict(torch.load('swin_tiny_patch4_window7_224.pt', map_location=device))

In [None]:
class TestDataset(torch.utils.data.Dataset):
    """Unlabelled image dataset backed by a list of image file paths.

    Args:
        x_dir: Sequence of image file paths.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping with an ``'image'`` entry.

    Each item is the image only (no label): the transformed image when a
    transform is set, otherwise the RGB array produced by OpenCV.
    """

    def __init__(self, x_dir, transform=None):
        super().__init__()
        self.x_img = x_dir
        self.transforms = transform

    def __len__(self):
        return len(self.x_img)

    def __getitem__(self, idx):
        # OpenCV decodes to BGR; convert to RGB.
        bgr = cv2.imread(self.x_img[idx])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        if not self.transforms:
            return rgb
        return self.transforms(image=rgb)['image']

In [None]:
means=(0.5,)
stds=(0.5,)
# Deterministic test-time preprocessing. The resize must be 224x224: that is the
# input resolution of the 'swin_tiny_patch4_window7_224' model and the size the
# training pipelines resize to. The previous 300x300 did not match either.
testtransform = albumentations.Compose([
      albumentations.Resize(224, 224),
      albumentations.augmentations.transforms.Normalize(mean=means, std=stds, p=1.0),
      albumentations.pytorch.transforms.ToTensorV2(p=1.0)
      ])

In [None]:
# Test images are served in their sorted path order (no shuffling), so the
# predictions keep the same order as the `test` path list.
batch_size = 64
test_dataset = TestDataset(test, testtransform)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# 추론 (TTA)

In [None]:
! pip install git+https://github.com/qubvel/ttach

In [None]:
import ttach as tta

# Test-time augmentation: every combination of a 0/90/180/270 degree rotation
# and a multiplication of the input by 0.9 / 1 / 1.1.
tta_steps = [
    tta.Rotate90(angles=[0, 90, 180, 270]),
    tta.Multiply(factors=[0.9, 1, 1.1]),
]
tta_transforms = tta.Compose(tta_steps)

# Wrap the trained classifier with the test-time augmentation set.
tta_model = tta.ClassificationTTAWrapper(model, tta_transforms)

In [None]:
tta_model.to(device)
tta_model.eval()

# Predicted class index for every test image, in loader order.
f_pred = []

with torch.no_grad():
    # TestDataset yields images only (no labels), so each batch is a single
    # tensor; unpacking it as `x, _` would try to split the batch itself.
    for x in tqdm(test_loader):
        x = x.to(device)
        pred = tta_model(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

# 제출물 생성

In [None]:
label_decoder = {val:key for key, val in label_unique_dir.items()}

f_result = [label_decoder[result] for result in f_pred]

In [None]:
# Start from the provided submission template and fill its "label" column
# with the decoded predictions.
submission = pd.read_csv("./data/sample_submission.csv").assign(label=f_result)

# Echo the resulting frame in the cell output.
submission

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv(path_or_buf="swin_tiny_patch4_window7_224.csv", index=False)