# 구글 드라이브 마운트

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Root folders on the mounted Google Drive; every path below hangs off these.
_NOTEBOOK_ROOT = '/content/drive/MyDrive/Colab Notebooks/'
_RAW_DATA_ROOT = _NOTEBOOK_ROOT + 'Data/'
_DERIVED_ROOT = _NOTEBOOK_ROOT + 'myData/'

# Label JSON files.
TRAIN_JSON_PATH = _RAW_DATA_ROOT + 'label/train/'
VALID_JSON_PATH = _RAW_DATA_ROOT + 'label/val/'

# Source images.
TRAIN_IMG_PATH = _RAW_DATA_ROOT + 'img/train/'
VALID_IMG_PATH = _RAW_DATA_ROOT + 'img/val/'

# Segmentation data.
TRAIN_SEGMENT_PATH = _DERIVED_ROOT + 'segmentation/train/'
VALID_SEGMENT_PATH = _DERIVED_ROOT + 'segmentation/val/'

# Folder holding the CSV files.
CSV_PATH = _DERIVED_ROOT

# Preprocessed images.
PREPROC_TRAIN_IMG_PATH = _DERIVED_ROOT + 'preprocessed/train/'
PREPROC_VALID_IMG_PATH = _DERIVED_ROOT + 'preprocessed/val/'

# 라이브러리 임포트

In [1]:
import os
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset, ConcatDataset
from torchvision import transforms, datasets
import torchvision.transforms as tt
import torchvision
import copy
import time
from tqdm import tqdm
import json

# 모델 저장용

In [None]:
!gdown --id '1i3Lmt83jhwC6GtCHtSnF-pOHPZGe8NED' --output "/content/model_eff.pth"

%cd /content/

Downloading...
From (original): https://drive.google.com/uc?id=1i3Lmt83jhwC6GtCHtSnF-pOHPZGe8NED
From (redirected): https://drive.google.com/uc?id=1i3Lmt83jhwC6GtCHtSnF-pOHPZGe8NED&confirm=t&uuid=30bd4d45-f743-4bbc-94d6-7bef27f165e6
To: /content/model_eff.pth
100% 366M/366M [00:07<00:00, 50.9MB/s]
/content


# GPU 혹은 CPU

In [None]:
def get_device():
    """Return the torch device to compute on: CUDA when available, else CPU."""
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)
get_device()

device(type='cuda')

# 데이터 입력 처리


In [None]:
from PIL import Image, ExifTags
import io
def exif_delete(image_path):
    """Load an image with its EXIF orientation baked into the pixel data.

    The file is opened once and closed again before returning.  The
    orientation handling is delegated to ``ImageOps.exif_transpose``, which
    transposes the pixels according to the EXIF Orientation tag and removes
    that tag from the returned image, so a later EXIF-aware reader does not
    rotate the picture a second time.  No JPEG re-encode takes place.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The upright image as a ``PIL.Image.Image``, or ``None`` when the file
        could not be opened or processed (the error is printed).
    """
    try:
        # Local import: ImageOps is not part of the notebook's shared import
        # line, and importing here keeps this cell self-contained.
        from PIL import Image, ImageOps

        # The context manager releases the file handle; exif_transpose
        # returns an image that no longer depends on the open file.
        with Image.open(image_path) as image:
            return ImageOps.exif_transpose(image)

    except Exception as e:
        # Deliberate best-effort contract kept from the original: report the
        # problem and hand back None instead of raising.
        print(f"Error: {e}")
        return None


In [None]:
class CropAroundFace:
    """Callable that crops an image to a box looked up by its file name.

    The boxes are read from a CSV that must provide the columns ``filename``,
    ``minX``, ``minY``, ``maxX`` and ``maxY``.  When constructed with ``None``
    the instance has no table and returns every image unchanged.
    """

    def __init__(self, coordinates_csv):
        # No CSV means "do not crop"; otherwise load the whole table up front.
        self.coordinates = (
            None
            if coordinates_csv is None
            else self.load_coordinates(coordinates_csv)
        )

    def load_coordinates(self, csv_file):
        """Read the bounding-box table at ``csv_file`` into a DataFrame."""
        return pd.read_csv(csv_file)

    def __call__(self, img, image_path):
        """Return ``img`` cropped to the box registered for ``image_path``.

        The lookup key is the base name of ``image_path``.  If there is no
        table, or no row for that name (a message is printed in that case),
        ``img`` is returned as-is.  With several matching rows the first wins.
        """
        table = self.coordinates
        if table is None:
            return img

        image_id = os.path.basename(image_path)  # file name is the lookup key
        matches = table[table['filename'] == image_id]
        if matches.empty:
            print(f"image {image_id} not found.")
            return img  # hand the image back untouched

        box = matches.iloc[0]
        return img.crop((box['minX'], box['minY'], box['maxX'], box['maxY']))

# Custom ImageFolder definition
class CustomImageFolder(ImageFolder):
    """ImageFolder that can crop each sample before the transform runs.

    ``cropper`` is optional; when given it is called as
    ``cropper(sample, path)`` on the freshly loaded sample, ahead of
    ``transform``.
    """

    def __init__(self, root, transform=None, target_transform=None, cropper=None):
        super().__init__(root, transform=transform, target_transform=target_transform)
        self.cropper = cropper

    def __getitem__(self, index):
        """Return ``(sample, target)`` for ``index`` after crop and transforms."""
        file_path, label = self.samples[index]
        image = self.loader(file_path)

        crop = self.cropper
        convert = self.transform
        relabel = self.target_transform

        # Each stage is optional and applied in a fixed order: crop first,
        # then the sample transform, then the target transform.
        if crop is not None:
            image = crop(image, file_path)
        if convert is not None:
            image = convert(image)
        if relabel is not None:
            label = relabel(label)
        return image, label

In [None]:
random_seed = 1523
torch.manual_seed(random_seed)

# Hyperparameters
BATCH_SIZE = 64
image_size = 224

# CSV files handed to CropAroundFace (despite the df_ prefix these are paths).
df_train = CSV_PATH + 'train_ver3.csv'
df_valid = CSV_PATH + 'valid_ver3.csv'
crop_face_train = CropAroundFace(df_train)
crop_face_valid = CropAroundFace(df_valid)
no_crop = CropAroundFace(None)


def _build_pipeline(mean, std, *augmentations):
    """Compose resize -> optional augmentations -> tensor -> normalize.

    Every pipeline in this cell shares that scaffold; only the augmentation
    step and the per-channel normalization statistics differ.
    """
    return tt.Compose([
        tt.Resize((image_size, image_size)),
        *augmentations,
        tt.ToTensor(),
        tt.Normalize(mean, std, inplace=True),
    ])


# Un-augmented training images.
origin_transforms = _build_pipeline(
    [0.5630258, 0.44020364, 0.38816002],
    [0.24872069, 0.20780964, 0.18916753],
)

# 1: horizontal flip, applied to every image (p=1.0).
at_train_transforms1 = _build_pipeline(
    [0.5630291, 0.4402049, 0.3881598],
    [0.24872099, 0.2078098, 0.18916771],
    tt.RandomHorizontalFlip(p=1.0),
)

# 2: random rotation within +/-20 degrees.
at_train_transforms2 = _build_pipeline(
    [0.5412756, 0.42083004, 0.3695388],
    [0.27376023, 0.22556551, 0.20402727],
    tt.RandomRotation((-20, 20)),
)

# 3: brightness jitter.
at_train_transforms3 = _build_pipeline(
    [0.5843284, 0.4594015, 0.40504286],
    [0.26762614, 0.2274408, 0.2068571],
    tt.ColorJitter(brightness=(0.7, 1.3)),
)

# 4: Gaussian blur.
at_train_transforms4 = _build_pipeline(
    [0.56310797, 0.44026062, 0.38820478],
    [0.2432182, 0.20207751, 0.1836703],
    tt.GaussianBlur(kernel_size=(19, 19), sigma=(1.0, 2.0)),
)

# 5: random translation, no rotation.
at_train_transforms5 = _build_pipeline(
    [0.47796503, 0.3703487, 0.3250615],
    [0.3130017, 0.2526793, 0.22655305],
    tt.RandomAffine(degrees=0, translate=(0.2, 0.2)),
)

# 6: zoom-out (scale 0.6-0.8) combined with random translation.
at_train_transforms6 = _build_pipeline(
    [0.27812216, 0.21746288, 0.1917359],
    [0.33143228, 0.26421398, 0.23528142],
    tt.RandomAffine(degrees=0, scale=(0.6, 0.8), translate=(0.2, 0.2)),
)

# Validation uses the same statistics as the un-augmented training pipeline.
valid_transforms = _build_pipeline(
    [0.5630258, 0.44020364, 0.38816002],
    [0.24872069, 0.20780964, 0.18916753],
)


# Data Loader

In [None]:
# Adjust the paths to match your own Drive layout.

classes_train = os.listdir(TRAIN_IMG_PATH)

# All training views read the same pre-cropped images; only the transform
# pipeline differs, and no further cropping is applied (no_crop).
_train_crop_dir = PREPROC_TRAIN_IMG_PATH + 'crop/'


def _train_view(pipeline):
    """Return the pre-cropped training folder wrapped with ``pipeline``."""
    return CustomImageFolder(_train_crop_dir, transform=pipeline, cropper=no_crop)


origin_dataset = _train_view(origin_transforms)
train_dataset_flip = _train_view(at_train_transforms1)
train_dataset_rotate = _train_view(at_train_transforms2)
train_dataset_bright = _train_view(at_train_transforms3)
train_dataset_noise = _train_view(at_train_transforms4)
train_dataset_shift = _train_view(at_train_transforms5)
train_dataset_zoomout = _train_view(at_train_transforms6)

classes_valid = os.listdir(VALID_IMG_PATH)
valid_dataset = CustomImageFolder(
    PREPROC_VALID_IMG_PATH + 'crop/',
    valid_transforms,
    cropper=no_crop,
)

# The training set is the original images plus one copy per augmentation.
train_dataset = ConcatDataset([
    origin_dataset,
    train_dataset_flip,
    train_dataset_rotate,
    train_dataset_bright,
    train_dataset_noise,
    train_dataset_shift,
    train_dataset_zoomout,
])

train_dataloader = DataLoader(
    train_dataset,
    BATCH_SIZE,
    shuffle=True,
    num_workers=8,
    pin_memory=True,
)

valid_dataloader = DataLoader(
    valid_dataset,
    BATCH_SIZE,
    num_workers=8,
    pin_memory=True,
    shuffle=False,
)

# 얼굴 잘 잘렸는지 테스트 해보는 코드

In [None]:
import matplotlib.pyplot as plt
# 데이터셋과 DataLoader 테스트
def show_image(img):
    """Display an image tensor with matplotlib.

    Axis 0 is moved to the end before plotting, i.e. a (C, H, W) tensor is
    shown as (H, W, C).
    """
    channels_last = img.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

def test_dataloader(loader):
    """Print batch size, image shape and labels of the first batch only.

    Nothing is printed when ``loader`` yields no batches.
    """
    first_batch = next(iter(loader), None)
    if first_batch is None:
        return

    images, labels = first_batch
    print(f"Batch size: {images.size(0)}")
    print(f"Image shape: {images.shape}")
    print(f"Labels: {labels}")
    # To eyeball the images, call show_image(images[i]) for
    # i in range(images.size(0)).

In [None]:
print("Testing train DataLoader")
test_dataloader(train_dataloader)

Testing train DataLoader
Batch size: 64
Image shape: torch.Size([64, 3, 224, 224])
Labels: tensor([0, 2, 2, 1, 3, 0, 1, 0, 0, 1, 1, 2, 3, 0, 1, 3, 1, 2, 3, 0, 2, 1, 2, 1,
        0, 3, 0, 1, 3, 3, 3, 0, 3, 1, 0, 3, 0, 3, 3, 1, 1, 2, 2, 3, 3, 1, 2, 2,
        1, 2, 3, 3, 0, 2, 0, 0, 3, 1, 2, 2, 2, 2, 1, 3])


# 모델 선언

In [None]:
num_classes = len(classes_train)

# 허깅페이스 모델 이용시

In [None]:
from transformers import AutoImageProcessor, AutoModelForImageClassification

# Timm 모델들 사용시

In [None]:
!pip install timm
import timm



# EfficientNet

In [None]:
!pip install efficientnet_pytorch #efficientnet 사용시
from efficientnet_pytorch import EfficientNet
effi_model = EfficientNet.from_pretrained('efficientnet-b5') #b0~7까지 자유롭게

Loaded pretrained weights for efficientnet-b5


In [None]:
print(effi_model)

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d((0, 1, 0, 1))
  )
  (_bn0): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        48, 48, kernel_size=(3, 3), stride=[1, 1], groups=48, bias=False
        (static_padding): ZeroPad2d((1, 1, 1, 1))
      )
      (_bn1): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        48, 12, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        12, 48, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
  

In [None]:
#model = torch.hub.load('hankyul2/EfficientNetV2-pytorch', 'efficientnet_v2_m', pretrained=True, dropout=0.0, stochastic_depth=0.0)

In [None]:
from efficientnet_pytorch.utils import Conv2dStaticSamePadding

In [None]:
# Disabled option: single-channel (grayscale) input stem.
# effi_model._conv_stem = Conv2dStaticSamePadding(1, 48, kernel_size=(3, 3), stride=(2, 2), bias=False, image_size = 224)

# Replace the pretrained classifier with a 4-way linear head on the
# 2048-wide feature vector.
# Author's fallback if the linear head raises an error: a stack of 1x1 convs
# (2048 -> 512 -> 128 -> 32) in front of the output layer.
_effi_head = nn.Linear(2048, 4, bias=True)
effi_model._fc = nn.Sequential(_effi_head)

In [None]:
print(effi_model)

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d((0, 1, 0, 1))
  )
  (_bn0): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        48, 48, kernel_size=(3, 3), stride=[1, 1], groups=48, bias=False
        (static_padding): ZeroPad2d((1, 1, 1, 1))
      )
      (_bn1): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        48, 12, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        12, 48, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False
  

# MobileNet_V2

In [None]:
mobile_model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [None]:
print(mobile_model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [None]:
# Read the input width of the current classification layer so the new head
# always matches the backbone.  The Sequential branch also covers re-running
# this cell after the head has already been replaced.
if isinstance(mobile_model.classifier, nn.Sequential):
    in_features = mobile_model.classifier[-1].in_features
else:
    in_features = mobile_model.classifier.in_features

# Disabled option: single-channel (grayscale) input stem.
# mobile_model.features[0][0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Replace the model's last layer with a new 4-way output layer.  The width
# comes from in_features (previously computed but ignored in favour of a
# hard-coded 1280).
mobile_model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=False),
    nn.Linear(in_features, 4, bias=True),
)
# For convnext, vit and swin only the last layer is replaced.

In [None]:
print(mobile_model)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

# Swin Transformers

## swin 공식 모델들 살펴보기

In [None]:
#model_names = timm.list_models('*swin*')
#print(model_names)

## swin pretrained (face emotion)

In [None]:
processor = AutoImageProcessor.from_pretrained("MahmoudWSegni/swin-tiny-patch4-window7-224-finetuned-face-emotion-v12_2")
swin_model = AutoModelForImageClassification.from_pretrained("MahmoudWSegni/swin-tiny-patch4-window7-224-finetuned-face-emotion-v12_2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
print(swin_model)

SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0-1): 2 x SwinLayer(
              (layernorm_before): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=96, out_features=96, bias=True)
                  (key): Linear(in_features=96, out_features=96, bias=True)
                  (value): Linear(in_features=96, out_features=96, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfOutput(
  

In [None]:
# Read the input width of the current classification layer so the new head
# always matches the backbone.  The Sequential branch also covers re-running
# this cell after the head has already been replaced.
if isinstance(swin_model.classifier, nn.Sequential):
    in_features = swin_model.classifier[-1].in_features
else:
    in_features = swin_model.classifier.in_features

# Disabled option: single-channel (grayscale) patch embedding.
# swin_model.swin.embeddings.patch_embeddings.projection = nn.Conv2d(1, 96, kernel_size=(4, 4), stride=(4, 4))

# Replace the model's last layer with a new 4-way output layer.  The width
# comes from in_features (previously computed but ignored in favour of a
# hard-coded 768).
# NOTE(review): only the layer is swapped; the checkpoint's config still
# carries its original label count -- confirm nothing downstream reads it.
swin_model.classifier = nn.Sequential(
    nn.Linear(in_features, 4, bias=True),
)
# For convnext, vit and swin only the last layer is replaced.

In [None]:
print(swin_model)

SwinForImageClassification(
  (swin): SwinModel(
    (embeddings): SwinEmbeddings(
      (patch_embeddings): SwinPatchEmbeddings(
        (projection): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      )
      (norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): SwinEncoder(
      (layers): ModuleList(
        (0): SwinStage(
          (blocks): ModuleList(
            (0-1): 2 x SwinLayer(
              (layernorm_before): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
              (attention): SwinAttention(
                (self): SwinSelfAttention(
                  (query): Linear(in_features=96, out_features=96, bias=True)
                  (key): Linear(in_features=96, out_features=96, bias=True)
                  (value): Linear(in_features=96, out_features=96, bias=True)
                  (dropout): Dropout(p=0.0, inplace=False)
                )
                (output): SwinSelfOutput(
  

# VIT 모델 - emotion pretrained

In [None]:
processor = AutoImageProcessor.from_pretrained("StoneSeller/emotion-classifier-vit")
vit_model = AutoModelForImageClassification.from_pretrained("StoneSeller/emotion-classifier-vit")

In [None]:
print(vit_model)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [None]:
# Read the input width of the current classification layer so the new head
# always matches the backbone.  The Sequential branch also covers re-running
# this cell after the head has already been replaced.
if isinstance(vit_model.classifier, nn.Sequential):
    in_features = vit_model.classifier[-1].in_features
else:
    in_features = vit_model.classifier.in_features

# Disabled option: single-channel (grayscale) patch embedding.
# vit_model.vit.embeddings.patch_embeddings.projection = nn.Conv2d(1, 768, kernel_size=(16, 16), stride=(16, 16))

# Replace the model's last layer with a new 4-way output layer.  The width
# comes from in_features (previously computed but ignored in favour of a
# hard-coded 768).
# NOTE(review): only the layer is swapped; the checkpoint's config still
# carries its original label count -- confirm nothing downstream reads it.
vit_model.classifier = nn.Sequential(
    nn.Linear(in_features, 4, bias=True),
)
# For convnext, vit and swin only the last layer is replaced.

In [None]:
print(vit_model)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

# ResNet - pretrained cartoon emotion

In [None]:
# processor = AutoImageProcessor.from_pretrained("jayanta/microsoft-resnet-50-cartoon-emotion-detection")
# model = AutoModelForImageClassification.from_pretrained("jayanta/microsoft-resnet-50-cartoon-emotion-detection")

# ResNet - zhanghang1989/ResNeSt

In [None]:
# !pip install fvcore
# model = torch.hub.load('zhanghang1989/ResNeSt', 'resnest50', pretrained=True)

# ResNet - hf_hub:timm/resnetrs50

In [None]:
res50_model = timm.create_model("hf_hub:timm/resnetrs50.tf_in1k", pretrained=True)

In [None]:
print(res50_model)

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False

In [None]:
# Disabled option: single-channel (grayscale) input stem.
# res50_model.conv1[0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Replace the pretrained classifier with a 4-way linear head on the
# 2048-wide feature vector.
# Author's fallback if the linear head raises an error: a stack of 1x1 convs
# (2048 -> 512 -> 128 -> 32) in front of the output layer.
_res50_head = nn.Linear(2048, 4, bias=True)
res50_model.fc = nn.Sequential(_res50_head)

In [None]:
print(res50_model)

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False

# ResNet v2

In [None]:
resV2_model = timm.create_model("hf_hub:timm/resnetv2_50x1_bit.goog_distilled_in1k", pretrained=True)

In [None]:
print(resV2_model)

ResNetV2(
  (stem): Sequential(
    (conv): StdConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (pad): ConstantPad2d(padding=(1, 1, 1, 1), value=0.0)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (stages): Sequential(
    (0): ResNetStage(
      (blocks): Sequential(
        (0): PreActBottleneck(
          (downsample): DownsampleConv(
            (conv): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm): Identity()
          )
          (norm1): GroupNormAct(
            32, 64, eps=1e-05, affine=True
            (drop): Identity()
            (act): ReLU(inplace=True)
          )
          (conv1): StdConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): GroupNormAct(
            32, 64, eps=1e-05, affine=True
            (drop): Identity()
            (act): ReLU(inplace=True)
          )
          (conv2): StdConv2d(64, 64, kernel_size=(3, 3)

In [None]:
from timm.layers import StdConv2d

In [None]:
# Optional single-channel (grayscale) stem, left disabled:
# resV2_model.stem.conv = StdConv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Swap the classifier for a chain of 1x1 convolutions that narrows the
# 2048 feature channels step by step down to 4 output channels.
# NOTE(review): no activation sits between the convolutions - confirm that
# is intended.
resV2_model.head.fc = nn.Sequential(*(
    nn.Conv2d(c_in, c_out, kernel_size=(1, 1), stride=(1, 1), bias=True)
    for c_in, c_out in zip((2048, 512, 128, 32), (512, 128, 32, 4))
))

# Alternative head (disabled), to fall back on if the one above errors:
#   Dropout(p=0.2) -> AdaptiveAvgPool2d(1) -> Flatten -> Linear(resV2_model.head.in_features, 4)

In [None]:
print(resV2_model)

ResNetV2(
  (stem): Sequential(
    (conv): StdConv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (pad): ConstantPad2d(padding=(1, 1, 1, 1), value=0.0)
    (pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (stages): Sequential(
    (0): ResNetStage(
      (blocks): Sequential(
        (0): PreActBottleneck(
          (downsample): DownsampleConv(
            (conv): StdConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm): Identity()
          )
          (norm1): GroupNormAct(
            32, 64, eps=1e-05, affine=True
            (drop): Identity()
            (act): ReLU(inplace=True)
          )
          (conv1): StdConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): GroupNormAct(
            32, 64, eps=1e-05, affine=True
            (drop): Identity()
            (act): ReLU(inplace=True)
          )
          (conv2): StdConv2d(64, 64, kernel_size=(3, 3)

# densenet

In [None]:
# Install timm into the runtime (IPython shell escape) and import it.
# NOTE(review): earlier cells already call timm.create_model, so this cell
# presumably has to run before them - confirm the intended cell order.
!pip install timm
import timm
# Load timm's pretrained 'densenet121' weights.
dense_model = timm.create_model('densenet121', pretrained=True)



In [None]:
print(dense_model)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNormAct2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): ReLU(inplace=True)
    )
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): DenseBlock(
      (denselayer1): DenseLayer(
        (norm1): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNormAct2d(
          128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
  

In [None]:
# Read the input width of the current classifier so the replacement head
# follows the backbone instead of relying on a hard-coded constant. The
# Sequential branch keeps the cell re-runnable after the head was swapped.
if isinstance(dense_model.classifier, nn.Sequential):
    in_features = dense_model.classifier[-1].in_features
else:
    in_features = dense_model.classifier.in_features

# Optional single-channel (grayscale) stem, left disabled:
# dense_model.features.conv0 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# Replace the model's last layer with a new 4-way output layer. The
# Sequential wrapper is kept so state_dict keys stay `classifier.0.*`.
dense_model.classifier = nn.Sequential(
    nn.Linear(in_features, 4, bias=True),
)

# Alternatives tried earlier (disabled), to fall back on if the head errors:
#   1x1 Conv2d stack 1024 -> 256 -> 64 -> 16 placed in front of the Linear layer
#   nn.Sequential(nn.Linear(in_features, num_classes))

In [None]:
print(dense_model)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNormAct2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): ReLU(inplace=True)
    )
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): DenseBlock(
      (denselayer1): DenseLayer(
        (norm1): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNormAct2d(
          128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
  

# ConvNeXt - FaceMask-Finetuned

In [None]:
# Fetch the image processor and the classification model published under
# "AkshatSurolia/ConvNeXt-FaceMask-Finetuned" through the Auto* loaders.
# NOTE(review): `processor` is rebound by the ConvNeXt v2 cell further down,
# so only the most recently executed cell's processor is kept.
processor = AutoImageProcessor.from_pretrained("AkshatSurolia/ConvNeXt-FaceMask-Finetuned")
conv_model = AutoModelForImageClassification.from_pretrained("AkshatSurolia/ConvNeXt-FaceMask-Finetuned")

In [None]:
print(conv_model)

ConvNextForImageClassification(
  (convnext): ConvNextModel(
    (embeddings): ConvNextEmbeddings(
      (patch_embeddings): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (layernorm): ConvNextLayerNorm()
    )
    (encoder): ConvNextEncoder(
      (stages): ModuleList(
        (0): ConvNextStage(
          (downsampling_layer): Identity()
          (layers): Sequential(
            (0): ConvNextLayer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextLayerNorm()
              (pwconv1): Linear(in_features=96, out_features=384, bias=True)
              (act): GELUActivation()
              (pwconv2): Linear(in_features=384, out_features=96, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextLayer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextLayerNorm()
              

In [None]:
# Width of the features feeding the current classifier (the last layer is
# inspected when the classifier is a Sequential).
in_features = (
    conv_model.classifier[-1].in_features
    if isinstance(conv_model.classifier, nn.Sequential)
    else conv_model.classifier.in_features
)

# Optional single-channel (grayscale) patch embedding, left disabled:
# conv_model.convnext.embeddings.patch_embeddings = nn.Conv2d(1, 96, kernel_size=(4, 4), stride=(4, 4))

# Replace the model's last layer with a new output layer of `num_classes` units.
# Alternative head (disabled): 1x1 Conv2d stack 768 -> 256 -> 64 -> 16 followed by Linear(16, 4).
conv_model.classifier = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)

In [None]:
print(conv_model)

ConvNextForImageClassification(
  (convnext): ConvNextModel(
    (embeddings): ConvNextEmbeddings(
      (patch_embeddings): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (layernorm): ConvNextLayerNorm()
    )
    (encoder): ConvNextEncoder(
      (stages): ModuleList(
        (0): ConvNextStage(
          (downsampling_layer): Identity()
          (layers): Sequential(
            (0): ConvNextLayer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextLayerNorm()
              (pwconv1): Linear(in_features=96, out_features=384, bias=True)
              (act): GELUActivation()
              (pwconv2): Linear(in_features=384, out_features=96, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextLayer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextLayerNorm()
              

# ConvNeXt v2 - facebook/convnextv2-tiny-1k-224

In [None]:
# Fetch the image processor and the classification model published under
# "facebook/convnextv2-tiny-1k-224" through the Auto* loaders.
# NOTE(review): this rebinds `processor`, which the ConvNeXt cell above also assigns.
processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
convv2_model = AutoModelForImageClassification.from_pretrained("facebook/convnextv2-tiny-1k-224")

In [None]:
print(convv2_model)

ConvNextV2ForImageClassification(
  (convnextv2): ConvNextV2Model(
    (embeddings): ConvNextV2Embeddings(
      (patch_embeddings): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (layernorm): ConvNextV2LayerNorm()
    )
    (encoder): ConvNextV2Encoder(
      (stages): ModuleList(
        (0): ConvNextV2Stage(
          (downsampling_layer): Identity()
          (layers): Sequential(
            (0): ConvNextV2Layer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
              (layernorm): ConvNextV2LayerNorm()
              (pwconv1): Linear(in_features=96, out_features=384, bias=True)
              (act): GELUActivation()
              (grn): ConvNextV2GRN()
              (pwconv2): Linear(in_features=384, out_features=96, bias=True)
              (drop_path): Identity()
            )
            (1): ConvNextV2Layer(
              (dwconv): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
    

In [None]:
# Width of the features feeding the current classifier (the last layer is
# inspected when the classifier is a Sequential).
in_features = (
    convv2_model.classifier[-1].in_features
    if isinstance(convv2_model.classifier, nn.Sequential)
    else convv2_model.classifier.in_features
)

# Optional single-channel (grayscale) patch embedding, left disabled:
# convv2_model.convnextv2.embeddings.patch_embeddings = nn.Conv2d(1, 96, kernel_size=(4, 4), stride=(4, 4))

# Replace the model's last layer with a new output layer of `num_classes` units.
# Alternative head (disabled): 1x1 Conv2d stack 768 -> 256 -> 64 -> 16 followed by Linear(16, 4).
convv2_model.classifier = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)

# For ConvNeXt, ViT and Swin only the very last layer is replaced.

# coatnet - pretrained (어떤 데이터로?)

In [None]:
# Load the pretrained timm checkpoint `coatnet_3_rw_224.sw_in12k` from the
# Hugging Face hub.
coat_model = timm.create_model("hf_hub:timm/coatnet_3_rw_224.sw_in12k", pretrained=True)

In [None]:
print(coat_model)

MaxxVit(
  (stem): Stem(
    (conv1): Conv2d(3, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm1): BatchNormAct2d(
      96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (conv2): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (stages): Sequential(
    (0): MaxxVitStage(
      (blocks): Sequential(
        (0): MbConvBlock(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Identity()
          )
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05

In [None]:
# Optional single-channel (grayscale) stem, left disabled:
# coat_model.stem.conv1 = nn.Conv2d(1, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Swap the final fully connected layer for a 1536 -> 4 linear layer. The
# Sequential wrapper is kept so state_dict keys stay `head.fc.0.*`.
coat_model.head.fc = nn.Sequential(
    nn.Linear(in_features=1536, out_features=4, bias=True),
)

# Alternatives tried earlier (disabled), to fall back on if the head errors:
#   1x1 Conv2d stack 1536 -> 512 -> 128 -> 32 placed in front of the Linear layer
#   Dropout(p=0.2) -> AdaptiveAvgPool2d(1) -> Flatten -> Linear(coat_model.head.in_features, 4)

In [None]:
print(coat_model)

MaxxVit(
  (stem): Stem(
    (conv1): Conv2d(3, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm1): BatchNormAct2d(
      96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): SiLU(inplace=True)
    )
    (conv2): Conv2d(96, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (stages): Sequential(
    (0): MaxxVitStage(
      (blocks): Sequential(
        (0): MbConvBlock(
          (shortcut): Downsample2d(
            (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
            (expand): Identity()
          )
          (pre_norm): BatchNormAct2d(
            192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
            (drop): Identity()
            (act): SiLU(inplace=True)
          )
          (down): Identity()
          (conv1_1x1): Conv2d(192, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm1): BatchNormAct2d(
            768, eps=1e-05

# Convmixer

In [None]:
# Load the pretrained timm checkpoint `convmixer_768_32.in1k` from the
# Hugging Face hub.
convmix_model = timm.create_model("hf_hub:timm/convmixer_768_32.in1k", pretrained=True)

In [None]:
print(convmix_model)

ConvMixer(
  (stem): Sequential(
    (0): Conv2d(3, 768, kernel_size=(7, 7), stride=(7, 7))
    (1): ReLU()
    (2): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): Residual(
        (fn): Sequential(
          (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=same, groups=768)
          (1): ReLU()
          (2): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (1): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
      (2): ReLU()
      (3): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Sequential(
      (0): Residual(
        (fn): Sequential(
          (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=same, groups=768)
          (1): ReLU()
          (2): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
 

In [None]:
# Optional single-channel (grayscale) stem, left disabled:
# convmix_model.stem[0] = nn.Conv2d(1, 768, kernel_size=(7, 7), stride=(7, 7))

# Swap the head for a 768 -> 4 linear layer. The Sequential wrapper is kept
# so state_dict keys stay `head.0.*`.
convmix_model.head = nn.Sequential(
    nn.Linear(in_features=768, out_features=4, bias=True),
)

# Alternatives tried earlier (disabled), to fall back on if the head errors:
#   1x1 Conv2d stack 768 -> 256 -> 64 -> 16 placed in front of the Linear layer
#   nn.Sequential(nn.Linear(convmix_model.head.in_features, 4))

In [None]:
print(convmix_model)

ConvMixer(
  (stem): Sequential(
    (0): Conv2d(3, 768, kernel_size=(7, 7), stride=(7, 7))
    (1): ReLU()
    (2): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): Residual(
        (fn): Sequential(
          (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=same, groups=768)
          (1): ReLU()
          (2): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (1): Conv2d(768, 768, kernel_size=(1, 1), stride=(1, 1))
      (2): ReLU()
      (3): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): Sequential(
      (0): Residual(
        (fn): Sequential(
          (0): Conv2d(768, 768, kernel_size=(7, 7), stride=(1, 1), padding=same, groups=768)
          (1): ReLU()
          (2): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
 

# Regnet

In [None]:
# Load the pretrained timm checkpoint `regnety_032.ra_in1k` from the
# Hugging Face hub.
reg_model = timm.create_model("hf_hub:timm/regnety_032.ra_in1k", pretrained=True)

In [None]:
print(reg_model)

RegNet(
  (stem): ConvNormAct(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNormAct2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): ReLU(inplace=True)
    )
  )
  (s1): RegStage(
    (b1): Bottleneck(
      (conv1): ConvNormAct(
        (conv): Conv2d(32, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNormAct2d(
          72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
      )
      (conv2): ConvNormAct(
        (conv): Conv2d(72, 72, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=3, bias=False)
        (bn): BatchNormAct2d(
          72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
      )
      (se): SEModule(
        (fc1): Conv2d(72, 8, kerne

In [None]:
# Optional single-channel (grayscale) stem, left disabled:
# reg_model.stem.conv = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# Insert a chain of 1x1 convolutions that narrows the 1512 feature channels
# down to 32 before the head.
reg_model.final_conv = nn.Sequential(*(
    nn.Conv2d(c_in, c_out, kernel_size=(1, 1), stride=(1, 1), bias=True)
    for c_in, c_out in zip((1512, 512, 128), (512, 128, 32))
))

# Classify the 32 remaining features into 4 outputs.
reg_model.head.fc = nn.Sequential(
    nn.Linear(in_features=32, out_features=4, bias=True),
)

# Alternative head (disabled; noted in the original as "before nfnet"):
#   Dropout(p=0.2) -> AdaptiveAvgPool2d(1) -> Flatten -> Linear(reg_model.head.in_features, num_classes)

In [None]:
print(reg_model)

RegNet(
  (stem): ConvNormAct(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn): BatchNormAct2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): ReLU(inplace=True)
    )
  )
  (s1): RegStage(
    (b1): Bottleneck(
      (conv1): ConvNormAct(
        (conv): Conv2d(32, 72, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNormAct2d(
          72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
      )
      (conv2): ConvNormAct(
        (conv): Conv2d(72, 72, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=3, bias=False)
        (bn): BatchNormAct2d(
          72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
      )
      (se): SEModule(
        (fc1): Conv2d(72, 8, kerne

# NFNET - hf_hub:timm/nfnet_l0

In [None]:
# model = timm.create_model("hf_hub:timm/nfnet_l0.ra2_in1k", pretrained=True)

In [None]:
# in_features = model.fc.in_features

# model.fc = nn.Linear(in_features, num_classes)

# NFNET - hf_hub:timm/dm_nfnet_f1

In [None]:
# Load the pretrained timm checkpoint `dm_nfnet_f1.dm_in1k` from the
# Hugging Face hub.
nf_model = timm.create_model("hf_hub:timm/dm_nfnet_f1.dm_in1k", pretrained=True)

In [None]:
print(nf_model)

NormFreeNet(
  (stem): Sequential(
    (conv1): ScaledStdConv2dSame(3, 16, kernel_size=(3, 3), stride=(2, 2))
    (act2): GammaAct()
    (conv2): ScaledStdConv2dSame(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (act3): GammaAct()
    (conv3): ScaledStdConv2dSame(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (act4): GammaAct()
    (conv4): ScaledStdConv2dSame(64, 128, kernel_size=(3, 3), stride=(2, 2))
  )
  (stages): Sequential(
    (0): Sequential(
      (0): NormFreeBlock(
        (downsample): DownsampleAvg(
          (pool): Identity()
          (conv): ScaledStdConv2dSame(128, 256, kernel_size=(1, 1), stride=(1, 1))
        )
        (act1): GammaAct()
        (conv1): ScaledStdConv2dSame(128, 128, kernel_size=(1, 1), stride=(1, 1))
        (act2): GammaAct()
        (conv2): ScaledStdConv2dSame(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (act2b): GammaAct()
        (conv2b): ScaledStdConv2dSame(128, 128, kernel_size=(3, 

In [None]:
from timm.layers import ScaledStdConv2dSame

In [None]:
# Optional single-channel (grayscale) stem, left disabled:
# nf_model.stem.conv1 = ScaledStdConv2dSame(1, 16, kernel_size=(3, 3), stride=(2, 2))

# Replace the final convolution with a chain of 1x1 scaled-std convolutions:
# 1536 -> 3072 -> 1024 -> 256 -> 32 channels.
nf_model.final_conv = nn.Sequential(*(
    ScaledStdConv2dSame(c_in, c_out, kernel_size=(1, 1), stride=(1, 1))
    for c_in, c_out in zip((1536, 3072, 1024, 256), (3072, 1024, 256, 32))
))

# Classify the 32 remaining features into 4 outputs.
nf_model.head.fc = nn.Sequential(
    nn.Linear(in_features=32, out_features=4, bias=True),
)

# Alternatives tried earlier (disabled), to fall back on if the head errors:
#   1x1 Conv2d stack 3072 -> 1024 -> 256 -> 32 placed in front of the Linear layer
#   Dropout(p=0.2) -> AdaptiveAvgPool2d(1) -> Flatten -> Linear(nf_model.head.in_features, num_classes)

In [None]:
print(nf_model)

NormFreeNet(
  (stem): Sequential(
    (conv1): ScaledStdConv2dSame(3, 16, kernel_size=(3, 3), stride=(2, 2))
    (act2): GammaAct()
    (conv2): ScaledStdConv2dSame(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (act3): GammaAct()
    (conv3): ScaledStdConv2dSame(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (act4): GammaAct()
    (conv4): ScaledStdConv2dSame(64, 128, kernel_size=(3, 3), stride=(2, 2))
  )
  (stages): Sequential(
    (0): Sequential(
      (0): NormFreeBlock(
        (downsample): DownsampleAvg(
          (pool): Identity()
          (conv): ScaledStdConv2dSame(128, 256, kernel_size=(1, 1), stride=(1, 1))
        )
        (act1): GammaAct()
        (conv1): ScaledStdConv2dSame(128, 128, kernel_size=(1, 1), stride=(1, 1))
        (act2): GammaAct()
        (conv2): ScaledStdConv2dSame(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (act2b): GammaAct()
        (conv2b): ScaledStdConv2dSame(128, 128, kernel_size=(3, 

# 마지막 층 수정 - 모델마다 다를 수 있음

In [None]:
# nf_model.head = nn.Sequential(
#     nn.Dropout(p=0.2),
#     nn.AdaptiveAvgPool2d(1),
#     nn.Flatten(),  # 평탄화 레이어 추가
#     nn.Linear(model.head.in_features, num_classes)
# )
# # nfnet 전

In [None]:
# if isinstance(model.classifier, nn.Sequential):
#     in_features = model.classifier[-1].in_features
# else:
#     in_features = model.classifier.in_features

# # 모델의 마지막 레이어를 새로운 출력 레이어로 교체
# model.classifier = nn.Sequential(
#     #nn.Dropout(p=0.2),
#     #nn.AdaptiveAvgPool2d(1),
#     #nn.Flatten(),  # 평탄화 레이어 추가
#     nn.Linear(in_features, num_classes) #오류나면 윗 코드들 사용
# )
# #convnext, vit,swin는 맨밑층만

In [None]:
# in_features = model.fc.in_features

# model.fc = nn.Linear(in_features, num_classes)


In [None]:
# model._fc = nn.Linear(model._fc.in_features, num_classes)
# #efficientnet

# 모델 학습 및 평가

In [None]:
# Module-level counters, all starting at zero: one *_num / *_correct pair per
# emotion label (anger, sadness, happy, panic).
# NOTE(review): presumably per-class sample and hit counts - confirm against
# the code that updates them.
anger_num = anger_correct = 0
sadness_num = sadness_correct = 0
happy_num = happy_correct = 0
panic_num = panic_correct = 0

In [None]:
def train_epoch(model, dataset_loader, epoch, device, optimizer, criterion, scheduler):
    """Run one training pass over ``dataset_loader``.

    Args:
        model: Network to optimise. Its output is either a logits tensor or
            an object exposing a ``logits`` attribute.
        dataset_loader: Iterable yielding ``(inputs, labels)`` batches.
        epoch: Current epoch number; accepted for interface compatibility
            and not used inside the function.
        device: Device every batch is moved to.
        optimizer: Optimizer, zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
            ``None`` falls back to ``F.cross_entropy``.
        scheduler: Learning-rate scheduler, stepped once after the pass.

    Returns:
        Tuple ``(running_loss, train_accuracy)``: the sum of the per-batch
        loss values and the accuracy in percent (0.0 when the loader
        yielded no samples).
    """
    # Honour the caller's loss; it used to be ignored in favour of a
    # hard-wired F.cross_entropy.
    loss_fn = F.cross_entropy if criterion is None else criterion

    running_loss = 0.0
    correct = 0
    total = 0
    model.train()

    for inputs, labels in tqdm(dataset_loader):
        inputs = inputs.float().to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        # Some models wrap their logits in an output object.
        logits = outputs.logits if hasattr(outputs, 'logits') else outputs

        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(logits, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Guard against an empty loader instead of dividing by zero.
    train_accuracy = 100 * correct / total if total else 0.0
    scheduler.step()

    return running_loss, train_accuracy

def evaluation(model, dataset_loader, device, criterion):
    """Evaluate ``model`` on ``dataset_loader`` without tracking gradients.

    Args:
        model: Network to evaluate. Its output is either a logits tensor or
            an object exposing a ``logits`` attribute.
        dataset_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device every batch is moved to.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
            ``None`` falls back to ``F.cross_entropy``.

    Returns:
        Tuple ``(valid_loss, accuracy)``: the sum of the per-batch loss
        values and the accuracy in percent over the samples actually
        iterated (0.0 when the loader yielded none).
    """
    # Honour the caller's loss; it used to be ignored in favour of a
    # hard-wired F.cross_entropy.
    loss_fn = F.cross_entropy if criterion is None else criterion

    correct = 0
    total = 0
    valid_loss = 0

    model.eval()

    with torch.no_grad():
        for images, labels in tqdm(dataset_loader):
            images = images.float().to(device)
            labels = labels.to(device)

            outputs = model(images)
            # Some models wrap their logits in an output object.
            logits = outputs.logits if hasattr(outputs, 'logits') else outputs

            valid_loss += loss_fn(logits, labels).item()
            pred = logits.max(1, keepdim=True)[1]
            correct += pred.eq(labels.view_as(pred)).sum().item()
            total += labels.size(0)

    # Count the samples seen rather than len(loader.dataset): this works for
    # any iterable of batches and stays right when the loader skips samples.
    accuracy = 100 * correct / total if total else 0.0
    return valid_loss, accuracy

def load_model(best_model, device, checkpoint_path='model_save.pth'):
    """Restore model weights and optimizer state from a saved checkpoint.

    Args:
        best_model: Module whose ``state_dict`` layout matches the checkpoint.
        device: Device the checkpoint tensors, the model and the optimizer
            state are placed on.
        checkpoint_path: File holding a dict with ``'model'`` and
            ``'optimizer'`` entries. The default keeps the historical
            ``'model_save.pth'``; ``train`` writes to
            ``model_name + 'model_save.pth'``, so pass that path to resume
            one of its runs.

    Returns:
        Tuple ``(best_model, optimizer)`` with both states loaded.
    """
    # map_location lets a checkpoint written on a GPU be opened on a
    # CPU-only runtime (and the other way round).
    checkpoint = torch.load(checkpoint_path, map_location=device)
    best_model.load_state_dict(checkpoint['model'])
    best_model.to(device)

    optimizer = getOptimizer(best_model)
    optimizer.load_state_dict(checkpoint['optimizer'])

    # Make sure every tensor in the optimizer state sits on the model's device.
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)
    return best_model, optimizer

def restart_training(model, learning_rate, device):
    """Reload the saved checkpoint and resume with ``learning_rate``.

    Args:
        model: Module to load the checkpointed weights into.
        learning_rate: Learning rate applied to every optimizer parameter
            group after loading; ``None`` keeps the checkpointed value.
        device: Device the model and optimizer state are placed on.

    Returns:
        Tuple ``(model, optimizer)`` ready for further training.
    """
    model, optimizer = load_model(model, device)
    if learning_rate is not None:
        # The argument used to be accepted and then ignored; apply it.
        for group in optimizer.param_groups:
            group['lr'] = learning_rate
            # A checkpointed 'initial_lr' would otherwise win as soon as a
            # new scheduler is attached to this optimizer.
            if 'initial_lr' in group:
                group['initial_lr'] = learning_rate
    return model, optimizer

# The optimizer can be switched here, or per call through ``optName``.
def getOptimizer(model, optName='Nadam'):
    """Build the optimizer used for training ``model``.

    Args:
        model: Module whose parameters are optimised.
        optName: Optimizer to build, matched case-insensitively against
            ``'sgd'``, ``'adam'`` and ``'nadam'``. Defaults to ``'Nadam'``,
            the choice that used to be hard-coded.

    Returns:
        The configured ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``optName`` is not one of the supported names (this
            used to fall through and return ``None``).
    """
    key = optName.lower()
    if key == "sgd":
        print("Optimizer - SGD")
        return optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    if key == "adam":
        print("Optimizer - Adam")
        return optim.Adam(model.parameters(), lr=0.00012)
    if key == "nadam":
        print("Optimizer - Nadam")
        betas = (0.9, 0.999)
        epsilon = 1e-08
        return optim.NAdam(model.parameters(), lr=0.0001, betas=betas, eps=epsilon)
    raise ValueError(
        "Unknown optimizer {!r}; expected 'sgd', 'adam' or 'Nadam'".format(optName)
    )

def train(model, model_name, num_epochs=20):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Batches come from the module-level ``train_dataloader`` and
    ``valid_dataloader``. Whenever the validation accuracy improves, the
    model and optimizer state dicts are written to
    ``model_name + 'model_save.pth'``.

    Args:
        model: Network to train; it is moved to the device returned by
            ``get_device()``.
        model_name: Prefix of the checkpoint file name.
        num_epochs: Number of epochs to run. Defaults to 20, the value that
            used to be hard-coded.

    Returns:
        List with one ``(running_loss, valid_loss, accuracy)`` tuple per
        epoch (previously collected but discarded).
    """
    device = get_device()
    print("Device -", device)

    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = getOptimizer(model)
    # Multiply the base learning rate by 0.95 for every completed epoch.
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=lambda epoch: 0.95 ** epoch)

    loss_es = []
    best_accuracy = 0.0
    best_model = -1  # epoch number of the best checkpoint; -1 until one is saved

    for epoch in range(1, num_epochs + 1):
        start_time = time.time()  # epoch start timestamp
        running_loss, train_accuracy = train_epoch(model, train_dataloader, epoch,
                                                   device, optimizer, criterion, scheduler)
        valid_loss, accuracy = evaluation(model, valid_dataloader, device, criterion)

        loss_es.append((running_loss, valid_loss, accuracy))

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = epoch

            # Persist the best model so far, with its optimizer state.
            state = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(state, model_name + 'model_save.pth')
        epoch_duration = time.time() - start_time  # wall-clock time of this epoch
        print('epoch: {}\trunning_loss: {:.5f}\t training_accuracy: {:.5f}\tvalid_loss: {:.5f}\taccuracy: {:.3f}\t\tbest_model{}\tEpoch Duration: {:.2f} seconds'.format(
            epoch, running_loss,train_accuracy, valid_loss, accuracy, best_model, epoch_duration))
    print('Trainig Complete.')
    return loss_es

## 학습&평가 함수 호출 - 모델 수정시 반드시 목록에 반영


In [None]:
# Registry of (network, banner) pairs handed to the training loop.
# Uncomment a row to include that network in the run.
models = [
    # (effi_model, "------------------ Efficient b5 ---------------------"),
    # (mobile_model, "--------------------- MobileNet ---------------------"),
    (swin_model, "--------------------- SWIN ---------------------"),
    # (vit_model, "--------------------- VIT ---------------------"),
    # (res50_model, "--------------------- ResNet50 ---------------------"),
    # (resV2_model, "--------------------- ResNet V2 50 ---------------------"),
    # (dense_model, "--------------------- DenseNet ---------------------"),
    # (conv_model, "--------------------- CONVNet ---------------------"),
    # (convv2_model, "--------------------- CONVv2Net ---------------------"),
    # (coat_model, "--------------------- COATNet ---------------------"),
    # (convmix_model, "--------------------- CONVMIX Net ---------------------"),
    # (reg_model, "--------------------- RegNet ---------------------"),
    # (nf_model, "--------------------- NFNet ---------------------"),
]

In [None]:
# Train every registered network in turn. The checkpoint prefix passed to
# train() is the second space-separated token of the banner (e.g. "SWIN").
# The pair is unpacked into `net` / `banner` so the loop no longer leaves the
# notebook-global `model` bound to a (network, banner) tuple.
for net, banner in models:
    print(banner)
    train(net, banner.split(' ')[1])

--------------------- SWIN ---------------------
Device - cuda
Optimizer - Nadam


  self.pid = os.fork()
100%|██████████| 656/656 [02:53<00:00,  3.78it/s]
100%|██████████| 19/19 [00:05<00:00,  3.22it/s]


epoch: 1	running_loss: 224.72130	 training_accuracy: 87.15669	valid_loss: 12.18058	accuracy: 83.333		best_model1	Epoch Duration: 179.90 seconds


100%|██████████| 656/656 [02:50<00:00,  3.86it/s]
100%|██████████| 19/19 [00:06<00:00,  3.08it/s]


epoch: 2	running_loss: 80.28746	 training_accuracy: 95.67280	valid_loss: 18.46052	accuracy: 78.667		best_model1	Epoch Duration: 176.35 seconds


100%|██████████| 656/656 [02:50<00:00,  3.86it/s]
100%|██████████| 19/19 [00:05<00:00,  3.25it/s]


epoch: 3	running_loss: 43.35501	 training_accuracy: 97.66832	valid_loss: 16.85135	accuracy: 82.583		best_model1	Epoch Duration: 175.99 seconds


100%|██████████| 656/656 [02:49<00:00,  3.86it/s]
100%|██████████| 19/19 [00:05<00:00,  3.25it/s]


epoch: 4	running_loss: 30.22879	 training_accuracy: 98.44078	valid_loss: 17.91673	accuracy: 83.000		best_model1	Epoch Duration: 175.78 seconds


100%|██████████| 656/656 [02:50<00:00,  3.84it/s]
100%|██████████| 19/19 [00:05<00:00,  3.20it/s]


epoch: 5	running_loss: 25.51255	 training_accuracy: 98.63866	valid_loss: 20.22672	accuracy: 81.083		best_model1	Epoch Duration: 176.66 seconds


100%|██████████| 656/656 [02:50<00:00,  3.85it/s]
100%|██████████| 19/19 [00:06<00:00,  3.06it/s]


epoch: 6	running_loss: 21.46760	 training_accuracy: 98.90092	valid_loss: 22.08403	accuracy: 81.083		best_model1	Epoch Duration: 176.49 seconds


100%|██████████| 656/656 [02:50<00:00,  3.84it/s]
100%|██████████| 19/19 [00:05<00:00,  3.23it/s]


epoch: 7	running_loss: 17.26668	 training_accuracy: 99.09165	valid_loss: 22.32905	accuracy: 83.167		best_model1	Epoch Duration: 176.67 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:05<00:00,  3.22it/s]


epoch: 8	running_loss: 14.42266	 training_accuracy: 99.23469	valid_loss: 21.02635	accuracy: 83.167		best_model1	Epoch Duration: 175.46 seconds


100%|██████████| 656/656 [02:49<00:00,  3.88it/s]
100%|██████████| 19/19 [00:06<00:00,  3.04it/s]


epoch: 9	running_loss: 13.24190	 training_accuracy: 99.34436	valid_loss: 23.87110	accuracy: 82.250		best_model1	Epoch Duration: 175.43 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:05<00:00,  3.21it/s]


epoch: 10	running_loss: 13.57003	 training_accuracy: 99.32767	valid_loss: 23.13596	accuracy: 81.083		best_model1	Epoch Duration: 175.59 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:05<00:00,  3.25it/s]


epoch: 11	running_loss: 9.98368	 training_accuracy: 99.50648	valid_loss: 23.23127	accuracy: 82.667		best_model1	Epoch Duration: 175.53 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:06<00:00,  3.11it/s]


epoch: 12	running_loss: 10.27626	 training_accuracy: 99.47549	valid_loss: 27.16430	accuracy: 82.333		best_model1	Epoch Duration: 175.73 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:05<00:00,  3.25it/s]


epoch: 13	running_loss: 8.09622	 training_accuracy: 99.59470	valid_loss: 25.03056	accuracy: 82.750		best_model1	Epoch Duration: 175.50 seconds


100%|██████████| 656/656 [02:49<00:00,  3.88it/s]
100%|██████████| 19/19 [00:05<00:00,  3.27it/s]


epoch: 14	running_loss: 7.61179	 training_accuracy: 99.62808	valid_loss: 26.76137	accuracy: 82.333		best_model1	Epoch Duration: 175.05 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:06<00:00,  3.08it/s]


epoch: 15	running_loss: 7.82896	 training_accuracy: 99.60423	valid_loss: 27.59501	accuracy: 81.417		best_model1	Epoch Duration: 175.77 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:05<00:00,  3.23it/s]


epoch: 16	running_loss: 6.43751	 training_accuracy: 99.67814	valid_loss: 27.51564	accuracy: 82.250		best_model1	Epoch Duration: 175.28 seconds


100%|██████████| 656/656 [02:49<00:00,  3.86it/s]
100%|██████████| 19/19 [00:06<00:00,  3.17it/s]


epoch: 17	running_loss: 6.47391	 training_accuracy: 99.65430	valid_loss: 28.77616	accuracy: 81.250		best_model1	Epoch Duration: 176.01 seconds


100%|██████████| 656/656 [02:49<00:00,  3.87it/s]
100%|██████████| 19/19 [00:05<00:00,  3.24it/s]


epoch: 18	running_loss: 5.23901	 training_accuracy: 99.72582	valid_loss: 28.73449	accuracy: 82.333		best_model1	Epoch Duration: 175.40 seconds


100%|██████████| 656/656 [02:49<00:00,  3.88it/s]
100%|██████████| 19/19 [00:05<00:00,  3.25it/s]


epoch: 19	running_loss: 3.99162	 training_accuracy: 99.81881	valid_loss: 27.04925	accuracy: 81.583		best_model1	Epoch Duration: 174.95 seconds


100%|██████████| 656/656 [02:49<00:00,  3.88it/s]
100%|██████████| 19/19 [00:06<00:00,  3.14it/s]


epoch: 20	running_loss: 3.76673	 training_accuracy: 99.79020	valid_loss: 32.34792	accuracy: 82.333		best_model1	Epoch Duration: 175.08 seconds
Trainig Complete.
--------------------- VIT ---------------------
Device - cuda
Optimizer - Nadam


100%|██████████| 656/656 [04:38<00:00,  2.36it/s]
100%|██████████| 19/19 [00:05<00:00,  3.29it/s]


epoch: 1	running_loss: 252.25824	 training_accuracy: 86.28171	valid_loss: 9.57656	accuracy: 82.667		best_model1	Epoch Duration: 285.20 seconds


100%|██████████| 656/656 [04:40<00:00,  2.34it/s]
100%|██████████| 19/19 [00:05<00:00,  3.17it/s]


epoch: 2	running_loss: 91.83320	 training_accuracy: 95.43916	valid_loss: 11.99521	accuracy: 82.167		best_model1	Epoch Duration: 286.40 seconds


100%|██████████| 656/656 [04:37<00:00,  2.36it/s]
100%|██████████| 19/19 [00:05<00:00,  3.28it/s]


epoch: 3	running_loss: 47.14040	 training_accuracy: 97.65878	valid_loss: 14.23394	accuracy: 82.083		best_model1	Epoch Duration: 283.38 seconds


100%|██████████| 656/656 [04:37<00:00,  2.36it/s]
100%|██████████| 19/19 [00:05<00:00,  3.20it/s]


epoch: 4	running_loss: 30.79578	 training_accuracy: 98.39548	valid_loss: 16.41913	accuracy: 82.250		best_model1	Epoch Duration: 283.76 seconds


100%|██████████| 656/656 [04:37<00:00,  2.36it/s]
100%|██████████| 19/19 [00:05<00:00,  3.25it/s]


epoch: 5	running_loss: 21.10996	 training_accuracy: 98.93668	valid_loss: 16.07338	accuracy: 82.833		best_model5	Epoch Duration: 285.59 seconds


100%|██████████| 656/656 [04:40<00:00,  2.34it/s]
100%|██████████| 19/19 [00:05<00:00,  3.18it/s]


epoch: 6	running_loss: 15.85352	 training_accuracy: 99.21324	valid_loss: 14.87001	accuracy: 82.917		best_model6	Epoch Duration: 288.10 seconds


100%|██████████| 656/656 [04:39<00:00,  2.35it/s]
100%|██████████| 19/19 [00:05<00:00,  3.26it/s]


epoch: 7	running_loss: 12.50848	 training_accuracy: 99.36582	valid_loss: 18.63325	accuracy: 81.167		best_model6	Epoch Duration: 285.01 seconds


100%|██████████| 656/656 [04:37<00:00,  2.36it/s]
100%|██████████| 19/19 [00:05<00:00,  3.21it/s]


epoch: 8	running_loss: 11.21814	 training_accuracy: 99.46357	valid_loss: 19.37543	accuracy: 80.917		best_model6	Epoch Duration: 283.71 seconds


 96%|█████████▋| 633/656 [04:28<00:09,  2.38it/s]

In [None]:
#model = effi_model
#model = mobile_model
#model = swin_model
# model = vit_model
#model = res50_model
#model = resV2_model
#model = dense_model
#model = conv_model
# model = coat_model
#model = convmix_model
#model = reg_model
#model = nf_model

# train(model)

# 세션 종료

In [None]:
# from google.colab import runtime
# runtime.unassign()

# 모델 이어서 학습

In [None]:
def load_model(best_model, device, checkpoint_path='model_save.pth'):
    """Restore a model and its optimizer from a checkpoint to resume training.

    The checkpoint is expected to be a dict with a ``'model'`` entry (model
    state dict) and an ``'optimizer'`` entry (optimizer state dict).

    Args:
        best_model: Model instance; its weights are overwritten in place and
            it is moved to ``device``.
        device: Device the model and the optimizer state should live on.
        checkpoint_path: Path of the checkpoint file to read. Defaults to
            ``'model_save.pth'`` in the current working directory.

    Returns:
        A ``(best_model, optimizer)`` tuple with the restored states.
    """
    # map_location lets a checkpoint that was saved on a GPU be loaded on a
    # CPU-only runtime (and vice versa) instead of failing while deserializing.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    best_model.load_state_dict(checkpoint['model'])
    best_model.to(device)

    # The optimizer is created only after the model sits on its final device.
    # NOTE(review): getOptimizer is defined elsewhere in this notebook.
    optimizer = getOptimizer(best_model)
    optimizer.load_state_dict(checkpoint['optimizer'])

    # Make sure every tensor held in the optimizer state is on `device`.
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)
    return best_model, optimizer

In [None]:
# # Restart training
# learning_rate = 0.0005
# device = get_device()
# model, optimizer = restart_training(model, learning_rate, device)

# # Continue training with the restarted model
# train(model)

# 테스트 베이스라인 - 안돌려봄


In [None]:
def evaluation_test():
    """Predict class indices for the test folder and write them to a CSV file.

    Loads the ``'model'`` entry of ``model_eff.pth`` into the module-level
    ``model``, runs it in eval mode over the images under
    ``/content/dataset/test`` and stores, for every image, the index of the
    largest output along dim 1. The indices are written to
    ``./predictions.csv`` as a single column without header or index.
    """
    global model  # weights are loaded into the notebook-level model object

    target_hw = (224, 224)
    preprocessing = transforms.Compose([
        # Grayscale conversion that still yields three channels.
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize(target_hw),
        transforms.ToTensor(),
    ])
    test_loader = torch.utils.data.DataLoader(
        ImageFolder('/content/dataset/test', preprocessing),
        batch_size=128,
        num_workers=4,
    )

    device = get_device()
    saved = torch.load('model_eff.pth', map_location=device)
    model.load_state_dict(saved['model'])

    net = model.to(device)
    net.eval()

    per_batch = []
    with torch.no_grad():
        # Folder-derived labels are not needed for prediction.
        for batch, _ in test_loader:
            batch = batch.float().to(device)
            scores = net(batch)
            _, top_index = scores.max(1, keepdim=True)
            per_batch.append(top_index.cpu().numpy())

    stacked = np.concatenate(per_batch)
    # Drop the kept dimension so the column is one-dimensional.
    column = pd.Series(stacked.reshape(stacked.shape[0]), name='Predicted')
    column.to_csv('./predictions.csv', index=False, header=None)

In [None]:
#evaluation_test()

# 학습중이던 모델 옮기기

In [None]:
# import shutil
# import os

# # 원본 파일 경로
# source_path = '/content/model_save.pth'

# # 이동할 대상 폴더 경로
# destination_folder = '/content/drive/MyDrive/학습중인 모델'

# # 대상 파일 경로
# destination_path = os.path.join(destination_folder, 'model_save_resnetv2.pth')

# # 폴더가 존재하지 않으면 생성
# if not os.path.exists(destination_folder):
#     os.makedirs(destination_folder)

# # 파일 이동
# shutil.move(source_path, destination_path)

# print(f"File moved to {destination_path}")


In [None]:
import shutil
import os

# Move every model's in-progress checkpoint from /content to Google Drive.
# NOTE(review): `models` is defined elsewhere in this notebook; each entry is
# indexed with [1] and that item is split on spaces, so it is presumably a
# (model, "<label> <name> ...") pair -- confirm against its definition.

# Destination folder (the same for every checkpoint)
destination_folder = '/content/drive/MyDrive/학습중인 모델'

# Create the folder if it does not exist yet
os.makedirs(destination_folder, exist_ok=True)

# The loop variable is deliberately not called `model`: evaluation_test()
# reads the module-level `model`, which must not be rebound to a list entry.
for model_entry in models:
    # Source file path
    source_path = '/content/' + model_entry[1].split(' ')[1] + 'model_save.pth'

    # Destination file path (same file name inside the Drive folder)
    destination_path = os.path.join(destination_folder, source_path.split('/')[-1])

    # Move the file
    shutil.move(source_path, destination_path)

    print(f"File moved to {destination_path}")