In [1]:
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import timm
import torch.nn as nn
from tqdm import tqdm 
from torch.optim.lr_scheduler import ReduceLROnPlateau


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Restrict CUDA to device index 5, with indices following PCI bus order.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "5",
})

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Annotation tables for the training and validation splits.
train_df, val_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/csv/train/snack_RMBG.csv',
        '../data/csv/val/snack_RMBG.csv',
    )
)

# Size of the classifier output passed to the model constructor.
num_classes = 1637

print(f'train shape: {train_df.shape}\nval shape: {val_df.shape}')

train shape: (93609, 18)
val shape: (12328, 18)


In [3]:
class PixelDropout:
    """Tensor transform that randomly zeroes individual elements of its input.

    A uniform random value in [0, 1) is drawn for every element of the input
    (the mask has the same shape as the tensor, so channels are masked
    independently). Elements whose draw is not strictly greater than
    ``dropout_prob`` are multiplied by zero; the rest pass through unchanged.

    Args:
        dropout_prob: Threshold compared against the uniform draws.
    """

    def __init__(self, dropout_prob=0.1):
        self.dropout_prob = dropout_prob

    def __call__(self, img):
        """Return ``img`` multiplied by a freshly sampled boolean keep-mask."""
        keep = torch.rand_like(img).gt(self.dropout_prob)
        return img * keep


class CustomImageDataset(Dataset):
    """Image dataset driven by a table of annotations.

    ``annotations_file`` is stored as-is and accessed through ``len()`` and
    ``.iloc``; each row must provide a ``'path'`` entry (image location) and
    a ``'class'`` entry (label).

    Args:
        annotations_file: Table of annotations (the notebook passes a pandas
            DataFrame).
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, annotations_file, transform=None):
        self.transform = transform
        self.img_labels = annotations_file

    def __len__(self):
        """Number of annotation rows."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        record = self.img_labels.iloc[idx]
        image = Image.open(record['path']).convert('RGB')
        label = record['class']
        if not self.transform:
            return image, label
        return self.transform(image), label


# Data preprocessing
# Channel-wise normalisation statistics shared by both pipelines.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    # transforms.Resize((224, 224)),
    transforms.RandomApply([transforms.RandomResizedCrop((224, 224))], p=0.1),
    transforms.RandomHorizontalFlip(),  # random left-right flip
    transforms.RandomRotation(30),  # random rotation within 30 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # brightness, contrast, saturation, hue jitter
    transforms.ToTensor(),
    PixelDropout(dropout_prob=0.1),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation pipeline: deterministic. Random augmentation here would make the
# validation loss fluctuate from epoch to epoch for reasons unrelated to the
# model, and that loss is what drives LR scheduling and checkpoint selection.
# NOTE(review): neither pipeline resizes, so batching relies on the stored
# images already sharing one size -- TODO confirm.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Kept under the original name for any cell that still refers to `transform`.
transform = train_transform

# Dataset and loader instances
train_dataset = CustomImageDataset(train_df, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataset = CustomImageDataset(val_df, transform=val_transform)
# Shuffling has no benefit for evaluation; a fixed order keeps runs comparable.
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


In [4]:
# An earlier experiment wrapped timm's 'resnet50.a1_in1k' in a thin nn.Module
# (CustomImageClassifier) whose forward simply called the backbone; the timm
# model is now created and used directly.
model = timm.create_model(
    'efficientnet_b3.ra2_in1k',
    pretrained=True,
    num_classes=num_classes,
)

# Move the model to the selected device. As the last expression of the cell,
# this also displays the module structure.
model.to(device)



EfficientNet(
  (conv_stem): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
        (bn1): BatchNormAct2d(
          40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(40, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
    

In [5]:
# Classification loss over the model's class logits.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Scale the learning rate by `factor` once the monitored value (stepped with
# the validation loss in the training loop) has stopped decreasing for more
# than `patience` epochs.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=10,
    verbose=True,
)




In [6]:
# Checkpoint locations. The directory is created up front so that the first
# torch.save cannot fail on a missing folder after a full epoch of training.
checkpoint_dir = './snack_pt_0517_effi'
best_ckpt_path = f'{checkpoint_dir}/best.pt'
last_ckpt_path = f'{checkpoint_dir}/last.pt'
os.makedirs(checkpoint_dir, exist_ok=True)

# Number of passes over the training set.
num_epochs = 100

# Lowest validation loss seen so far
min_val_loss = float('inf')

# Training and validation
for epoch in range(num_epochs):
    model.train()
    train_loss, val_loss = 0.0, 0.0

    # Training pass
    for images, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validation pass (eval mode, no gradient tracking)
    model.eval()
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Validation Epoch {epoch+1}"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    # Per-epoch losses, averaged over the number of batches
    avg_train_loss = train_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)
    print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss}, Val Loss: {avg_val_loss}')

    # Let the scheduler react to the validation loss
    scheduler.step(avg_val_loss)

    # Save a "best" checkpoint whenever the validation loss improves
    if avg_val_loss < min_val_loss:
        min_val_loss = avg_val_loss
        torch.save(model.state_dict(), best_ckpt_path)
        print(f"Model saved: Epoch {epoch+1} with Val Loss: {avg_val_loss:.4f}")

    # Always overwrite the "last" checkpoint with the current weights
    torch.save(model.state_dict(), last_ckpt_path)


Training Epoch 1:   0%|          | 0/1463 [00:00<?, ?it/s]

Training Epoch 1:   3%|▎         | 38/1463 [01:14<40:26,  1.70s/it] 