In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
import numpy as np
import torchsummary
import os
from PIL import Image
from tqdm.auto import tqdm

torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3070 Ti'

In [2]:
# 모델정의
class ResNet50Classifier(nn.Module):
    def __init__(self, num_classes=4800, freeze_resnet=True):
        super(ResNet50Classifier, self).__init__()
        
        # Resnet50 model
        # pretrained 모델 사용
        self.backborn = models.resnet50(pretrained=True)
        
        # pretrained weight freeze 여부
        if freeze_resnet:
            for param in self.backborn.parameters():
                param.requires_grad = False
        
        # resnet50 출력채널수
        num_features = self.backborn.fc.in_features
        
        # resnet50의 마지막 출력채널을 제거
        self.backborn.fc = nn.Identity()
        
        # 우리가 분류할 class만큼 full connected 레이어 추가
        num_intermediate = (num_features + num_classes) // 2
        self.intermediate = nn.Linear(num_features, num_intermediate)
        self.classifier = nn.Linear(num_intermediate, num_classes)
    
    def forward(self, x):
        x = self.backborn(x)
        x = self.intermediate(x)
        x = self.classifier(x)
        return x

In [3]:
# 이미지 투명도 제거 Transform
class RemoveAlpha:
    def __call__(self, img):
        img = img.convert('RGB')
        return img

# 성능을 위해 줄임
batch_size = 64
    
# 데이터 로더
base_path = "e:\\pill_image_cropped" # './sample_data'

# 데이터셋 전처리 이미 되어있음
transform = transforms.Compose([
#     RemoveAlpha(),
#     transforms.CenterCrop(1200),
#     transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

image_dataset = ImageFolder(base_path, transform=transform)
print(len(image_dataset.classes))

val_size = int(len(image_dataset) * 0.2)
train_size = len(image_dataset) - val_size

train_dataset, val_dataset = torch.utils.data.random_split(image_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, prefetch_factor=1)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, prefetch_factor=1)

5087


In [4]:
# 학습함수
def train(model, train_loader, valid_loader, criterion, optimizer, device, epochs, scheduler = None):
    result = []
    model.to(device)
    # print(torchsummary.summary(model, (3, 224, 224)))
    # epochs 만큼 반복
    for epoch in tqdm(range(epochs)):
        # 캐시 비우기
        torch.cuda.empty_cache()
        # train모드
        model.train()
        
        # train 정확도
        train_loss = 0.0
        train_accuracy = 0.0
        train_total = 0
        
        # train 데이터 가져옴
        for images, labels in tqdm(train_loader, leave=False):
            # 장치로 보냄
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            
            # loss 계산
            loss = criterion(outputs, labels)
            
            # backpropagation
            loss.backward()
            optimizer.step()
            
            # train accuracy 계산
            _, pred = torch.max(outputs, 1)
            accuracy = torch.sum(pred == labels.data)
            train_accuracy += accuracy.item()
            train_total += labels.size(0)
            train_loss += loss.item() * images.size(0)            
            del images, labels
        train_loss /= len(train_loader.dataset)
        train_accuracy /= train_total
        # validation
        valid_loss = 0.0
        valid_accuracy = 0.0
        # eval모드
        model.eval()
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                
                _, pred = torch.max(outputs, 1)
                accuracy = torch.sum(pred == labels.data)
                
                valid_loss += loss.item() * images.size(0)
                valid_accuracy += accuracy.item()
        valid_loss /= len(valid_loader.dataset)
        valid_accuracy /= len(valid_loader.dataset)
        
        print(f'Epoch {epoch+1}/{epochs} : loss : {train_loss:.3f}, accuracy : {train_accuracy:.3f}, valid_loss : {valid_loss:.3f}, valid_accuracy : {valid_accuracy:.3f}')
        result.append((train_loss, train_accuracy, valid_loss, valid_accuracy))
        
        if scheduler is not None:
            scheduler.step()
    return result

In [None]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

model = ResNet50Classifier(num_classes=5087, freeze_resnet=False)
# train_loader = 
# valid_loader = None
epochs = 12
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
# epoch의 80%를 완료하면 learning rate 변경
scheduler = StepLR(optimizer, step_size = int(epochs * 0.8), gamma=0.1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

result = train(model, train_loader, val_loader, criterion, optimizer, device, epochs, scheduler)

torch.save(model, "0513_12e_resnet50_unfreeze_model.pt")



cuda


  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/2577 [00:04<?, ?it/s]

Epoch 1/12 : loss : 5.116, accuracy : 0.153, valid_loss : 2.037, valid_accuracy : 0.109


  0%|          | 0/2577 [00:04<?, ?it/s]

In [6]:
# gpu 모니터
# nvidia-smi --query-gpu utilization.gpu,utilization.memory,memory.total,memory.free,memory.used --format=csv,noheader -lms 1000

In [13]:
# 수행결과 확인
for r in result:
    print(f'train_loss {r[0]:.3f}\ttrain_accu {r[1]:.3f}\tval_loss {r[2]:.3f}\tval_accu {r[3]:.3f}')

train_loss 5.116	train_accu 0.153	val_loss 2.037 	val_accu 0.109
train_loss 1.159	train_accu 0.674	val_loss 0.705 	val_accu 0.216
train_loss 0.538	train_accu 0.840	val_loss 0.522 	val_accu 0.173
train_loss 0.361	train_accu 0.892	val_loss 0.459 	val_accu 0.251
train_loss 0.285	train_accu 0.915	val_loss 0.164 	val_accu 0.792
train_loss 0.211	train_accu 0.936	val_loss 0.297 	val_accu 0.363
train_loss 0.184	train_accu 0.945	val_loss 0.365 	val_accu 0.598
train_loss 0.160	train_accu 0.953	val_loss 0.155 	val_accu 0.827
train_loss 0.139	train_accu 0.959	val_loss 0.110 	val_accu 0.900
train_loss 0.025	train_accu 0.992	val_loss 0.015 	val_accu 0.990
train_loss 0.011	train_accu 0.996	val_loss 0.031 	val_accu 0.990
train_loss 0.008	train_accu 0.997	val_loss 0.005 	val_accu 0.990


In [8]:
# 메모리 정리
try:
    del model
except:
    pass
torch.cuda.empty_cache()
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    772 MiB |   5996 MiB | 657618 GiB | 657618 GiB |
|       from large pool |    704 MiB |   5943 MiB | 655894 GiB | 655893 GiB |
|       from small pool |     68 MiB |    103 MiB |   1724 GiB |   1724 GiB |
|---------------------------------------------------------------------------|
| Active memory         |    772 MiB |   5996 MiB | 657618 GiB | 657618 GiB |
|       from large pool |    704 MiB |   5943 MiB | 655894 GiB | 655893 GiB |
|       from small pool |     68 MiB |    103 MiB |   1724 GiB |   1724 GiB |
|---------------------------------------------------------------