In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import os
import json
import numpy as np


In [4]:
!pip show torch
print(torch.__version__)

Name: torch2.3.0

Version: 2.3.0
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: C:\Users\Na\anaconda3\envs\pytorch_practice\Lib\site-packages
Requires: filelock, fsspec, jinja2, mkl, networkx, sympy, typing-extensions
Required-by: torchaudio, torchvision


In [5]:
print(torch.cuda.is_available())
torch.cuda.get_device_name()
torch.cuda.device(0)

True


<torch.cuda.device at 0x21e587eaa90>

In [6]:
!nvidia-smi

Wed May 29 21:38:53 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 536.19                 Driver Version: 536.19       CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 Ti   WDDM  | 00000000:01:00.0  On |                  N/A |
|  0%   54C    P8              13W / 200W |   1252MiB /  8192MiB |      8%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# custom dataset load

In [7]:
unique_classes = [
    "c_1", "c_2_01", "c_2_02", "c_3", "c_4_01_02", "c_4_02_01_02", "c_4_02_02_02",
    "c_4_02_03_02", "c_4_03", "c_5_02", "c_6", "c_7", "c_1_01", "c_2_02_01", "c_3_01",
    "c_4_03_01", "c_5_01_01", "c_5_02_01", "c_6_01", "c_7_01", "c_4_01_01", "c_4_02_01_01",
    "c_4_02_02_01", "c_4_02_03_01", "c_5_01", "c_8_01", "c_8_02", "c_8_01_01", "c_9"
]
num_classes = len(unique_classes)

In [8]:
class CustomDataset(Dataset):
    def __init__(self, image_dir, label_dir, transform=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        
        label_name = img_name.replace('.jpg', '.json')
        label_path = os.path.join(self.label_dir, label_name)
        with open(label_path, 'r') as f:
            label_data = json.load(f)
        
        class_names = [obj['class_name'] for obj in label_data['objects']]
        label = self.encode_labels(class_names)  # 다중 라벨을 인코딩
        
        if self.transform:
            image = self.transform(image)
        
        return image, label
    
    def encode_labels(self, class_names):
        labels = torch.zeros(num_classes)
        for class_name in class_names:
            if class_name in unique_classes:
                labels[unique_classes.index(class_name)] = 1
        return labels

In [9]:
# 데이터 전처리 정의
data_transforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 전체 데이터셋 정의
dataset = CustomDataset(image_dir='C:/Users/Na/Desktop/data/images', label_dir='C:/Users/Na/Desktop/data/labels', transform=data_transforms)

# 데이터셋을 6:2:2 비율로 분할
total_size = len(dataset)
train_size = int(0.6 * total_size)
val_size = int(0.2 * total_size)
test_size = total_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# 데이터로더 정의
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

dataloaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}
dataset_sizes = {'train': train_size, 'val': val_size, 'test': test_size}

In [10]:
class AlexNet(nn.Module):
    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
            nn.Sigmoid()  # 다중 라벨 분류를 위한 sigmoid 활성화 함수
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# 모델 인스턴스 생성 및 GPU로 이동
model = AlexNet(num_classes=num_classes)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)


In [11]:
# 손실 함수 및 옵티마이저 정의
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

def train_model(model, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    preds = (outputs > 0.5).float()  # 0.5를 기준으로 라벨 예측
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / (dataset_sizes[phase] * num_classes)  # 정확도 계산

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        print()

    return model

# 모델 학습 실행
model = train_model(model, criterion, optimizer, num_epochs=25)

def evaluate_model(model, phase='test'):
    model.eval()
    corrects = 0

    for inputs, labels in dataloaders[phase]:
        inputs = inputs.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            outputs = model(inputs)
            preds = (outputs > 0.5).float()
            corrects += torch.sum(preds == labels.data)

    accuracy = corrects.double() / (dataset_sizes[phase] * num_classes)
    print(f'{phase.capitalize()} Accuracy: {accuracy:.4f}')

# 모델 평가
evaluate_model(model, phase='val')
evaluate_model(model, phase='test')


Epoch 0/24
----------
