In [11]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms


class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform 
        self.image_paths = []
        self.labels = []
        
        class_names = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
        # class_names = sorted(os.listdir(root_dir))
        
        self.class_to_idx = {class_name: i for i, class_name in enumerate(class_names)}
        
        for class_name in class_names:
            class_path = os.path.join(root_dir, class_name)
            for image_name in os.listdir(class_path):
                if image_name.lower().endswith((".png", '.jpg', '.jpeg')):
                    self.image_paths.append(os.path.join(class_path, image_name))
                    self.labels.append(self.class_to_idx[class_name])
                    
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert("RGB")
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
            
        return image, label


data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

data_path = "data/output_frames/"

custom_dataset = CustomImageDataset(root_dir=data_path, transform=data_transforms)

dataset_size = len(custom_dataset)
train_size = int(dataset_size * 0.8)
test_size = dataset_size - train_size

train_dataset, test_dataset = random_split(custom_dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0) 
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=0) 

image, labels = next(iter(train_loader))
print(f"이미지 배치(Batch)의 형태(shape): {image.shape}")
print(f"레이블 배치(Batch)의 형태(shape): {labels.shape}")
print(f"첫 번째 이미지의 레이블: {labels[0]}")

이미지 배치(Batch)의 형태(shape): torch.Size([32, 3, 224, 224])
레이블 배치(Batch)의 형태(shape): torch.Size([32])
첫 번째 이미지의 레이블: 5


In [12]:
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = nn.Flatten()
        
        self.fc1 = nn.Linear(64*56*56, 512)
        self.fc2 = nn.Linear(512, 10)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool(x)
        
        x = self.flatten(x)
        
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x
    
model = SimpleCNN()
x = torch.randn(32, 3, 224, 224)
model(x).size()

torch.Size([32, 10])

In [4]:
# from torchvision.models import resnet18, ResNet18_Weights

# model = resnet18(ResNet18_Weights.DEFAULT)

# for param in model.parameters():
#     param.requires_grad() = False
    
# num_features = model.fc.in_features
# model.fc = nn.Linear(num_features, 10)

In [5]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
)

In [6]:
def train(model, train_loader, criterion, optimizer, epochs):
    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        for data in train_loader:
            X, y = data
            X, y = X.to(device), y.to(device)
            
            optimizer.zero_grad()
            
            output = model(X)
            
            loss = criterion(output, y)
            
            loss.backward()
            
            optimizer.step()
            
            running_loss += loss.item()
            
        print(f"epoch:{epoch+1}, loss : {running_loss / len(train_loader)}")
            
train(model, train_loader, criterion, optimizer, 5)
            

epoch:1, loss : 1.7784260958052047
epoch:2, loss : 1.5587548331531418
epoch:3, loss : 1.462703812610158
epoch:4, loss : 1.4008450547752478
epoch:5, loss : 1.3561384155012457


In [17]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix

def evaluate(model, test_loader):
    model.eval() # 모델을 평가 모드로 설정
    all_preds = []
    all_labels = []

    # 평가 시에는 기울기 계산이 필요 없으므로 no_grad() 사용
    with torch.no_grad():
        for data in test_loader:
            X, y = data
            X, y = X.to(device), y.to(device)
            
            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y.cpu().numpy())
    
    # 1. 평가 지표 계산
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='macro')
    
    
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    
evaluate(model, test_loader)

Accuracy: 0.5579
Precision: 0.5572
Recall: 0.5579
F1 Score: 0.5464
