In [1]:
#@title Google Drive 마운트
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# @title Google Drive에서 파일 복사
import shutil
import os

# 파일 복사 함수
def copy_if_exists(source_path, destination_path):
    if os.path.exists(source_path):
        shutil.copy(source_path, destination_path)
        print(f"Copied: {source_path} to {destination_path}")
    else:
        print(f"File not found: {source_path}")

# Colab 내 저장 경로
tar_path                = "/content/sketch.tar"
best_model_path         = "/content/best_model.pth"
checkpoint_path         = "/content/sketch_classification_checkpoint.pth"

# Google Drive 파일 경로
remote_tar_path         = "/content/drive/MyDrive/colab/toy project sketch classification/cnn/sketch.tar"
remote_best_model_path  = "/content/drive/MyDrive/colab/toy project sketch classification/cnn/best_model.pth"
remote_checkpoint_path  = "/content/drive/MyDrive/colab/toy project sketch classification/cnn/sketch_classification_checkpoint.pth"

# 파일 복사
copy_if_exists(remote_tar_path, tar_path)
copy_if_exists(remote_best_model_path, best_model_path)
copy_if_exists(remote_checkpoint_path, checkpoint_path)


Copied: /content/drive/MyDrive/colab/toy project sketch classification/cnn/sketch.tar to /content/sketch.tar
File not found: /content/drive/MyDrive/colab/toy project sketch classification/cnn/best_model.pth
File not found: /content/drive/MyDrive/colab/toy project sketch classification/cnn/sketch_classification_checkpoint.pth


In [3]:
# @title 파일 압축 풀기

import tarfile

# 파일 경로 및 대상 디렉토리 설정
extract_dir = "/content/dataset"

# 디렉토리 생성 (이미 존재하면 무시)
os.makedirs(extract_dir, exist_ok=True)

# tar 파일 압축 풀기
with tarfile.open(tar_path, "r") as tar:
    tar.extractall(path=extract_dir)

print(f"압축 해제 완료: {extract_dir}")

압축 해제 완료: /content/dataset


In [4]:
# @title 데이터셋 분리 (미리 수행해서 코드만 남김)

# import os
# import random
# import shutil

# # 데이터 경로 설정
# base_dir = "/content/images/tx_000000000000"
# train_dir = "/content/dataset/train"
# val_dir = "/content/dataset/val"
# test_dir = "/content/dataset/test"

# # 데이터셋 디렉토리 생성
# os.makedirs(train_dir, exist_ok=True)
# os.makedirs(val_dir, exist_ok=True)
# os.makedirs(test_dir, exist_ok=True)

# # 각 클래스 처리
# for class_name in os.listdir(base_dir):
#     class_path = os.path.join(base_dir, class_name)

#     if os.path.isdir(class_path):
#         # 이미지 파일 목록 가져오기
#         images = [os.path.join(class_path, img) for img in os.listdir(class_path) if img.endswith(".png")]

#         # 파일 무작위 셔플
#         random.shuffle(images)

#         # 파일 개수 계산
#         total_count = len(images)
#         train_count = int(total_count * 0.9)
#         val_count = int(total_count * 0.05)

#         # 데이터 분할
#         train_files = images[:train_count]
#         val_files = images[train_count:train_count + val_count]
#         test_files = images[train_count + val_count:]

#         # 파일 복사
#         for file in train_files:
#             class_train_dir = os.path.join(train_dir, class_name)
#             os.makedirs(class_train_dir, exist_ok=True)
#             shutil.copy(file, class_train_dir)

#         for file in val_files:
#             class_val_dir = os.path.join(val_dir, class_name)
#             os.makedirs(class_val_dir, exist_ok=True)
#             shutil.copy(file, class_val_dir)

#         for file in test_files:
#             class_test_dir = os.path.join(test_dir, class_name)
#             os.makedirs(class_test_dir, exist_ok=True)
#             shutil.copy(file, class_test_dir)

# print("데이터 분할 완료!")
# print(f"Training data saved in: {train_dir}")
# print(f"Validation data saved in: {val_dir}")
# print(f"Test data saved in: {test_dir}")


In [5]:
# @title 모델 학습/검증, 체크포인트 저장/불러오기 (공통사용)

import torch
from tqdm import tqdm

# 학습 함수
def train(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss, correct = 0, 0
    progress_bar = tqdm(dataloader, desc="Training", leave=False)
    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (outputs.argmax(1) == labels).sum().item()
        progress_bar.set_postfix({"Loss": loss.item(), "Accuracy": correct / len(dataloader.dataset)})
    accuracy = correct / len(dataloader.dataset)
    return total_loss / len(dataloader), accuracy

# 평가 함수
def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss, correct = 0, 0
    progress_bar = tqdm(dataloader, desc="Validating", leave=False)
    with torch.no_grad():
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()
            progress_bar.set_postfix({"Loss": loss.item(), "Accuracy": correct / len(dataloader.dataset)})
    accuracy = correct / len(dataloader.dataset)
    return total_loss / len(dataloader), accuracy

# 체크포인트 저장 함수
def save_checkpoint(model, optimizer, epoch, train_history, val_history, best_val_acc, checkpoint_path):
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "epoch": epoch,
        "train_history": train_history,
        "val_history": val_history,
        "best_val_acc": best_val_acc,
    }
    torch.save(checkpoint, checkpoint_path)
    print(f"Checkpoint saved at epoch {epoch} with Validation Accuracy: {best_val_acc:.4f}")

# 체크포인트 불러오기 함수
def load_checkpoint(checkpoint_path, model, optimizer):
    if os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        epoch = checkpoint["epoch"]
        train_history = checkpoint["train_history"]
        val_history = checkpoint["val_history"]
        best_val_acc = checkpoint["best_val_acc"]
        print(f"Checkpoint loaded from epoch {epoch} with Validation Accuracy: {best_val_acc:.4f}")
        return epoch, train_history, val_history, best_val_acc
    else:
        print("No checkpoint found, starting from scratch.")
        return 0, [], [], 0.0

In [6]:
# @title 모델
from torch import nn
import torch.nn.functional as F

# CNN 기반 분류 모델 정의
class CNNClassification(nn.Module):
    def __init__(self, num_classes=125):
        super(CNNClassification, self).__init__()

        # Convolutional Layers
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)  # 256x256 -> 256x256
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)                  # 256x256 -> 128x128
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1) # 128x128 -> 128x128
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1) # 64x64 -> 64x64

        flattened_size = 64 * 32 * 32

        # Fully Connected Layers
        self.fc1 = nn.Linear(flattened_size, 512)  # Flatten 크기에 맞게 수정
        self.fc2 = nn.Linear(512, num_classes)  # Output layer for 125 classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)  # Flatten for Fully Connected
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [7]:
# @title 데이터셋, 데이터로더, 전처리기, 옵티마이저, 손실함수
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# 데이터셋 경로 설정
train_dir = "/content/dataset/train"
val_dir = "/content/dataset/val"
test_dir = "/content/dataset/test"

transform = transforms.Compose([
    transforms.Resize((256, 256)),  # 이미지 크기 조정
    transforms.ToTensor(),          # 0~255 -> 0~1로 변환
])

# 데이터 로더 생성
def create_dataloader(data_dir, batch_size=32, shuffle=True):
    dataset = datasets.ImageFolder(data_dir, transform=transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataloader, dataset

train_loader, train_dataset = create_dataloader(train_dir, batch_size=256, shuffle=True)
val_loader, val_dataset = create_dataloader(val_dir, batch_size=256, shuffle=False)
test_loader, test_dataset = create_dataloader(test_dir, batch_size=256, shuffle=False)

# 모델 초기화
num_classes = len(train_dataset.classes)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CNNClassification(num_classes).to(device)
best_model = CNNClassification(num_classes).to(device)

# 손실 함수 및 옵티마이저 정의
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# @title 학습, 시각화

import torch
import matplotlib.pyplot as plt

# 학습 설정
num_epochs = 100

# 초기화 또는 체크포인트 불러오기
start_epoch, train_history, val_history, best_val_acc = load_checkpoint(checkpoint_path, model, optimizer)

# 학습 루프
for epoch in range(start_epoch, num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = evaluate(model, val_loader, criterion, device)

    # 기록 업데이트
    train_history.append((train_loss, train_acc))
    val_history.append((val_loss, val_acc))

    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
    print(f"  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # 최고 성능 모델 저장
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)
        print(f"  Best model saved with Validation Accuracy: {best_val_acc:.4f}")

    # 체크포인트 저장
    save_checkpoint(model, optimizer, epoch+1, train_history, val_history, best_val_acc, checkpoint_path)

# 기록 시각화
epochs = range(1, len(train_history) + 1)
train_losses, train_accuracies = zip(*train_history)
val_losses, val_accuracies = zip(*val_history)

plt.figure(figsize=(12, 5))
plt.plot(epochs, train_losses, label="Train Loss")
plt.plot(epochs, val_losses, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss Over Epochs")
plt.legend()
plt.show()

plt.figure(figsize=(12, 5))
plt.plot(epochs, train_accuracies, label="Train Accuracy")
plt.plot(epochs, val_accuracies, label="Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy Over Epochs")
plt.legend()
plt.show()

No checkpoint found, starting from scratch.
Epoch 1/100




  Train Loss: 3.9700, Train Acc: 0.1341
  Val Loss: 3.1852, Val Acc: 0.2574
  Best model saved with Validation Accuracy: 0.2574
Checkpoint saved at epoch 1 with Validation Accuracy: 0.2574
Epoch 2/100


Training:  53%|█████▎    | 140/266 [02:09<01:51,  1.13it/s, Loss=2.6, Accuracy=0.178]

In [None]:
# 테스트 평가
best_model.load_state_dict(torch.load(best_model_path))
test_loss, test_acc = evaluate(best_model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

In [None]:
# @title 결과 파일 복사
copy_if_exists(best_model_path, remote_best_model_path)
copy_if_exists(checkpoint_path, remote_checkpoint_path)

In [None]:
from google.colab import runtime
runtime.unassign()