In [1]:
import subprocess
import time
import threading
import torch
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader, ConcatDataset
from collections import Counter
import torch.cuda.amp as amp
from torch import nn, optim
import copy
from torch.optim import lr_scheduler
import timm
from PIL import Image
import numpy as np
import os
import torchvision.utils as vutils

In [2]:
# DCGAN의 Generator 정의
class Generator(nn.Module):
    def __init__(self, nz, ngf, nc):
        super(Generator, self).__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, input):
        return self.main(input)

# DCGAN의 Discriminator 정의
class Discriminator(nn.Module):
    def __init__(self, nc, ndf):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)

In [3]:
def get_gpu_usage():
    result = subprocess.run(['nvidia-smi', '--query-gpu=utilization.gpu', '--format=csv,noheader,nounits'], 
                            stdout=subprocess.PIPE)
    return int(result.stdout.decode('utf-8').strip())

class DynamicDataLoader:
    def __init__(self, dataset, batch_size=32, num_workers=4, pin_memory=True, prefetch_factor=2):
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.pin_memory = pin_memory
        self.prefetch_factor = prefetch_factor
        self.loader = self.create_loader()
        self.adjusting = False
        self.target_gpu_usage = 95  # Target GPU usage in percent

    def create_loader(self):
        return DataLoader(self.dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers, 
                          pin_memory=self.pin_memory, prefetch_factor=self.prefetch_factor, persistent_workers=True)

    def adjust_num_workers(self):
        while self.adjusting:
            gpu_usage = get_gpu_usage()
            print(f"Current GPU usage: {gpu_usage}%")
            if gpu_usage < self.target_gpu_usage - 10 and self.num_workers < 16:
                self.num_workers += 1
                print(f"Increasing num_workers to {self.num_workers}")
            elif gpu_usage > self.target_gpu_usage + 10 and self.num_workers > 1:
                self.num_workers -= 1
                print(f"Decreasing num_workers to {self.num_workers}")
            self.loader = self.create_loader()
            time.sleep(20)

    def start_adjusting(self):
        self.adjusting = True
        self.adjust_thread = threading.Thread(target=self.adjust_num_workers)
        self.adjust_thread.start()

    def stop_adjusting(self):
        self.adjusting = False
        self.adjust_thread.join()

    def get_loader(self):
        return self.loader


In [4]:
# 파라미터 설정
batch_size = 64
image_size = 64
nz = 100
ngf = 64
ndf = 64
nc = 3
num_epochs = 50
lr = 0.0001
beta1 = 0.5
ngpu = 1

In [5]:
base_dir = './cat_faces/'

# 데이터 전처리 및 증강
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(331),  # 이미지를 331로 리사이즈
        transforms.CenterCrop(299),  # 299x299로 중심 자르기
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(30),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),  # 추가적인 데이터 증강
        transforms.RandomGrayscale(p=0.2),  # 추가적인 데이터 증강
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(331),
        transforms.CenterCrop(299),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# 전체 데이터셋 로드
full_dataset = datasets.ImageFolder(base_dir, transform=data_transforms['train'])

# 클래스별 이미지 개수 출력
class_counts = Counter([full_dataset.targets[i] for i in range(len(full_dataset))])
print("Original class distribution:", class_counts)

print("Splitting dataset into training and validation sets...")
# 데이터셋을 훈련과 검증 세트로 나누기 (예: 80% 훈련, 20% 검증)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# 훈련 데이터셋과 검증 데이터셋 각각에 다른 변환 적용
train_dataset.dataset.transform = data_transforms['train']
val_dataset.dataset.transform = data_transforms['val']

# DynamicDataLoader 사용
dynamic_loader = DynamicDataLoader(train_dataset, batch_size=32, num_workers=4, pin_memory=True, prefetch_factor=4)
dynamic_loader.start_adjusting()

val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True, prefetch_factor=4, persistent_workers=True)

dataloaders = {'train': dynamic_loader.get_loader(), 'val': val_loader}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}
class_names = full_dataset.classes

print("Training and validation data are ready.")


Original class distribution: Counter({0: 1000, 1: 1000, 2: 1000, 3: 1000, 4: 1000, 5: 1000, 6: 1000, 7: 1000, 8: 1000})
Splitting dataset into training and validation sets...
Training and validation data are ready.


In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [7]:
# DCGAN 학습
dataset = datasets.ImageFolder(root=base_dir,
                           transform=transforms.Compose([
                               transforms.Resize(image_size),
                               transforms.CenterCrop(image_size),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ]))
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# 모델 초기화
netG = Generator(nz, ngf, nc).to(device)
netD = Discriminator(nc, ndf).to(device)

# 손실 함수와 최적화 설정
criterion = nn.BCELoss()
fixed_noise = torch.randn(64, nz, 1, 1, device=device)
real_label = 1.
fake_label = 0.

optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# 학습 루프
for epoch in range(num_epochs):
    for i, data in enumerate(dataloader, 0):
        # 실제 이미지로 Discriminator 학습
        netD.zero_grad()
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        # 가짜 이미지로 Discriminator 학습
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()

        # Generator 학습
        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()

        if i % 50 == 0:
            print(f'[{epoch}/{num_epochs}][{i}/{len(dataloader)}] '
                  f'Loss_D: {errD.item():.4f} Loss_G: {errG.item():.4f} '
                  f'D(x): {D_x:.4f} D(G(z)): {D_G_z1:.4f} / {D_G_z2:.4f}')

    # 학습 진행 상황 시각화
    if epoch % 10 == 0:
        vutils.save_image(real_cpu, f'real_samples_epoch_{epoch}.png', normalize=True)
        fake = netG(fixed_noise)
        vutils.save_image(fake.detach(), f'fake_samples_epoch_{epoch}.png', normalize=True)

# 최종 Generator 저장
torch.save(netG.state_dict(), 'dcgan_generator.pth')

# 생성된 이미지 저장 경로 설정
gen_images_dir = './generated_images/'
os.makedirs(gen_images_dir, exist_ok=True)

# 학습된 Generator 로드
netG.load_state_dict(torch.load('dcgan_generator.pth'))
netG.eval()

# 각 품종에 대한 새로운 이미지 생성 및 저장
for class_name in class_names:
    class_dir = os.path.join(gen_images_dir, class_name)
    os.makedirs(class_dir, exist_ok=True)
    for i in range(100):  # 각 품종당 100장 생성
        noise = torch.randn(1, nz, 1, 1, device=device)
        with torch.no_grad():
            fake = netG(noise).detach().cpu()
        fake_image = (fake[0] * 0.5 + 0.5) * 255
        fake_image = fake_image.permute(1, 2, 0).numpy().astype(np.uint8)
        img = Image.fromarray(fake_image)
        img.save(os.path.join(class_dir, f'fake_{i}.png'))

Current GPU usage: 9%
Increasing num_workers to 5
[0/50][0/141] Loss_D: 1.5440 Loss_G: 1.6485 D(x): 0.5070 D(G(z)): 0.5655 / 0.1976
[0/50][50/141] Loss_D: 0.0340 Loss_G: 6.8432 D(x): 0.9884 D(G(z)): 0.0217 / 0.0012
[0/50][100/141] Loss_D: 0.0399 Loss_G: 7.8357 D(x): 0.9866 D(G(z)): 0.0255 / 0.0004
[1/50][0/141] Loss_D: 0.3478 Loss_G: 18.5285 D(x): 0.9956 D(G(z)): 0.2731 / 0.0000
[1/50][50/141] Loss_D: 0.2426 Loss_G: 6.3566 D(x): 0.8489 D(G(z)): 0.0079 / 0.0064
[1/50][100/141] Loss_D: 0.4415 Loss_G: 4.6824 D(x): 0.9111 D(G(z)): 0.2502 / 0.0167
[2/50][0/141] Loss_D: 0.4095 Loss_G: 3.3509 D(x): 0.7474 D(G(z)): 0.0504 / 0.0436
[2/50][50/141] Loss_D: 0.0751 Loss_G: 5.9067 D(x): 0.9661 D(G(z)): 0.0364 / 0.0040
[2/50][100/141] Loss_D: 0.2791 Loss_G: 4.0427 D(x): 0.9004 D(G(z)): 0.1343 / 0.0218
[3/50][0/141] Loss_D: 0.2365 Loss_G: 3.8679 D(x): 0.9151 D(G(z)): 0.1261 / 0.0270
Current GPU usage: 37%
Increasing num_workers to 6
[3/50][50/141] Loss_D: 0.3050 Loss_G: 2.3236 D(x): 0.8105 D(G(z)): 0.

In [8]:
# 기존 데이터셋에 생성된 이미지 추가
new_data_transforms = transforms.Compose([
    transforms.Resize(331),
    transforms.CenterCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(30),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

generated_dataset = datasets.ImageFolder(root=gen_images_dir, transform=new_data_transforms)
full_dataset = ConcatDataset([full_dataset, generated_dataset])

# 기존 데이터셋을 다시 분할
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_dataset.dataset.transform = data_transforms['train']
val_dataset.dataset.transform = data_transforms['val']

dynamic_loader = DynamicDataLoader(train_dataset, batch_size=32, num_workers=4, pin_memory=True, prefetch_factor=4)
dynamic_loader.start_adjusting()

val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True, prefetch_factor=4, persistent_workers=True)

dataloaders = {'train': dynamic_loader.get_loader(), 'val': val_loader}
dataset_sizes = {'train': len(train_dataset), 'val': len(val_dataset)}
class_names = full_dataset.datasets[0].classes

In [9]:
# InceptionResNetV2 모델 로드
base_model = timm.create_model('inception_resnet_v2', pretrained=True).to(device)

# 모델의 출력 크기를 확인
dummy_input = torch.randn(1, 3, 299, 299).to(device)
base_model.eval()
with torch.no_grad():
    dummy_output = base_model.forward_features(dummy_input)
    num_features = dummy_output.shape[1] * dummy_output.shape[2] * dummy_output.shape[3]
    print(f'Output features: {num_features}')

class CustomModel(nn.Module):
    def __init__(self, base_model, num_classes):
        super(CustomModel, self).__init__()
        self.base_model = base_model
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(num_features, num_classes)
       
    def forward(self, x):
        x = self.base_model.forward_features(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = self.fc(x)
        return x

model = CustomModel(base_model, len(class_names)).to(device)

# 모든 레이어의 requires_grad를 True로 설정하여 고정 해제
for param in model.parameters():
    param.requires_grad = True

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scaler = amp.GradScaler()
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

Current GPU usage: 24%
Increasing num_workers to 5
Output features: 98304


In [10]:
# 모델 학습 및 저장
num_epochs = 50

best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
patience = 12  # 조기 종료를 위한 patience 설정
trigger_times = 0  # 조기 종료를 위한 트리거 시간 초기화

for epoch in range(num_epochs):
    print(f'Epoch {epoch}/{num_epochs - 1}')
    print('-' * 10)

    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
        else:
            model.eval()

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                with amp.autocast():
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                if phase == 'train':
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        if phase == 'val' and epoch_acc > best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            trigger_times = 0  # 조기 종료 트리거 초기화
            
        elif phase == 'val':
            trigger_times += 1
            if trigger_times >= patience:
                print('Early stopping!')
                model.load_state_dict(best_model_wts)
                dynamic_loader.stop_adjusting()  # 동적 조정 멈춤
                exit()  # 학습 종료
        
        if phase == 'val':
            val_loss = epoch_loss  # validation 손실 저장

    scheduler.step(val_loss)
    print()

print('Training complete')
print(f'Best val Acc: {best_acc:4f}')

model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'cat_breeds_inception_resnet_v2_1000.pth')

# 동적 조정 멈춤
dynamic_loader.stop_adjusting()

Epoch 0/49
----------
Current GPU usage: 8%
Current GPU usage: 72%
Increasing num_workers to 6
Current GPU usage: 78%
Current GPU usage: 74%
Increasing num_workers to 7
train Loss: 1.6805 Acc: 0.3949
Current GPU usage: 5%
val Loss: 1.4076 Acc: 0.4796

Epoch 1/49
----------
Current GPU usage: 71%
Increasing num_workers to 8
Current GPU usage: 55%
Current GPU usage: 84%
Increasing num_workers to 9
Current GPU usage: 80%
train Loss: 1.0689 Acc: 0.6034
val Loss: 1.3671 Acc: 0.4926

Epoch 2/49
----------
Current GPU usage: 75%
Increasing num_workers to 10
Current GPU usage: 85%
Current GPU usage: 76%
Increasing num_workers to 11
Current GPU usage: 69%
train Loss: 0.6540 Acc: 0.7549
val Loss: 1.5434 Acc: 0.4986

Epoch 3/49
----------
Current GPU usage: 71%
Increasing num_workers to 12
Current GPU usage: 68%
Current GPU usage: 59%
Increasing num_workers to 13
Current GPU usage: 73%
train Loss: 0.4709 Acc: 0.8240
val Loss: 1.8308 Acc: 0.4949

Epoch 4/49
----------
Current GPU usage: 65%
Increa

KeyboardInterrupt: 