In [5]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import numpy as np

# GPU 사용 가능 여부 확인
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# 데이터셋 다운로드 및 전처리
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# 레이블이 있는 데이터와 레이블이 없는 데이터 분리 (10% 레이블 사용)
labeled_indices = np.random.choice(len(train_dataset), size=int(0.1 * len(train_dataset)), replace=False)
unlabeled_indices = list(set(range(len(train_dataset))) - set(labeled_indices))

# 학습 데이터와 검증 데이터 분리
np.random.shuffle(labeled_indices)
split = int(0.8 * len(labeled_indices))
train_indices = labeled_indices[:split]
val_indices = labeled_indices[split:]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
unlabeled_sampler = SubsetRandomSampler(unlabeled_indices)

# DataLoader 설정 (GPU로 전송)
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=2, pin_memory=True)
val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler, num_workers=2, pin_memory=True)
unlabeled_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=unlabeled_sampler, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# 간단한 CNN 모델 정의
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 모델 초기화 및 GPU 설정
model = SimpleCNN().to(device)

# 모델 학습 함수 정의
def train(model, train_loader, val_loader, optimizer, criterion, epochs, patience=3):
    best_val_loss = float('inf')
    no_improvement = 0
    
    for epoch in range(epochs):
        start_time = time.time()
        
        model.train()
        total_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch + 1}/{epochs}], Avg Train Loss: {avg_train_loss:.4f}, Time: {time.time() - start_time:.2f} sec')

        # Validation 과정
        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_loss = criterion(outputs, labels)
                total_val_loss += val_loss.item()

        avg_val_loss = total_val_loss / len(val_loader)
        print(f'Epoch [{epoch + 1}/{epochs}], Avg Val Loss: {avg_val_loss:.4f}')

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            no_improvement = 0
        else:
            no_improvement += 1
            if no_improvement >= patience:
                print(f'Early stopping! No improvement for {patience} epochs.')
                break

# 모델 및 최적화 함수 설정
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# 모델 학습 및 early stopping 적용
train(model, train_loader, val_loader, optimizer, criterion, epochs=100, patience=5)

# 모델 성능 평가 함수 정의
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

# 테스트 데이터로 모델 평가
test(model, test_loader)


Using device: cuda
Epoch [1/100], Avg Train Loss: 0.8546, Time: 3.78 sec
Epoch [1/100], Avg Val Loss: 0.4542
Epoch [2/100], Avg Train Loss: 0.2233, Time: 3.52 sec
Epoch [2/100], Avg Val Loss: 0.2165
Epoch [3/100], Avg Train Loss: 0.1248, Time: 3.58 sec
Epoch [3/100], Avg Val Loss: 0.1602
Epoch [4/100], Avg Train Loss: 0.0858, Time: 3.57 sec
Epoch [4/100], Avg Val Loss: 0.1194
Epoch [5/100], Avg Train Loss: 0.0590, Time: 3.07 sec
Epoch [5/100], Avg Val Loss: 0.1228
Epoch [6/100], Avg Train Loss: 0.0459, Time: 3.36 sec
Epoch [6/100], Avg Val Loss: 0.1105
Epoch [7/100], Avg Train Loss: 0.0334, Time: 3.54 sec
Epoch [7/100], Avg Val Loss: 0.1065
Epoch [8/100], Avg Train Loss: 0.0204, Time: 3.57 sec
Epoch [8/100], Avg Val Loss: 0.1014
Epoch [9/100], Avg Train Loss: 0.0147, Time: 3.54 sec
Epoch [9/100], Avg Val Loss: 0.1107
Epoch [10/100], Avg Train Loss: 0.0108, Time: 3.55 sec
Epoch [10/100], Avg Val Loss: 0.1215
Epoch [11/100], Avg Train Loss: 0.0074, Time: 3.54 sec
Epoch [11/100], Avg Val 

In [12]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, SubsetRandomSampler
import numpy as np

# GPU 사용 가능 여부 확인
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# 데이터셋 다운로드 및 전처리
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# 레이블이 있는 데이터와 레이블이 없는 데이터 분리 (10% 레이블 사용)
labeled_indices = np.random.choice(len(train_dataset), size=int(0.1 * len(train_dataset)), replace=False)
unlabeled_indices = list(set(range(len(train_dataset))) - set(labeled_indices))

# 학습 데이터와 검증 데이터 분리
np.random.shuffle(labeled_indices)
split = int(0.8 * len(labeled_indices))
train_indices = labeled_indices[:split]
val_indices = labeled_indices[split:]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# DataLoader 설정 (GPU로 전송)
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler, num_workers=2, pin_memory=True)
val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

# 간단한 CNN 모델 정의
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 모델 초기화 및 GPU 설정
model = SimpleCNN().to(device)

# 모델 학습 함수 정의
def train(model, train_loader, val_loader, optimizer, criterion, unlabeled_loader, epochs, patience=3):
    best_train_loss = float('inf')
    no_improvement = 0
    
    for epoch in range(epochs):
        start_time = time.time()
        
        model.train()
        total_train_loss = 0.0
        
        # 레이블이 있는 데이터로 학습
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)
        print(f'Epoch [{epoch + 1}/{epochs}], Avg Train Loss: {avg_train_loss:.4f}, Time: {time.time() - start_time:.2f} sec')

        # 레이블이 없는 데이터를 이용한 Self-training
        model.eval()
        with torch.no_grad():
            for inputs, _ in unlabeled_loader:
                inputs = inputs.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs.data, 1)
                
                # 가장 확신하는 예측을 추가하여 학습 데이터에 포함
                pseudo_labels = predicted
                pseudo_dataset = torch.utils.data.TensorDataset(inputs.cpu(), pseudo_labels.cpu())
                combined_dataset = torch.utils.data.ConcatDataset([train_dataset, pseudo_dataset])
                combined_loader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
                
                # 모델 재학습
                model.train()
                for inputs, labels in combined_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

        # Early stopping
        if avg_train_loss < best_train_loss:
            best_train_loss = avg_train_loss
            no_improvement = 0
        else:
            no_improvement += 1
            if no_improvement >= patience:
                print(f'Early stopping! No improvement for {patience} epochs.')
                break

# 모델 및 최적화 함수 설정
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# 모델 학습 및 early stopping 적용
train(model, train_loader, val_loader, optimizer, criterion, unlabeled_loader, epochs=100, patience=5)

# 모델 성능 평가 함수 정의
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

# 테스트 데이터로 모델 평가
test(model, test_loader)

Using device: cuda
Epoch [1/100], Avg Train Loss: 0.7527, Time: 3.00 sec


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn