# _**GoogLeNet_voc2012**


1. inception 모듈
2. auxiliary classifier 
3. global average pooling

![alt text](image-1.png)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import os


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
print(f"CUDA available: {torch.cuda.is_available()}")

Using device: cuda
CUDA available: True


In [2]:
import torch
import torch.nn as nn
from torch import Tensor
from typing import Optional

In [3]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs) -> None:
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x: Tensor) -> Tensor:
        x = self.conv(x)
        x = self.batchnorm(x)
        x = self.relu(x)
        return x

In [4]:

class Inception(nn.Module):
    def __init__(self, in_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj) -> None:
        super(Inception, self).__init__()
        self.branch1 = ConvBlock(in_channels, n1x1, kernel_size=1, stride=1, padding=0)

        self.branch2 = nn.Sequential(
            ConvBlock(in_channels, n3x3_reduce, kernel_size=1, stride=1, padding=0),
            ConvBlock(n3x3_reduce, n3x3, kernel_size=3, stride=1, padding=1))
        
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels, n5x5_reduce, kernel_size=1, stride=1, padding=0),
            ConvBlock(n5x5_reduce, n5x5, kernel_size=5, stride=1, padding=2))

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBlock(in_channels, pool_proj, kernel_size=1, stride=1, padding=0))
        
    def forward(self, x: Tensor) -> Tensor:
        x1 = self.branch1(x)
        x2 = self.branch2(x)
        x3 = self.branch3(x)
        x4 = self.branch4(x)
        return torch.cat([x1, x2, x3, x4], dim=1)

In [5]:
class InceptionAux(nn.Module):
    def __init__(self, in_channels, num_classes) -> None:
        super(InceptionAux, self).__init__()
        self.avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = ConvBlock(in_channels, 128, kernel_size=1, stride=1, padding=0)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.dropout = nn.Dropout(p=0.7)
        self.relu = nn.ReLU()

    def forward(self, x: Tensor) -> Tensor:
        x = self.avgpool(x)
        x = self.conv(x)
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [6]:
class GoogLeNet(nn.Module):
    def __init__(self, aux_logits=True, num_classes=1000) -> None:
        super(GoogLeNet, self).__init__()
        assert aux_logits == True or aux_logits == False
        self.aux_logits = aux_logits

        self.conv1 = ConvBlock(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)
        self.conv2 = ConvBlock(in_channels=64, out_channels=64, kernel_size=1, stride=1, padding=0)
        self.conv3 = ConvBlock(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
        #self.avgpool = nn.AvgPool2d(kernel_size=8, stride=1) # if output size error occurred, choice add padding=3 or use AdaptiveAvgPool2d like bewlo:
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(p=0.4)
        self.linear = nn.Linear(1024, num_classes)

        if self.aux_logits:
            self.aux1 = InceptionAux(512, num_classes)
            self.aux2 = InceptionAux(528, num_classes)
        else:
            self.aux1 = None
            self.aux2 = None

    def transform_input(self, x: Tensor) -> Tensor:
        x_R = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
        x_G = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
        x_B = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
        x = torch.cat([x_R, x_G, x_B], 1)
        return x
        
    def forward(self, x: Tensor) -> Tensor:
        x = self.transform_input(x)

        x = self.conv1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.maxpool2(x)
        x = self.a3(x)
        x = self.b3(x)
        x = self.maxpool3(x)
        x = self.a4(x)
        aux1: Optional[Tensor] = None
        if self.aux_logits and self.training:
            aux1 = self.aux1(x)

        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        aux2: Optional[Tensor] = None
        if self.aux_logits and self.training:
            aux2 = self.aux2(x)

        x = self.e4(x)
        x = self.maxpool4(x)
        x = self.a5(x)
        x = self.b5(x)
        x = self.avgpool(x)
        x = x.view(x.shape[0], -1) # x = x.reshape(x.shape[0], -1)
        x = self.linear(x)
        x = self.dropout(x)

        if self.aux_logits and self.training:
            return aux1, aux2
        else:
            return x


In [7]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# 이미지 변환 설정_224x224로 resize, 정규화
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# CIFAR-10 데이터셋 다운로드
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# DataLoader 설정 / 배치사이즈 32
train_loader = DataLoader(trainset, batch_size=32, shuffle=True)
test_loader = DataLoader(testset, batch_size=32, shuffle=False)

# 데이터셋의 첫 번째 이미지와 라벨 확인
data_iter = iter(train_loader)  # DataLoader 객체를 반복 가능한 객체로 변환
images, labels = next(data_iter)  # 첫 번째 배치를 가져오기

# 라벨 출력
print("라벨:", labels)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data\cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:28<00:00, 5892399.93it/s]


Extracting ./data\cifar-10-python.tar.gz to ./data
Files already downloaded and verified
라벨: tensor([8, 5, 3, 7, 7, 6, 9, 9, 0, 9, 3, 4, 0, 4, 3, 1, 4, 8, 2, 4, 1, 1, 3, 6,
        1, 5, 9, 7, 9, 5, 7, 6])


In [9]:
# 모델 생성
def train(model, train_loader, test_loader, num_epochs=10, learning_rate=0.001):
    from tqdm import tqdm
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # CUDA 사용 가능하면 GPU 사용, 아니면 CPU 사용
    model = model.to(device)  # 모델을 지정된 장치(GPU/CPU)로 이동
    
    criterion = nn.CrossEntropyLoss()  # 다중 클래스 분류 손실 함수
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # 모델 학습 및 검증
    for epoch in tqdm(range(num_epochs)):
        model.train()  # 모델을 학습 모드로 전환
        running_loss = 0.0
        
        # 미니 배치 단위로 데이터를 불러옴
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            # 입력 데이터를 모델에 통과시킴
            if model.aux_logits:  # aux_logits가 True일 때
                outputs = model(inputs)  # 보조 출력을 포함한 모델 통과
                if isinstance(outputs, tuple):  # 모델이 튜플을 반환하면 보조 출력도 포함
                    outputs, aux1 = outputs  # 보조 출력이 하나만 반환되는 경우
                    aux1 = aux1.view(-1, 10)
                    loss1 = criterion(aux1, targets)
                    loss = criterion(outputs, targets) + 0.3 * loss1
                else:  # 보조 출력을 하나만 반환할 경우
                    outputs = outputs[0]  # 주 출력만 사용
                    targets = targets.view(-1)  # (batch_size * height * width,)
                    outputs = outputs.view(-1, 10)
                    loss = criterion(outputs, targets)
            else:  # aux_logits가 False일 때
                outputs = model(inputs)
                targets = targets.view(-1)  # (batch_size * height * width,)
                outputs = outputs.view(-1, 10)  # (batch_size * height * width, num_classes)
                loss = criterion(outputs, targets)

            # 역전파 및 최적화
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
        
        # 검증 데이터로 성능 평가
        evaluate(model, test_loader, device)

# 검증 함수
def evaluate(model, test_loader, device):
    model.eval()  # 모델을 평가 모드로 전환
    correct = 0
    total = 0
    # 그라디언트 계산 비활성화
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)  # 입력 데이터를 지정된 장치로 이동
            outputs = model(inputs)
            outputs = outputs.view(-1, 10)  # (batch_size * height * width, num_classes)
            targets = targets.view(-1)  # (batch_size * height * width,)
            
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")






In [10]:
# 모델 생성 및 학습
model = GoogLeNet(aux_logits=True, num_classes=10)  # CIFAR10에는 10개의 클래스를 사용합니다
train(model, train_loader, test_loader, num_epochs=10, learning_rate=0.001)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Loss: 1.8761


 10%|█         | 1/10 [01:21<12:12, 81.38s/it]

Validation Accuracy: 8.76%
Epoch [2/10], Loss: 1.3253


 20%|██        | 2/10 [02:42<10:47, 80.98s/it]

Validation Accuracy: 9.60%
Epoch [3/10], Loss: 1.0821


 30%|███       | 3/10 [04:03<09:29, 81.38s/it]

Validation Accuracy: 7.68%
Epoch [4/10], Loss: 0.9211


 40%|████      | 4/10 [05:24<08:05, 80.98s/it]

Validation Accuracy: 10.24%
Epoch [5/10], Loss: 0.8024


 50%|█████     | 5/10 [06:44<06:43, 80.75s/it]

Validation Accuracy: 10.70%
Epoch [6/10], Loss: 0.7072


 60%|██████    | 6/10 [08:03<05:20, 80.00s/it]

Validation Accuracy: 10.06%
Epoch [7/10], Loss: 0.6257


 70%|███████   | 7/10 [09:23<04:00, 80.00s/it]

Validation Accuracy: 8.81%
Epoch [8/10], Loss: 0.5531


 80%|████████  | 8/10 [10:46<02:42, 81.01s/it]

Validation Accuracy: 10.09%
Epoch [9/10], Loss: 0.4983


 90%|█████████ | 9/10 [12:08<01:21, 81.38s/it]

Validation Accuracy: 10.26%
Epoch [10/10], Loss: 0.4429


100%|██████████| 10/10 [13:31<00:00, 81.13s/it]

Validation Accuracy: 10.28%



