# 1. 데이터 가져오기

In [1]:
import torchvision.transforms as T
import torchvision
import torch
from torch.utils.data import DataLoader

In [2]:
cifar10_transform = T.Compose([
    T.ToTensor()
])

download_root = './CIFAR10_DATASET'

train_dataset = torchvision.datasets.CIFAR10(download_root, transform=cifar10_transform, train=True, download=False) # train dataset 다운로드
test_dataset = torchvision.datasets.CIFAR10(download_root, transform=cifar10_transform, train=False, download=False) # test dataset 다운로드

In [3]:
total_size = len(train_dataset)
train_num, valid_num = int(total_size * 0.8), int(total_size * 0.2)

train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, [train_num, valid_num])

print(f"Train dataset 개수 : {train_num}")
print(f"Valid dataset 개수 : {valid_num}")
print(f"Test dataset 개수 : {len(test_dataset)}")

Train dataset 개수 : 40000
Valid dataset 개수 : 10000
Test dataset 개수 : 10000


In [4]:
BATCH_SIZE = 32

train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)

In [5]:
for data, label in train_dataloader:
    print(data.shape)
    break

torch.Size([32, 3, 32, 32])


# 2. 모델 생성

In [7]:
import torch
import torch.nn as nn
from pytorch_lightning import LightningModule, Trainer, LightningDataModule
import torch.optim as optim

import torchmetrics

In [8]:
class Classifier(LightningModule):
    def __init__(self, num_classes, dropout_ratio, lr = 0.001):
        super().__init__()
        self.learning_rate = lr
        self.accuracy = torchmetrics.Accuracy(task = 'multiclass', num_classes = num_classes)
        self.criterion = nn.CrossEntropyLoss()

        self.num_classes = num_classes
        self.dropout_ratio = dropout_ratio
        '''
        Input : [32, 3, 32, 32] => [batch_size, channel, height, weidth]
        nn.Conv2d(1) : [32 , 16, 28, 28]
        nn.Conv2d(2) : [32 , 32, 24, 24]
        nn.MaxPool2d(1) : [32, 32, 12, 12]
        nn.Conv2d(3) : [32, 64, 8, 8]
        nn.MaxPool2d(2) : [32, 64, 4, 4]
        '''

        self.layer = nn.Sequential(
            nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 5), 
            nn.ReLU(),
            nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
            nn.Dropout(self.dropout_ratio),
            nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2),
            nn.Dropout(self.dropout_ratio),
        )

        self.flatten = nn.Flatten()
        self.fc_layer = nn.Linear(1024, self.num_classes)

    def forward(self, x):
        out = self.layer(x)
        out = self.flatten(out)
        out = self.fc_layer(out)

        return out

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr = self.learning_rate)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma = 0.9)

        return [optimizer], [scheduler]
    
    def training_step(self, batch, batch_idx):
        images, labels = batch

        outputs = self(images)

        loss = self.criterion(outputs, labels)
        acc = self.accuracy(outputs, labels)

        self.log("train_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log("train_acc", acc, on_step = False, on_epoch = True, logger = True)

        return loss
    
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        loss = self.criterion(outputs, labels)

        _, preds = torch.max(outputs, dim = 1)
        acc = self.accuracy(preds, labels)

        self.log(f"valid_loss", loss, on_step = False, on_epoch = True, logger = True)
        self.log(f"valid_acc", acc, on_step = False, on_epoch = True, logger = True)

    def test_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        loss = self.criterion(outputs, labels)
        _, preds = torch.max(outputs, dim = 1)
        acc = self.accuracy(preds, labels)

        self.log(f"test_loss", loss, on_step = False, on_epoch = True)
        self.log(f"test_acc", acc, on_step = False, on_epoch = True)

    def predict_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        _, preds = torch.max(outputs, dim = 1)

        return preds

In [11]:
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import CSVLogger, WandbLogger, TensorBoardLogger

model = Classifier(num_classes = 10, dropout_ratio = 0.2)

early_stopping = EarlyStopping(monitor = 'valid_loss', mode = 'min')
lr_monitor = LearningRateMonitor(logging_interval = "epoch")

csv_logger = CSVLogger(save_dir = "./csv_logger", name = 'test')
# wandb_logger = WandbLogger(project="CIFAR_CNN")
# tensorboard_logger = TensorBoardLogger("tb_logs", name="CIFAR_CNN")


trainer = Trainer(
    max_epochs = 100,
    accelerator = 'auto',
    callbacks = [early_stopping, lr_monitor],
    logger = csv_logger
)

trainer.fit(model, train_dataloader, valid_dataloader)
trainer.test(model, test_dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | Mode 
---------------------------------------------------------
0 | accuracy  | MulticlassAccuracy | 0      | train
1 | criterion | CrossEntropyLoss   | 0      | train
2 | layer     | Sequential         | 65.3 K | train
3 | flatten   | Flatten            | 0      | train
4 | fc_layer  | Linear             | 10.2 K | train
5 | softmax   | Softmax            | 0      | train
---------------------------------------------------------
75.6 K    Trainable params
0         Non-trainable params
75.6 K    Total params
0.302     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\tjsgh\anaconda3\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
c:\Users\tjsgh\anaconda3\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\tjsgh\anaconda3\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 1.8398627042770386, 'test_acc': 0.6216999888420105}]