In [3]:
import torch
import torchsummary
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data

import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import DataLoader, Dataset
from PIL import Image
from glob import glob

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
class CatsvsDogs(Dataset):
    """Cats-vs-Dogs image dataset with a fixed 80/20 train/test split per class.

    Dataset download:
    https://www.kaggle.com/datasets/shaunthesheep/microsoft-catsvsdogs-dataset

    Images are collected from ``<root>/PetImages/Dog/*.jpg`` and
    ``<root>/PetImages/Cat/*.jpg``. Within each class the (sorted) file list is
    cut at 80%: the first part is the ``'train'`` phase, the remainder is the
    ``'test'`` phase, so the two phases never share a file.
    """

    def __init__(self, root: str, transform, phase: str = 'train', ):
        """Collect the image paths that belong to the requested phase.

        Args:
            root: Directory that contains ``PetImages/``, e.g.
                ``"../Cats-vs-Dogs"``. A trailing ``/`` is optional; an empty
                string means the current working directory.
            transform: Callable applied to every loaded RGB ``PIL.Image``
                (e.g. a ``torchvision.transforms.Compose``).
            phase: ``'train'`` or ``'test'``.

        Raises:
            AssertionError: If ``phase`` is neither ``'train'`` nor ``'test'``.
        """
        # Validate before touching the filesystem so a typo fails immediately.
        assert phase == "test" or phase == "train", f"{phase} is not match. phase using train or test"

        def get_img_path(root: str):
            # Normalise the prefix: "/data/DataSet/Cats-vs-Dogs" and
            # "/data/DataSet/Cats-vs-Dogs/" must behave the same. An empty
            # root is left empty so the patterns stay relative.
            if root and not root.endswith("/"):
                root = root + "/"

            def split(paths):
                # First 80% of a class -> train, remaining 20% -> test.
                cut = int(len(paths) * 0.8)
                return paths[:cut] if phase == "train" else paths[cut:]

            # glob() returns files in arbitrary, filesystem-dependent order;
            # sorting makes the split identical on every run and machine.
            dog_img_path = sorted(glob(root + "PetImages/Dog/*.jpg"))
            cat_img_path = sorted(glob(root + "PetImages/Cat/*.jpg"))

            return split(dog_img_path) + split(cat_img_path)

        self.transform = transform

        self.img_path = get_img_path(root)

    def __len__(self):
        """Return the total number of samples in this phase."""
        return len(self.img_path)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A ``(image, label)`` pair where ``image`` is the result of
            ``self.transform`` on the RGB image and ``label`` is the name of
            the image's parent directory (``"Dog"`` or ``"Cat"``).
        """
        img_path = self.img_path[idx]

        # convert() produces a new, fully loaded image, so the underlying file
        # can be closed right away instead of being left to the GC.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        img_transformed = self.transform(img)

        # The class name is the parent directory: .../PetImages/<label>/<file>.jpg
        # NOTE(review): the label is returned as a string; it has to be mapped
        # to an integer index before it can be fed to a loss function.
        label = img_path.split('/')[-2]

        return img_transformed, label

In [None]:
# Training split of the Cats-vs-Dogs dataset.
# Resize runs first, on the PIL image: this works on every torchvision version
# and avoids converting each full-resolution photo to a float tensor before
# shrinking it. ToTensor then scales to [0, 1] and Normalize maps to [-1, 1].
# NOTE(review): the dataset root is an absolute, machine-specific path.
train_data = CatsvsDogs(
    root="/data/DataSet/Cats-vs-Dogs/",
    phase="train",
    transform=transforms.Compose([
        transforms.Resize(
            size=(224, 224)
        ),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.5, 0.5, 0.5),
            std=(0.5, 0.5, 0.5)
        ),
    ])
)

batch_size = 512
num_workers = 4

# Shuffle every epoch; images are decoded in `num_workers` background processes.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers
)

In [None]:
class VGG19(nn.Module):
    """VGG19-style classifier with batch normalisation after every convolution.

    Five convolutional stages (2, 2, 4, 4, 4 conv layers) each end in a 2x2
    max-pool that halves the spatial size, followed by three fully connected
    layers. The first fully connected layer is sized for a 512x7x7 feature
    map, so the network expects input of shape ``(N, 3, 224, 224)``.

    Args:
        num_classes: Width of the final linear layer. Defaults to 1000.

    The forward pass returns a ``(N, num_classes)`` tensor of softmax
    probabilities.
    """

    def __init__(self, num_classes: int = 1000):
        super(VGG19, self).__init__()

        # 3x224x224 -> 64x112x112
        self.layer1 = self._conv_block(3, 64, num_convs=2)

        # 64x112x112 -> 128x56x56
        self.layer2 = self._conv_block(64, 128, num_convs=2)

        # 128x56x56 -> 256x28x28
        self.layer3 = self._conv_block(128, 256, num_convs=4)

        # 256x28x28 -> 512x14x14
        self.layer4 = self._conv_block(256, 512, num_convs=4)

        # 512x14x14 -> 512x7x7
        self.layer5 = self._conv_block(512, 512, num_convs=4)

        # 512*7*7 -> 4096
        self.layer6 = nn.Sequential(
            nn.Linear(
                in_features=512 * 7 * 7,
                out_features=4096
            ),
            nn.ReLU(inplace=True),
            nn.Dropout(
                p=0.5
            )
        )

        # 4096 -> 4096
        # (No trailing comma after the closing parenthesis: a comma would turn
        # this attribute into a tuple that is neither registered nor callable.)
        self.layer7 = nn.Sequential(
            nn.Linear(
                in_features=4096,
                out_features=4096
            ),
            nn.ReLU(inplace=True),
            nn.Dropout(
                p=0.5
            )
        )

        # 4096 -> num_classes
        # NOTE(review): the Softmax makes the model emit probabilities. If the
        # model is trained with nn.CrossEntropyLoss, which applies log-softmax
        # itself, this layer should be removed so the loss receives raw logits.
        self.layer8 = nn.Sequential(
            nn.Linear(
                in_features=4096,
                out_features=num_classes
            ),
            nn.Softmax(
                dim=1
            )
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int, num_convs: int) -> nn.Sequential:
        """Build `num_convs` x (3x3 Conv -> BatchNorm -> ReLU) followed by a 2x2 max-pool.

        The convolutions use stride 1 and padding 1, so they keep the spatial
        size; the pool uses stride 2 and no padding, so it halves it exactly.
        """
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(
                nn.Conv2d(
                    in_channels=channels,
                    out_channels=out_channels,
                    kernel_size=(3, 3),
                    stride=(1, 1),
                    padding=(1, 1)
                )
            )
            layers.append(nn.BatchNorm2d(num_features=out_channels))
            layers.append(nn.ReLU(inplace=True))
            # Only the first convolution of a stage changes the channel count.
            channels = out_channels
        layers.append(
            nn.MaxPool2d(
                kernel_size=(2, 2),
                stride=(2, 2)
            )
        )
        return nn.Sequential(*layers)

    def forward(self, x):
        """Classify a batch of images.

        Args:
            x: Tensor of shape ``(N, 3, 224, 224)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` whose rows sum to 1.
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)

        # Flatten everything except the batch dimension: (N, 512, 7, 7) -> (N, 512*7*7).
        # Keeping the batch dimension fixed means a wrong input size fails in
        # the Linear layer instead of silently mixing samples.
        x = torch.flatten(x, start_dim=1)

        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        return x