In [1]:
import torch
import torchsummary
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data

import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import DataLoader, Dataset
from PIL import Image
from glob import glob

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
class CatsvsDogs(Dataset):
    """Binary image dataset over the Kaggle "Cats-vs-Dogs" images.

    Download: https://www.kaggle.com/datasets/shaunthesheep/microsoft-catsvsdogs-dataset

    Images are collected from ``<root>/PetImages/Dog/*.jpg`` and
    ``<root>/PetImages/Cat/*.jpg``. For each class the sorted file list is
    split 80/20: the first 80% belong to the ``'train'`` phase and the
    remaining 20% to the ``'test'`` phase, so the phases never share a file.

    Labels: Dog -> 1, Cat -> 0.

    Attributes:
        transform: callable applied to every loaded RGB image.
        img_path: list of image file paths for the selected phase (dogs first, then cats).
        labels: list of integer labels aligned with ``img_path``.
    """

    # Fraction of each class assigned to the 'train' phase.
    TRAIN_RATIO = 0.8

    def __init__(self, root: str, transform, phase: str = 'train'):
        """Collect the image paths and labels that belong to ``phase``.

        Args:
            root: dataset directory, e.g. "../Cats-vs-Dogs" (trailing "/" optional).
            transform: callable applied to each RGB PIL image,
                e.g. ``torchvision.transforms.Compose([...])``.
            phase: ``'train'`` or ``'test'``.

        Raises:
            AssertionError: if ``phase`` is neither ``'train'`` nor ``'test'``.
        """
        assert phase == "test" or phase == "train", f"{phase} is not match. phase using train or test"

        prefix = root if (not root or root.endswith("/")) else root + "/"

        # glob() returns files in arbitrary, filesystem-dependent order; sorting
        # makes the train/test partition reproducible.
        dog_img_path = sorted(glob(prefix + "PetImages/Dog/*.jpg"))
        cat_img_path = sorted(glob(prefix + "PetImages/Cat/*.jpg"))

        def select(paths):
            # Same cut point for both phases: train takes [:cut], test takes [cut:].
            cut = int(len(paths) * self.TRAIN_RATIO)
            return paths[:cut] if phase == "train" else paths[cut:]

        dogs = select(dog_img_path)
        cats = select(cat_img_path)

        self.transform = transform
        self.img_path = dogs + cats
        # The class is known from the directory that was globbed, so labels are
        # recorded here instead of being re-parsed from each path string later.
        self.labels = [1] * len(dogs) + [0] * len(cats)

    def __len__(self):
        """Return the total number of samples in the selected phase."""
        return len(self.img_path)

    def __getitem__(self, idx):
        """Load one sample and return ``(transformed_image, label)``."""
        # The context manager releases the file handle; convert() hands back
        # an image that is already loaded into memory.
        with Image.open(self.img_path[idx]) as img:
            img = img.convert("RGB")

        return self.transform(img), self.labels[idx]

In [3]:
# NOTE(review): absolute, machine-specific dataset location — adjust for your environment.
train_data = CatsvsDogs(
    root = "/data/DataSet/Cats-vs-Dogs/",
    phase="train",
    transform=transforms.Compose([
        # Resize first, while the sample is still a PIL image, so the later
        # steps only ever process 224x224 pixels.
        transforms.Resize(
            size=(224, 224)
        ),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel (value - 0.5) / 0.5.
        transforms.Normalize(
            mean=(0.5, 0.5, 0.5),
            std=(0.5, 0.5, 0.5)
        ),
    ])
)

batch_size = 8
num_workers = 0  # 0 = load batches in the main process

train_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers
)

In [4]:
class vgg_layers(nn.Module):
    """Convolutional feature extractor built from a VGG-style layer configuration.

    Every integer in ``config`` adds a 3x3 convolution (stride 1, padding 1)
    with that many output channels, optionally followed by ``BatchNorm2d``,
    then an in-place ``ReLU``. Every ``"M"`` adds a 2x2 max-pool with stride 2,
    which halves the spatial resolution.

    Args:
        config: sequence of ints (conv output channels) and ``"M"`` markers.
        normalize: insert ``BatchNorm2d`` after each convolution when True.
        in_channels: number of channels of the input image.

    Raises:
        AssertionError: if an entry of ``config`` is neither ``"M"`` nor an int.
    """

    def __init__(self, config: list, normalize: bool = True, in_channels: int = 3):
        super(vgg_layers, self).__init__()

        layers = []

        for out_channels in config:
            assert out_channels == "M" or isinstance(out_channels, int)

            if out_channels == "M":
                # Stride 2 / no padding downsamples by a factor of two. The
                # previous stride 1 / padding 1 setting enlarged the feature
                # map by one pixel per pooling layer instead.
                layers.append(
                    nn.MaxPool2d(
                        kernel_size=(2, 2),
                        stride=(2, 2),
                        padding=(0, 0)
                    )
                )
                continue

            layers.append(
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1
                )
            )
            if normalize:
                layers.append(
                    nn.BatchNorm2d(
                        num_features=out_channels
                    )
                )
            layers.append(
                nn.ReLU(
                    inplace=True
                )
            )
            # The next convolution consumes what this one produced.
            in_channels = out_channels

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked conv / norm / ReLU / pool layers to ``x``."""
        return self.model(x)

In [5]:
class VGG(nn.Module):
    """VGG-style classifier: feature extractor, 7x7 adaptive pooling, 3-layer MLP head.

    Args:
        features: module mapping an image batch to a feature map with
            ``in_channels`` channels.
        out_features: number of output classes.
        in_channels: channel count produced by ``features`` (default 512,
            the value that was previously hard-coded).
        hidden_features: width of the two hidden linear layers (default 4096,
            the value that was previously hard-coded).
    """

    def __init__(self, features, out_features: int = 1000,
                 in_channels: int = 512, hidden_features: int = 4096):
        super(VGG, self).__init__()

        self.features = features
        # Pools any spatial size down to 7x7.
        self.avgpool = nn.AdaptiveAvgPool2d(
            output_size=(7, 7)
        )
        self.classifier = nn.Sequential(
            nn.Linear(
                in_features=in_channels * 7 * 7,
                out_features=hidden_features
            ),
            nn.ReLU(inplace=True),
            nn.Dropout(
                p=0.5
            ),

            nn.Linear(
                in_features=hidden_features,
                out_features=hidden_features
            ),
            nn.ReLU(inplace=True),
            nn.Dropout(
                p=0.5
            ),
            nn.Linear(
                in_features=hidden_features,
                out_features=out_features
            )
        )

    def forward(self, x):
        """Return ``(class_scores, flattened_features)`` for the batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 512*7*7), a channel mismatch cannot silently be absorbed
        # into the batch dimension; it surfaces as a shape error in the
        # first Linear layer.
        h = x.flatten(start_dim=1)

        x = self.classifier(h)
        return x, h

In [6]:
# VGG layer configurations. An int is the output-channel count of one
# convolution layer; "M" marks a max-pooling layer. Each row below is one stage.
# The trailing "+ 3" in the counts refers to the three linear layers of the
# classifier head.

vgg11_config = [
    64, "M",
    128, "M",
    256, 256, "M",
    512, 512, "M",
    512, 512, "M",
]  # 8 conv + 3 linear = 11

vgg13_config = [
    64, 64, "M",
    128, 128, "M",
    256, 256, "M",
    512, 512, "M",
    512, 512, "M",
]  # 10 conv + 3 linear = 13

vgg16_config = [
    64, 64, "M",
    128, 128, "M",
    256, 256, 256, "M",
    512, 512, 512, "M",
    512, 512, 512, "M",
]  # 13 conv + 3 linear = 16

vgg19_config = [
    64, 64, "M",
    128, 128, "M",
    256, 256, 256, 256, "M",
    512, 512, 512, 512, "M",
    512, 512, 512, 512, "M",
]  # 16 conv + 3 linear = 19

In [7]:
# Build the feature extractor from the 19-layer configuration so the network
# matches its `vgg19_layers` name (it was previously built from vgg11_config).
vgg19_layers = vgg_layers(vgg19_config, normalize=True)

# Two output classes: Cat (0) and Dog (1).
model = VGG(vgg19_layers, 2)

In [8]:
# Load torchvision's VGG-19 with batch normalisation, requesting pretrained
# weights, and print its layer structure for reference.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour
# of the `weights=` argument — confirm against the installed version.
pretrained_model = models.vgg19_bn(pretrained=True)
print(pretrained_model)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [9]:
# Move the model to the target device before creating the optimizer, so the
# optimizer is built over parameters that already live where training happens.
model = model.to(device)

criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# NOTE(review): lr=1e-7 is very small for Adam — presumably the starting value
# for a learning-rate search; confirm before using it for real training.
optimizer = optim.Adam(model.parameters(), lr=1e-7)

In [10]:
# Smoke test: push one batch through the model and inspect the output shape.
# Draft body of a future `train(model, iterator, optimizer, criterion, device)` function.
model.train()

for x, y in train_loader:
    
    print(type(y))
    x = x.to(device)
    y = y.to(device)

    # Clears existing gradients; no backward pass or optimizer step follows in this cell.
    optimizer.zero_grad()

    # The model's forward returns a tuple (class scores, flattened features);
    # element 0, the class scores, is what gets printed.
    y_hat = model(x)
    print(y_hat[0].shape)

    # Only the first batch is inspected.
    break

<class 'torch.Tensor'>
torch.Size([8, 2])
