<a href="https://colab.research.google.com/github/kmsrogerkim/HAI-Intro-to-Torch/blob/master/Intro_to_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms

from tqdm import tqdm
import matplotlib.pyplot as plt

# Dataset

In [None]:
# It is nice when a dataset can be loaded with a one-liner like this.
# The MNIST database is that of handwritten digits, commonly used for training and testing image classification.
# NOTE(review): both datasets are empty-list placeholders here; the real MNIST
# calls are kept commented out below — presumably to avoid the download.
train_dataset = []
test_dataset = []
# train_dataset = torchvision.datasets.MNIST(root='.', train=True, transform=torchvision.transforms.ToTensor(), download=True)
# test_dataset = torchvision.datasets.MNIST(root='.', train=False, transform=torchvision.transforms.ToTensor(), download=True)

# DataLoaders wrapping the datasets above, 512 samples per batch.
# NOTE(review): the test loader is shuffled as well — confirm this is intended.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=512, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=512, shuffle=True)

# You will see this pattern a lot, so get used to it:
#   1. resize the image,
#   2. convert it to a tensor,
#   3. normalize it.
# NOTE(review): mean/std each hold three values, so this presumably targets
# 3-channel images — verify before applying it to MNIST.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [None]:
# 좀 더 어렵고 실용적인 예시
from torch.utils.data import Dataset
class BinaryImageDataset(Dataset):
    """Map-style dataset yielding ``(image_tensor, label_tensor)`` pairs.

    Args:
        image_paths: Sequence of image file paths, indexed in parallel with
            ``labels``.
        labels: Sequence of integer class labels, one per path.
        transform: Optional callable applied to every loaded image. When
            omitted, a default pipeline is used: resize to 224x224, convert
            to a tensor, then normalize each of the three channels.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        if transform is None:
            # Default preprocessing, used only when the caller gives none.
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of samples (DataLoader needs this to batch)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, transform and return the sample at position ``idx``.

        Returns:
            Tuple ``(image, label)`` where ``image`` is the output of
            ``self.transform`` and ``label`` is a ``torch.long`` tensor.
        """
        image_path = self.image_paths[idx]
        label = self.labels[idx]

        # torchvision's default loader reads the file into memory as an RGB
        # image, which is the input the transform pipeline expects.
        image = torchvision.datasets.folder.default_loader(image_path)
        image = self.transform(image)
        return image, torch.tensor(label, dtype=torch.long)

    # Backward-compatible alias for code that called ``dataset.getitem(idx)``.
    getitem = __getitem__

# Placeholder: code that gathers the image file paths and their labels goes here.
# The `...` values throughout this cell are stand-ins to be replaced with real arguments.
image_paths: list = [...]
labels: list = [...]

# Split the collected paths and labels into train and test (validation) subsets.
from sklearn.model_selection import train_test_split
train_paths, val_paths, train_labels, val_labels = train_test_split(...)

# Create datasets
# NOTE(review): this rebinds `train_dataset`, which the MNIST cell also assigns.
train_dataset = BinaryImageDataset(...)
val_dataset = BinaryImageDataset(...)

# Create dataloaders (only the training loader shuffles)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False)

# Loss Function & Optimizer

In [None]:
# These, too, are available as one-liners.
# NOTE(review): `model` is not created in this cell — it must already exist when this runs.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Sometimes, however, a custom loss function has to be defined,
# for example when implementing a specific paper in code.
# That is unlikely to come up for you just yet, so we skip it here.

# Training

In [None]:
# It is enough to know that training proceeds roughly like this.
# `loss_values` collects the loss of every optimization step; `accuracy` is
# left for an evaluation step to fill in.
loss_values, accuracy = [], []

# Train on whatever device the model already lives on instead of assuming a
# GPU: a hard-coded .cuda() fails on CPU-only machines and whenever the model
# itself was never moved to the GPU.
device = next(model.parameters()).device

for epoch in tqdm(range(3)):
    print(f"Epoch: {epoch}")
    model.train()

    # Stays 0 when the dataloader is empty; otherwise ends as the batch count.
    step = 0

    for step, (image, label) in enumerate(tqdm(train_dataloader), start=1):
        # Flatten each image to a 28*28 vector and each label batch to 1-D.
        image = image.reshape(-1, 28*28).to(device)
        label = label.reshape(-1).to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        prediction = model(image)
        loss = loss_fn(prediction, label)

        loss.backward() # This is backpropagation
        optimizer.step()

        # Store a plain float so the autograd graph is not kept alive.
        loss_values.append(loss.item())

        # Maybe some more steps
        # But this is the core

# Model Definition

In [3]:
class Model(nn.Module):
    """Three-layer fully connected classifier that outputs class probabilities.

    Structure: Linear -> ReLU -> Linear -> ReLU -> Linear -> softmax.

    Args:
        in_features: Size of the last dimension of the input (default 2).
        hidden_features: Width of the two hidden layers (default 10).
        out_features: Number of output classes (default 2).

    Note:
        ``forward`` returns probabilities, not logits. ``nn.CrossEntropyLoss``
        applies log-softmax internally and expects raw logits, so do not feed
        this output to it directly.
    """

    def __init__(self, in_features=2, hidden_features=10, out_features=2):
        super().__init__()

        self.layer1 = nn.Linear(in_features, hidden_features)
        self.layer2 = nn.Linear(hidden_features, hidden_features)
        self.layer3 = nn.Linear(hidden_features, out_features)

        # Activation function: adds non-linearity between the linear layers.
        self.relu = nn.ReLU()

    # Runs automatically when the module instance is called.
    def forward(self, x):
        """Map inputs of shape ``(..., in_features)`` to ``(..., out_features)``."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.layer3(x)

        # Normalize over the class dimension explicitly. Without ``dim`` the
        # call is deprecated and picks dim 0 for 3-D inputs.
        return nn.functional.softmax(x, dim=-1)

## 좀 더 어려운 참고용 예시

In [5]:
# A harder example (for reference only)
class Discriminator(nn.Module):
    """MLP that maps a 28*28-feature input to one sigmoid-activated output.

    Three hidden stages (Linear -> LeakyReLU(0.2) -> Dropout(0.3)) of widths
    1024, 512 and 256 are followed by a final Linear(256, 1) and a Sigmoid.
    """

    def __init__(self):
        super().__init__()

        widths = (28*28, 1024, 512, 256)
        stack = []
        # One hidden stage per consecutive pair of widths.
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.LeakyReLU(0.2), nn.Dropout(0.3)]
        # Output head: a single value squashed by the sigmoid.
        stack += [nn.Linear(widths[-1], 1), nn.Sigmoid()]

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        scores = self.model(x)
        return scores

# U-Net written by Myeong-gu
class DownBlock(nn.Module):
    """Encoder stage: two conv + batch-norm + ReLU units, then 2x2 max pooling.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels of both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()

        # Both convolutions share a 3x3 kernel, stride 1 and padding 1.
        conv_cfg = dict(kernel_size=3, stride=1, padding=1)

        self.model = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, **conv_cfg),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Conv2d(out_ch, out_ch, **conv_cfg),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        """Apply the stage to ``x`` and return the pooled feature map."""
        pooled = self.model(x)
        return pooled

class UpBlock(nn.Module):
    """Decoder stage that merges a skip tensor and upsamples with a transposed conv.

    Args:
        in_ch: Channel count of each of the two tensors passed to ``forward``;
            the first layer therefore takes ``2 * in_ch`` channels.
        out_ch: Number of output channels.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()

        self.model = nn.Sequential(
            # Transposed convolution with stride 2 and output_padding 1.
            nn.ConvTranspose2d(
                2 * in_ch,
                out_ch,
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=1,
            ),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        )

    def forward(self, x, skip):
        """Concatenate ``x`` and ``skip`` along dim 1 and run the stage."""
        merged = torch.cat([x, skip], dim=1)
        return self.model(merged)

class EmbedBlock(nn.Module):
    """Two-layer MLP whose output is reshaped to ``(batch, emb_dim, 1, 1)``.

    Args:
        input_dim: Number of features per sample; inputs are viewed as
            ``(-1, input_dim)`` before the first linear layer.
        emb_dim: Size of the embedding.
    """

    def __init__(self, input_dim, emb_dim):
        super().__init__()
        self.input_dim = input_dim
        self.model = nn.Sequential(
            nn.Linear(input_dim, emb_dim),
            nn.ReLU(),
            nn.Linear(emb_dim, emb_dim),
            # Append two singleton dims after the embedding dimension.
            nn.Unflatten(dim=1, unflattened_size=(emb_dim, 1, 1)),
        )

    def forward(self, input):
        """Embed ``input`` after reshaping it to ``(-1, input_dim)``."""
        flat = input.view(-1, self.input_dim)
        return self.model(flat)


class Unet(nn.Module):
    """U-Net-style noise-prediction network with a built-in linear beta schedule.

    The encoder is ``down0`` followed by four ``DownBlock`` stages. The
    flattened bottleneck passes through a dense stack (``dense_emb``) and is
    decoded by ``up0`` plus four ``UpBlock`` stages that take the encoder
    outputs as skip inputs. An embedding of the normalized timestep is added
    to the decoder features before each ``UpBlock``.

    Args:
        ch: Number of image channels (input and output).
        size: Height and width of the square image. The bottleneck
            resolution is computed as ``size // 16``.
        _down_chs: Channel widths of the encoder stages; indices 0-4 are read.
        timestep: Number of steps in the beta schedule.
    """

    def __init__(self, ch, size, _down_chs, timestep=1000):
        super().__init__()
        down_chs = _down_chs    # len = 5
        up_chs = down_chs[::-1]
        latent_image_size = size // 16 # 2 ** (len(down_chs) - 1)
        t_dim = 1

        # Shape of the single-image batch generated by ``sampling``.
        self.image_size = (1, ch, size, size)

        self.down0 = nn.Sequential(
            nn.Conv2d(ch, down_chs[0], 3, padding=1),
            nn.BatchNorm2d(down_chs[0]),
            nn.ReLU(),
        )

        self.down1 = DownBlock(down_chs[0], down_chs[1])
        self.down2 = DownBlock(down_chs[1], down_chs[2])
        self.down3 = DownBlock(down_chs[2], down_chs[3])
        self.down4 = DownBlock(down_chs[3], down_chs[4])
        self.to_vec = nn.Sequential(nn.Flatten(), nn.ReLU())

        # Dense bottleneck: narrows the flattened features down to
        # down_chs[1] and widens them back to the original flattened size.
        self.dense_emb = nn.Sequential(
            nn.Linear(down_chs[4]*latent_image_size**2, down_chs[3]),
            nn.ReLU(),
            nn.Linear(down_chs[3], down_chs[2]),
            nn.ReLU(),
            nn.Linear(down_chs[2], down_chs[1]),
            nn.ReLU(),
            nn.Linear(down_chs[1], down_chs[2]),
            nn.ReLU(),
            nn.Linear(down_chs[2], down_chs[3]),
            nn.ReLU(),
            nn.Linear(down_chs[3], down_chs[4]*latent_image_size**2),
            nn.ReLU(),
        )

        # One timestep embedding per decoder stage, sized to that stage's input.
        self.temb_1 = EmbedBlock(t_dim, up_chs[0])
        self.temb_2 = EmbedBlock(t_dim, up_chs[1])
        self.temb_3 = EmbedBlock(t_dim, up_chs[2])
        self.temb_4 = EmbedBlock(t_dim, up_chs[3])

        self.up0 = nn.Sequential(
            nn.Unflatten(1, (up_chs[0], latent_image_size, latent_image_size)),
            nn.Conv2d(up_chs[0], up_chs[0], 3, padding=1),
            nn.BatchNorm2d(up_chs[0]),
            nn.ReLU(),
        )
        self.up1 = UpBlock(up_chs[0], up_chs[1])
        self.up2 = UpBlock(up_chs[1], up_chs[2])
        self.up3 = UpBlock(up_chs[2], up_chs[3])
        self.up4 = UpBlock(up_chs[3], up_chs[4])

        self.out = nn.Sequential(
            nn.Conv2d(up_chs[-1], up_chs[-1], 3, 1, 1),
            nn.BatchNorm2d(up_chs[-1]),
            nn.ReLU(),
            nn.Conv2d(up_chs[-1], ch, 3, 1, 1),
        )

        # Linear beta schedule and the quantities derived from it. These are
        # plain tensors (not buffers), so they do not follow ``.to(device)``
        # and are moved explicitly where they are used.
        self.timestep = timestep
        self.betas = torch.linspace(1e-4, 2e-2, self.timestep)
        self.alphas = 1 - self.betas
        self.alphas_bar = torch.cumprod(self.alphas, -1)

    def forward(self, x, t):
        """Predict the noise contained in ``x`` at timestep ``t``.

        Args:
            x: Image batch of shape ``(batch, ch, size, size)``.
            t: Tensor of timestep indices, one per sample.

        Returns:
            Tensor produced by the final ``out`` head, with ``ch`` channels.
        """
        down0 = self.down0(x)
        down1 = self.down1(down0)
        down2 = self.down2(down1)
        down3 = self.down3(down2)
        down4 = self.down4(down3)
        latent_vec = self.to_vec(down4)

        # Scale the timestep by the schedule length. Dividing by the Python
        # int avoids allocating a one-element tensor on every call.
        t = t.float() / self.timestep
        latent_vec = self.dense_emb(latent_vec)
        temb_1 = self.temb_1(t)
        temb_2 = self.temb_2(t)
        temb_3 = self.temb_3(t)
        temb_4 = self.temb_4(t)

        # Each decoder stage gets the previous output plus its timestep
        # embedding, concatenated with the matching encoder output.
        up0 = self.up0(latent_vec)
        up1 = self.up1(up0+temb_1, down4)
        up2 = self.up2(up1+temb_2, down3)
        up3 = self.up3(up2+temb_3, down2)
        up4 = self.up4(up3+temb_4, down1)
        return self.out(up4)

    def get_loss(self, input, t):
        """Return the mean squared error between sampled noise and its prediction.

        Args:
            input: Clean image batch.
            t: Integer tensor of timestep indices used to index the schedule.
        """
        alphas_bar = self.alphas_bar.to(input.device)
        alphas_bar_t = alphas_bar[t].view(-1, 1, 1, 1)

        # Mix the clean input with Gaussian noise according to alphas_bar[t].
        noise = torch.randn_like(input)
        input = alphas_bar_t.sqrt() * input + (1 - alphas_bar_t).sqrt() * noise

        pred = self(input, t)

        #loss = F.mse_loss(pred, noise)
        loss = (noise - pred).square().mean()

        return loss

    @torch.no_grad()
    def sampling(self):
        """Generate one image by iterating the reverse process from pure noise.

        Runs without gradient tracking so the ``timestep`` network calls do
        not accumulate an autograd graph.

        NOTE(review): the module's train/eval mode is left untouched; the
        BatchNorm layers behave differently in the two modes, so callers
        presumably want ``model.eval()`` first — confirm.

        Returns:
            Tensor of shape ``self.image_size`` on the model's device.
        """
        # Sample on the device that holds the weights. Choosing by CUDA
        # availability breaks when a CPU-resident model runs on a CUDA host.
        device = next(self.parameters()).device
        betas = self.betas.to(device)
        alphas = self.alphas.to(device)
        alphas_bar = self.alphas_bar.to(device)

        x = torch.randn(self.image_size, device=device)

        iteration = tqdm(range(0, self.timestep)[::-1])
        iteration.set_description('Sampling...')

        for t in iteration:
            sigma = betas[t].sqrt()

            # No noise is added on the final step (t == 0).
            if t > 0:
                z = torch.randn(self.image_size, device=device)
            else:
                z = 0

            pred = self(x, torch.tensor([t], device=x.device))
            # Reverse update (matches the DDPM sampling rule): remove the
            # predicted noise, rescale, then add fresh noise scaled by sigma.
            x = (1 / alphas[t].sqrt()) * (x - (1 - alphas[t]) / (1 - alphas_bar[t]).sqrt() * pred) + sigma * z

        return x

# 사실 "Torch"를 배운다는 것은 모순
- NN 아키텍처를 모르는 상태에서 토치 코드를 이해하려고 하는 것은 미련한 시간 낭비
- 애당초에 불가능 함

- 예를 들자면, 위의 Discriminator 모델을 이해하기 위해서는 [Goodfellow I. J., et al.의 Generative Adversarial Nets (GAN) 논문](https://arxiv.org/abs/1406.2661)을 이해해야 함

- 그리고 그 다음 블록 (U-Net)을 이해하려면 [Ronneberger O., et al.의 U-Net 논문](https://arxiv.org/abs/1505.04597)을 읽어야 함

- 위의 논문들은 나도 안 읽어 봄

- 그렇게 논문을 읽어서 이해할 수 있다고 해도
- 각각 논문에 대해서 Loss function을 써야하고
- 데이터 생성, 샘플링 등에 관한 코드도 작성해야 하고
- 트레이닝을 시켜야한다

### 그런거는 지피티한테 해달라고 정중히 부탁하는 편이다