In [1]:
import os

import torch 
import torchvision
import torch.nn.functional as F
from torch import nn, optim
from torchvision import transforms, datasets

import matplotlib.pyplot as plt

import zipfile
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

from torch.utils.data import DataLoader



In [2]:
class args:
    """Namespace of run-wide hyperparameters, read as class attributes."""

    # Presumably the number of training passes; the training loop is not
    # visible in this section -- confirm against the cell that uses it.
    epoch: int = 10
    # Presumably the mini-batch size handed to the DataLoader -- confirm.
    batch_size: int = 64

* device를 미리 설정해 두면 `.to(device)`를 편리하게 쓸 수 있음. GPU가 사용 가능한 상태라도 device를 "cpu"로 설정해 놓으면 CPU만 사용하게 됨

In [3]:
# `torch.cuda.is_available` has to be *called*: the bare function object is
# always truthy, which would select "cuda" even on a machine without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# To force CPU execution even when a GPU is present, override with:
# device = "cpu"

* Kaggle에서 Dogs vs. Cats Competition 데이터를 받아온 후 압축 해제

In [4]:
# Competition archives as mounted by Kaggle (read-only input directory).
train_data_path = "/kaggle/input/dogs-vs-cats/train.zip"
test_data_path =  "/kaggle/input/dogs-vs-cats/test1.zip"
# Extract to an explicit location instead of the implicit current working
# directory: the following cells read from "/kaggle/working/train".
extract_dir = "/kaggle/working"
for path in [train_data_path, test_data_path]:
    # Bind the archive to `archive`, not `zip`, so the builtin `zip()` is not
    # shadowed for every cell that runs afterwards.
    with zipfile.ZipFile(path, "r") as archive:
        archive.extractall(extract_dir)

* 가용 메모리상 전체 데이터를 사용할 수 없어, 파일 이름의 번호 순으로 정렬한 후 고양이와 개 각각 512개씩만 무작위로 추출

In [5]:
# List every extracted training file once, then split the names by class.
train_files = os.listdir("/kaggle/working/train")

# Names are dot-separated: the first field is the class label and the second
# field is parsed as an integer, which gives each class a numeric ordering.
cat_files = [name for name in train_files if name.split(".")[0] == "cat"]
cat_files.sort(key=lambda name: int(name.split(".")[1]))

dog_files = [name for name in train_files if name.split(".")[0] == "dog"]
dog_files.sort(key=lambda name: int(name.split(".")[1]))

In [6]:
# Draw 512 *distinct* files per class. `replace=False` matters: the default
# samples with replacement, so the same image could be picked several times.
# A dedicated seeded generator makes the subset identical on every
# Restart & Run All; change the seed to draw a different subset.
sample_rng = np.random.default_rng(42)
cat_random_sample = sample_rng.choice(cat_files, 512, replace=False)
dog_random_sample = sample_rng.choice(dog_files, 512, replace=False)

* Dogs vs. Cats 파일의 이미지 크기가 모두 달라, 256 * 256 크기로 Resize

In [7]:
def img_resize(files, path, size=(256, 256)):
    """Load images, force them to RGB, resize them and stack them into one array.

    Args:
        files: Iterable of file names located directly under ``path``.
        path: Directory that contains the image files.
        size: Target ``(width, height)`` passed to ``Image.resize``.
            Defaults to ``(256, 256)``, the size used throughout the notebook.

    Returns:
        ``numpy.ndarray`` of shape ``(len(files), height, width, 3)`` and dtype
        ``uint8``. An empty ``files`` yields an array of shape
        ``(0, height, width, 3)``.
    """
    resized_images = []
    for file in tqdm(files):
        # The context manager releases the file handle as soon as the pixels
        # have been read instead of waiting for garbage collection.
        with Image.open(f"{path}/{file}") as file_img:
            # Grayscale / palette / RGBA files would otherwise produce arrays
            # with a different channel layout and break the final stacking.
            resized = file_img.convert("RGB").resize(size)
        resized_images.append(np.array(resized))

    if not resized_images:
        # Keep the 4-D layout for empty input so callers can rely on the rank.
        return np.empty((0, size[1], size[0], 3), dtype=np.uint8)
    return np.stack(resized_images)


In [8]:
# Load and resize the sampled files of each class into one NumPy array per
# class (cats first, then dogs, so the progress bars appear in that order).
numpy_cat = img_resize(files=cat_random_sample, path="/kaggle/working/train")
numpy_dog = img_resize(files=dog_random_sample, path="/kaggle/working/train")

100%|██████████| 512/512 [00:01<00:00, 272.92it/s]
100%|██████████| 512/512 [00:02<00:00, 223.79it/s]


* 일반적인 image shape은 [height, width, channel] 이지만 torch에서 사용하는 image shape은 [channel, height, width]
* torchvision 패키지의 transforms.ToTensor를 사용하여 torch에 맞는 image shape으로 바꿔주는 것

In [9]:
# Build the transform once and reuse it for every image. ToTensor turns a
# [height, width, channel] array into a [channel, height, width] tensor.
to_tensor = transforms.ToTensor()

# One tensor per class: convert each image, then stack along a new first axis.
torch_cat = torch.stack([to_tensor(image) for image in numpy_cat])
torch_dog = torch.stack([to_tensor(image) for image in numpy_dog])

* 간단한 구조의 Convolution Layer로 Encoder, Decoder 구성

In [None]:
class AutoEncoder_conv(nn.Module):
    """Small convolutional autoencoder for 3-channel images.

    The encoder runs two 3x3 convolutions (3 -> 16 -> 32 channels), each
    followed by ReLU and a 2x2 max pooling that halves height and width.
    The decoder runs two stride-2 transposed convolutions (32 -> 16 -> 3
    channels) that each double height and width. ``forward`` applies a
    sigmoid to the decoder output, so reconstructions lie in [0, 1].
    """

    def __init__(self):
        super().__init__()

        # Encoder layers: a 3x3 kernel with padding=1 keeps the spatial size,
        # so only the pooling steps shrink the feature maps.
        self.enc_conv1 = nn.Conv2d(
            in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1
        )
        self.enc_conv2 = nn.Conv2d(
            in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1
        )
        self.relu = nn.ReLU()
        self.maxpool2d = nn.MaxPool2d(kernel_size=2, stride=2)

        # Decoder layers: kernel_size=2 with stride=2 doubles height and width.
        self.dec_conv1 = nn.ConvTranspose2d(
            in_channels=32, out_channels=16, kernel_size=2, stride=2, padding=0
        )
        self.dec_conv2 = nn.ConvTranspose2d(
            in_channels=16, out_channels=3, kernel_size=2, stride=2, padding=0
        )

        # Output activation, applied in forward() only.
        self.sigmoid = nn.Sigmoid()

    def encoder(self, x):
        """Return the 32-channel feature map after two conv/ReLU/pool stages."""
        stage_one = self.maxpool2d(self.relu(self.enc_conv1(x)))
        return self.maxpool2d(self.relu(self.enc_conv2(stage_one)))

    def decoder(self, x):
        """Upsample an encoded feature map back to 3 channels (no activation)."""
        upsampled = self.relu(self.dec_conv1(x))
        return self.dec_conv2(upsampled)

    def forward(self, x):
        """Encode, decode, and squash the reconstruction with a sigmoid."""
        return self.sigmoid(self.decoder(self.encoder(x)))