# Generative Adversarial Network (GAN) - Lab
#### Author: Kamil Barszczak

In [14]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
import glob
import cv2
import os

from torch.utils import data
from tqdm import tqdm

#### Notebook parameters

In [19]:
# --- Data settings ---
# Root directory of the dataset; the loading cell joins it with
# 'bounding_boxes.txt', 'images.txt' and the 'images' sub-directory.
dataset_path = 'E:/Data/CUB_200_2011'
# Fraction of the samples that goes to the training split.
train_test_split = 0.9

# --- Image settings ---
# Target size every cropped image is resized to (passed to cv2.resize).
width = 64
height = 64

# --- Model / training settings ---
# NOTE(review): presumably the size of the generator's noise vector; it is not
# used in the cells visible here - confirm against the model definition.
latent_dim = 64
# Batch size handed to both DataLoaders.
batch = 64
# CUDA is selected only when this is greater than zero and a GPU is available.
ngpu = 1

#### Set up device

In [20]:
# Use the first CUDA device when one is available and GPU use is enabled
# through `ngpu`; otherwise fall back to the CPU.
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

#### Load and process data

In [16]:
# Read the two space-separated annotation tables shipped with the dataset:
# one bounding box per image id, and the relative path of every image.
bboxes = pd.read_csv(
    os.path.join(dataset_path, 'bounding_boxes.txt'),
    sep=" ",
    names=["id", "x", "y", "width", "height"],
).astype(int)
annotations = pd.read_csv(
    os.path.join(dataset_path, 'images.txt'),
    sep=" ",
    names=["id", "path"],
)

# Index the boxes by image id once, so each lookup inside the loop is a direct
# index access instead of a boolean-mask scan over the whole table.
# drop_duplicates keeps the first row per id, matching the former `.iloc[0]`.
bbox_by_id = bboxes.drop_duplicates("id").set_index("id")

processed = []
# `total` lets tqdm render a real progress bar with an ETA.
for row in tqdm(annotations.itertuples(index=False), total=len(annotations)):
    image_id = row.id
    image_path = row.path
    bbox = bbox_by_id.loc[image_id]

    full_path = os.path.join(dataset_path, "images", image_path)
    img = cv2.imread(full_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # rather than raising; fail here with the offending path.
        raise FileNotFoundError(f"Could not read image: {full_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Crop to the bounding box. A negative origin is clamped to zero, since a
    # negative slice start would wrap around to the other side of the image.
    top = max(bbox.y, 0)
    left = max(bbox.x, 0)
    bottom = max(bbox.y + bbox.height, top)
    right = max(bbox.x + bbox.width, left)
    img = img[top:bottom, left:right, :]
    if img.size == 0:
        raise ValueError(f"Empty bounding-box crop for image id {image_id}: {full_path}")

    img = cv2.resize(img, (width, height))
    # Map 8-bit pixel values from [0, 255] to [-1, 1].
    img = img.astype('float32') / 127.5 - 1

    processed.append(img)

11788it [01:07, 174.17it/s]


In [17]:
# Stack the per-image arrays into a single array and shuffle it in place
# along the first (sample) axis before partitioning.
processed = np.array(processed)
np.random.shuffle(processed)

# Index of the first sample that belongs to the test partition.
split = int(train_test_split * len(processed))

train_dataset = data.TensorDataset(torch.Tensor(processed[:split]))
test_dataset = data.TensorDataset(torch.Tensor(processed[split:]))

# Only the training loader reshuffles its samples on every epoch.
train_dataloader = data.DataLoader(train_dataset, batch_size=batch, shuffle=True)
test_dataloader = data.DataLoader(test_dataset, batch_size=batch)

# len() of a DataLoader is its number of batches, not its number of samples.
print("Train dataloader size:", len(train_dataloader))
print("Test dataloader size:", len(test_dataloader))

Train dataloader size: 166
Test dataloader size: 19
