<a href="https://colab.research.google.com/github/kangwonlee/pytorch-ibm-coursera/blob/main/week01_01_05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Hello PyTorch 👋🏻



references
* https://www.coursera.org/learn/deep-neural-networks-with-pytorch/
* https://github.com/damounayman/Deep-Neural-Networks-with-PyTorch/blob/main/Week1/1D_tensors.ipynb



## week 1



### 1.5 Dataset



#### Dataset Class
for images



In [None]:
import gzip
import pathlib
import sys


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import torch.utils.data



Dataset: Fashion-MNIST
* https://github.com/zalandoresearch/fashion-mnist
* 28 x 28 grayscale bitmaps



<img src="https://github.com/zalandoresearch/fashion-mnist/raw/master/doc/img/fashion-mnist-sprite.png" alt="Fashion-MNIST sample sprite" width="300" height="300">


In [None]:
# Make sure a local checkout of the Fashion-MNIST repository is available.
# The clone is skipped when the folder already exists, so re-running this
# cell does not download anything again.
repo_path = pathlib.Path('fashion-mnist')
if not repo_path.exists():
  import subprocess
  p = subprocess.run(
      ['git', 'clone', 'https://github.com/zalandoresearch/fashion-mnist'],
      stdout=subprocess.PIPE, stderr=subprocess.PIPE,
      encoding='utf-8',
  )
  # Report the numeric exit status (`returncode`) together with everything
  # git printed; `check_returncode` is a method and must not be formatted.
  assert (0 == p.returncode), (
      '\n'
      f"code : {p.returncode}\n"
      f"output : {p.stdout}\n"
      f"error  : {p.stderr}"
  )

assert repo_path.is_dir()
data_utils_path = (repo_path / 'utils').resolve()
assert data_utils_path.exists()
assert data_utils_path.is_dir()

# Put the absolute path on sys.path so the import keeps working even if the
# working directory changes later, and do not add it twice on re-runs.
if str(data_utils_path) not in sys.path:
  sys.path.insert(0, str(data_utils_path))

import mnist_reader



In [None]:
# Folder inside the cloned repository that holds the gzip-compressed data files.
fashion_path = repo_path.joinpath('data', 'fashion')
assert fashion_path.exists()
assert fashion_path.is_dir()

# Load the 'train' split first, then the 't10k' split, with the repository's
# own reader; each call returns a pair that is unpacked into X_* and y_*.
(X_train, y_train), (X_test, y_test) = (
  mnist_reader.load_mnist(fashion_path, kind=split)
  for split in ('train', 't10k')
)



To reset the repository (delete the local clone so that it is downloaded again), uncomment and run the next cell:



In [None]:
#!rm -rf fashion-mnist/



In [None]:
class Dataset(torch.utils.data.Dataset):
  """Fashion-MNIST samples read from the gzip-compressed IDX files.

  Each item is an ``(image, label)`` pair. The image is a 28 x 28
  ``PIL.Image`` (or whatever ``transform`` returns for it) and the label is
  the ``uint8`` class index taken from the training label file.
  """

  # Every image is stored as a flat run of 28 * 28 = 784 unsigned bytes.
  IMAGE_SHAPE = (28, 28)
  PIXELS_PER_IMAGE = IMAGE_SHAPE[0] * IMAGE_SHAPE[1]

  def __init__(self, data_path=fashion_path, transform=None):
    """Read the training labels found in ``data_path``.

    Args:
      data_path: folder holding the ``*-idx?-ubyte.gz`` files
        (a ``pathlib.Path`` or a plain string).
      transform: optional callable applied to every PIL image in
        ``__getitem__``.
    """
    self.transform = transform
    self.data_path = pathlib.Path(data_path)
    # Decoded image arrays keyed by ``kind``; filled lazily by ``read_image``
    # so the archive is decompressed once instead of once per item.
    self._images = {}
    self.y_train = self.read_labels()
    self.len = self.y_train.shape[0]

  def read_labels(self, kind='train'):
    """Return all labels of the ``kind`` split as a 1-D ``uint8`` array.

    Raises:
      AssertionError: if the label file is missing or is not a regular file.
    """
    labels_path = self.data_path / f'{kind}-labels-idx1-ubyte.gz'
    assert labels_path.exists(), f'label file not found : {labels_path}'
    assert labels_path.is_file(), f'not a regular file : {labels_path}'

    with gzip.open(labels_path, 'rb') as f_labels:
      # Skip the 8 header bytes in front of the label payload.
      labels = np.frombuffer(
        f_labels.read(),
        dtype=np.uint8,
        offset=8,
      )

    return labels

  def read_image(self, k, kind='train'):
    """Return image ``k`` of the ``kind`` split as a flat array of 784 bytes.

    The whole split is decoded on first use and kept in memory afterwards,
    so later calls are plain array lookups.

    Raises:
      AssertionError: if the image file is missing or is not a regular file.
    """
    images = self._images.get(kind)

    if images is None:
      image_path = self.data_path / f'{kind}-images-idx3-ubyte.gz'
      assert image_path.exists(), f'image file not found : {image_path}'
      assert image_path.is_file(), f'not a regular file : {image_path}'

      with gzip.open(image_path, 'rb') as f_images:
        # Skip the 16 header bytes; let numpy infer the image count so that
        # splits of a different size than the training labels load as well.
        images = np.frombuffer(
          f_images.read(),
          dtype=np.uint8,
          offset=16,
        ).reshape(-1, self.PIXELS_PER_IMAGE)

      self._images[kind] = images

    return images[k]

  def __len__(self):
    """Number of training samples (one per training label)."""
    return self.len

  def __getitem__(self, idx):
    """Return ``(image, label)`` for sample ``idx`` of the training split."""
    image = PIL.Image.fromarray(
      self.read_image(idx).reshape(self.IMAGE_SHAPE)
    )
    y = self.y_train[idx]

    # Compare against None so that a callable which happens to be falsy
    # (e.g. an empty container of transforms) is still applied.
    if self.transform is not None:
      image = self.transform(image)

    return image, y



In [None]:
# Human-readable class names; the position in the tuple is the label value
# used to index it (`lookup[y]`).
lookup = (
  'T-shirt/top',
  'Trouser',
  'Pullover',
  'Dress',
  'Coat',
  'Sandal',
  'Shirt',
  'Sneaker',
  'Bag',
  'Ankle boot',
)



In [None]:
# Show the first few untransformed samples, one per row, titled with the
# class name that belongs to each label.
dataset = Dataset()
n_subplots = 5
_, axs = plt.subplots(nrows=n_subplots, ncols=1, figsize=(9, 16))
for k, ax in enumerate(axs):
  x, y = dataset[k]
  ax.imshow(x)
  ax.set_title(lookup[y])



Declare the data transform: center-crop each image to 20 x 20 pixels, then convert it to a tensor.



In [None]:
import torchvision.transforms

# Two-step pipeline applied to every PIL image:
#   1. keep only the central 20 x 20 pixels,
#   2. convert the cropped image to a tensor.
croptensor_data_transform = torchvision.transforms.Compose([
    torchvision.transforms.CenterCrop(size=20),
    torchvision.transforms.ToTensor(),
])



In [None]:
# Same preview as before, but with the crop-and-tensor transform applied;
# the title shows the tensor shape next to the class name.
dataset = Dataset(transform=croptensor_data_transform)
n_subplots = 5
_, axs = plt.subplots(nrows=n_subplots, ncols=1, figsize=(9, 16))
for k, ax in enumerate(axs):
  x, y = dataset[k]
  # Drop the leading channel axis for plotting: [1, 20, 20] -> [20, 20]
  # (20 x 20 because of CenterCrop(20)).
  ax.imshow(x.squeeze())
  ax.set_title((x.shape,lookup[y]))



#### Dataset



In [None]:
import torchvision.datasets

# Folder handed to torchvision as the dataset root.
mnist_root_path = pathlib.Path('mnist')

# Built-in MNIST dataset: train=False selects the non-training split,
# download=True lets torchvision fetch the files, and every image is
# converted to a tensor.
dataset = torchvision.datasets.MNIST(
    root=mnist_root_path,
    transform=torchvision.transforms.ToTensor(),
    train=False,
    download=True,
)

