In [None]:

import torch
from torch import nn
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import sklearn
from pathlib import Path


# Both FashionMNIST splits share every argument except `train`, so the common
# ones are collected once: store under ./data, download if missing, convert
# images with ToTensor, and leave the labels untouched.
_fashion_mnist_args = dict(
    root='data',
    download=True,
    transform=ToTensor(),
    target_transform=None,
)

# Training split.
train_dataset = datasets.FashionMNIST(train=True, **_fashion_mnist_args)

# Held-out test split.
test_dataset = datasets.FashionMNIST(train=False, **_fashion_mnist_args)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 17359597.60it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 304785.13it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5443098.19it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 16296058.11it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [None]:
from torch.utils.data import DataLoader
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 32

# Pick the GPU when one is usable, otherwise fall back to the CPU.
# NOTE: is_available must be *called*; the bare function object is always
# truthy, which would select 'cuda' even on CPU-only machines.
device = 'cuda' if torch.cuda.is_available() else 'cpu'


# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)

# Evaluation batches keep the dataset order.
test_dataloader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False)

# Sanity check: print the shape of the label tensor of one training batch.
print(next(iter(train_dataloader))[1].shape)



torch.Size([32])


In [None]:
from tqdm.auto import tqdm

class Model_0(nn.Module):
  """Fully connected classifier: flatten, three hidden Linear+ReLU stages, linear head.

  Args:
    input_shape: number of features per sample after flattening.
    hidden_units: width of each of the three hidden layers.
    output_shape: number of output logits.
  """

  def __init__(self, input_shape, hidden_units, output_shape):
    super().__init__()

    # Feature widths seen by the three hidden Linear layers, in order.
    widths = [input_shape, hidden_units, hidden_units, hidden_units]

    stack = [nn.Flatten()]
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
      stack.append(nn.Linear(in_features=fan_in, out_features=fan_out))
      stack.append(nn.ReLU())
    # Output head produces raw logits (no activation).
    stack.append(nn.Linear(in_features=hidden_units, out_features=output_shape))

    self.layers = nn.Sequential(*stack)

  def forward(self, X):
    """Return the logits produced by running X through the layer stack."""
    logits = self.layers(X)
    return logits


# Seed *before* building the model so that the initial weights, and not only
# the DataLoader shuffling, are reproducible across runs.
torch.manual_seed(90)
torch.cuda.manual_seed(90)

# 784 input features (a flattened 28x28 image), 16 hidden units, 10 logits.
model_0 = Model_0(784, 16, 10)
model_0.to(device)


# Training
# CrossEntropyLoss consumes raw logits and integer class labels.
loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(params=model_0.parameters(), lr=0.1)

epochs = 4

for epoch in tqdm(range(epochs)):
  # Running sums are plain Python floats/ints, weighted by batch size so that
  # a short final batch does not skew the epoch average.
  train_loss = 0.0
  train_samples = 0

  model_0.train()
  for X, y in train_dataloader:
    X = X.to(device)
    y = y.to(device)
    y_out = model_0(X)
    loss = loss_fn(y_out, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts a float; accumulating the tensor itself would keep
    # every batch's autograd graph alive for the whole epoch.
    train_loss += loss.item() * len(y)
    train_samples += len(y)

  train_loss /= train_samples

  model_0.eval()
  with torch.inference_mode():
    test_loss = 0.0
    test_correct = 0
    test_samples = 0

    for X_test, y_test in test_dataloader:
      X_test = X_test.to(device)
      y_test = y_test.to(device)
      test_out = model_0(X_test)
      # argmax over logits equals argmax over softmax(logits): softmax is
      # monotonic, so it is skipped here.
      test_preds = torch.argmax(test_out, dim=1)

      test_loss += loss_fn(test_out, y_test).item() * len(y_test)
      test_correct += (test_preds == y_test).sum().item()
      test_samples += len(y_test)

    # Per-sample averages over the whole test set.
    test_loss /= test_samples
    test_accuracy = test_correct / test_samples

  # Report every second epoch.
  if epoch % 2 == 0:
    print(f" train loss = {train_loss} | test loss = {test_loss} | test_accuracy = {test_accuracy}")

  0%|          | 0/4 [00:00<?, ?it/s]

 train loss = 0.7349809408187866 | test loss = 0.4821367859840393 | test_accuracy = 0.8247803449630737
 train loss = 0.4285508692264557 | test loss = 0.43346917629241943 | test_accuracy = 0.8398562073707581


In [None]:
from tqdm.auto import tqdm


class CNN(nn.Module):
  """Small convolutional classifier: two conv blocks followed by a linear head.

  Each conv block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2), using
  3x3 kernels with stride 1 and padding 1.

  Args:
    input_shape: number of channels in the input images.
    hidden_units: number of channels produced by every convolution.
    output_shape: number of output logits.
  """

  def __init__(self, input_shape, hidden_units, output_shape):
    super().__init__()

    self.conv_block_0 = self._double_conv(input_shape, hidden_units)
    self.conv_block_1 = self._double_conv(hidden_units, hidden_units)

    # The head expects a 7x7 feature map per channel. The padded 3x3 convs
    # keep the spatial size and each pooling halves it, so this corresponds
    # to 28x28 inputs.
    flat_features = hidden_units * 7 * 7
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(flat_features, output_shape),
    )

  @staticmethod
  def _double_conv(in_channels, out_channels):
    """Build one Conv-ReLU-Conv-ReLU-MaxPool block."""
    conv_args = dict(kernel_size=3, stride=1, padding=1)
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, **conv_args),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, **conv_args),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),
    )

  def forward(self, X):
    """Run X through both conv blocks and the classifier; return the logits."""
    features = self.conv_block_1(self.conv_block_0(X))
    return self.classifier(features)


In [None]:
# Seed before constructing the model so weight initialisation is reproducible.
torch.manual_seed(90)
torch.cuda.manual_seed(90)

# One input channel, 32 feature maps per conv, one logit per dataset class.
conv_model = CNN(1, 32, len(train_dataset.classes)).to(device)

# CrossEntropyLoss consumes raw logits and integer class labels.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(conv_model.parameters(), lr=0.1)

epochs = 17

for epoch in tqdm(range(epochs)):

  conv_model.train()
  # Running sums are plain Python numbers, weighted by batch size so that a
  # short final batch does not skew the epoch average.
  train_loss = 0.0
  train_samples = 0
  for X, y in train_dataloader:
    X, y = X.to(device), y.to(device)
    y_out = conv_model(X)

    loss = loss_fn(y_out, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # .item() extracts a float; accumulating the tensor itself would keep
    # every batch's autograd graph alive for the whole epoch.
    train_loss += loss.item() * len(y)
    train_samples += len(y)
  train_loss /= train_samples

  conv_model.eval()
  with torch.inference_mode():
    test_loss = 0.0
    test_correct = 0
    test_samples = 0
    for X, y in test_dataloader:
      X, y = X.to(device), y.to(device)
      y_out = conv_model(X)
      # argmax over logits equals argmax over softmax(logits): softmax is
      # monotonic, so it is skipped here.
      y_preds = y_out.argmax(dim=1)

      test_loss += loss_fn(y_out, y).item() * len(y)
      test_correct += (y_preds == y).sum().item()
      test_samples += len(y)

    # Per-sample averages over the whole test set.
    test_loss /= test_samples
    test_accuracy = test_correct / test_samples

  # Report every second epoch.
  if epoch % 2 == 0:
    print(f" epoch = {epoch} | Train loss = {train_loss} | Test Loss = {test_loss} | Test accuracy = {test_accuracy}")






  0%|          | 0/17 [00:00<?, ?it/s]

 epoch = 0 | Train loss = 0.5361519455909729 | Test Loss = 0.3531529903411865 | Test accuracy = 0.798622190952301
 epoch = 2 | Train loss = 0.26627930998802185 | Test Loss = 0.26470986008644104 | Test accuracy = 0.8195886611938477
 epoch = 4 | Train loss = 0.22276246547698975 | Test Loss = 0.2477423995733261 | Test accuracy = 0.8217851519584656
 epoch = 6 | Train loss = 0.19822123646736145 | Test Loss = 0.24040016531944275 | Test accuracy = 0.8189895749092102
 epoch = 8 | Train loss = 0.17788174748420715 | Test Loss = 0.2516052722930908 | Test accuracy = 0.8163937330245972
 epoch = 10 | Train loss = 0.16195747256278992 | Test Loss = 0.2634595036506653 | Test accuracy = 0.8029153347015381
 epoch = 12 | Train loss = 0.14942604303359985 | Test Loss = 0.26276326179504395 | Test accuracy = 0.802515983581543
 epoch = 14 | Train loss = 0.13828541338443756 | Test Loss = 0.2607952356338501 | Test accuracy = 0.8085063695907593
 epoch = 16 | Train loss = 0.12897293269634247 | Test Loss = 0.284336