In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from tqdm import tqdm

In [2]:
T = transforms.ToTensor()

In [3]:
train_data = datasets.MNIST("mnist_data", 
                            download=True, 
                            train=True,
                            transform=T
                            )
test_data = datasets.MNIST("mnist_data", 
                           download=True, 
                           train=False,
                           transform=T
                           )

## Model 1

In [56]:
n_batch = 1

train_dataloader = DataLoader(train_data,
                              batch_size=n_batch
                              )
test_dataloader = DataLoader(test_data,
                             batch_size=n_batch
                             )

In [57]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x : torch.flatten(x))
])
train_data = datasets.MNIST("mnist_data", 
                            download=True, 
                            train=True,
                            transform=transform
                            )
test_data = datasets.MNIST("mnist_data", 
                           download=True, 
                           train=False,
                           transform=transform
                           )

In [58]:
train_data[0][0].shape

torch.Size([784])

In [59]:
model_1 = torch.nn.Sequential(
    torch.nn.Linear(in_features=784, out_features=1)
)

In [60]:
model_1

Sequential(
  (0): Linear(in_features=784, out_features=1, bias=True)
)

In [61]:
loss_fn = torch.nn.MSELoss()

In [62]:
optimizer = torch.optim.Adam(params=model_1.parameters(),
                             lr=1e-4)

In [63]:
optimizer.zero_grad()

In [64]:
num_epochs = 5

In [65]:
for epoch in tqdm(range(num_epochs)):
    error_epoch = []
    for X_batch, y_batch in train_dataloader:

        X_batch = X_batch.to(torch.float32)
        y_batch = y_batch.to(torch.float32)

        y_pred = model_1(X_batch)

        loss = loss_fn(y_pred, y_batch)
        error_epoch.append(loss.item())

        optimizer.zero_grad()

        loss.backward()
        optimizer.step()
    print(np.mean(error_epoch))

  return F.mse_loss(input, target, reduction=self.reduction)
 20%|██        | 1/5 [01:26<05:45, 86.40s/it]

4.450607973240248


 40%|████      | 2/5 [03:13<04:38, 92.68s/it]

3.6165011334559902


 60%|██████    | 3/5 [04:48<03:06, 93.16s/it]

3.4759167009533525


 80%|████████  | 4/5 [06:17<01:32, 92.03s/it]

3.3998366095505803


100%|██████████| 5/5 [08:05<00:00, 97.11s/it]

3.3514290645573546





In [66]:
y_batch.shape

torch.Size([1])

In [67]:
X_batch.shape

torch.Size([1, 784])

In [68]:
model_1(X_batch.flatten())

tensor([6.1693], grad_fn=<AddBackward0>)

#### Model 1 Score

In [69]:
next(iter(train_dataloader))

[tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000

In [70]:
next(iter(train_dataloader))[1].shape

torch.Size([1])

In [71]:
scores = []

with torch.no_grad():
    for X_batch, y_batch in train_dataloader:
        y_pred = model_1(X_batch)
        scores.append(((y_pred - y_batch).abs() < 0.5))

In [72]:
scores[0].to(torch.float64).mean()

tensor(0., dtype=torch.float64)

In [75]:
torch.tensor(scores).to(torch.float32).mean()

tensor(0.2352)

## Model 2

In [101]:
model_2 = torch.nn.Sequential(
    torch.nn.Linear(in_features=784, out_features=10),
    torch.nn.LogSoftmax()
)
model_2

Sequential(
  (0): Linear(in_features=784, out_features=10, bias=True)
  (1): LogSoftmax(dim=None)
)

In [102]:
train_dataloader_2 = DataLoader(
    train_data,
    batch_size=64
)

In [103]:
loss_fn = torch.nn.NLLLoss()

In [104]:
optimizer_2 = torch.optim.Adam(model_2.parameters(),
                               lr = 1e-4)

In [105]:
for epoch in tqdm(range(num_epochs)):
    error_epoch_2 = []
    for X_batch, y_batch in train_dataloader_2:

        X_batch = X_batch#.to(torch.float64)
        y_batch = y_batch#.to(torch.float64)

        y_pred = model_2(X_batch)

        loss = loss_fn(y_pred, y_batch)
        error_epoch.append(loss.item())

        optimizer_2.zero_grad()

        loss.backward()
        optimizer_2.step()
    print(np.mean(error_epoch))

  input = module(input)
 20%|██        | 1/5 [00:12<00:51, 12.88s/it]

0.8969220445076349


 40%|████      | 2/5 [00:25<00:38, 12.83s/it]

0.8379552379202868


 60%|██████    | 3/5 [00:38<00:25, 12.81s/it]

0.7658031801405206


 80%|████████  | 4/5 [00:50<00:12, 12.73s/it]

0.7066738796958537


100%|██████████| 5/5 [01:03<00:00, 12.70s/it]

0.6596008835396151





#### Model 1 Score

In [131]:
scores_2 = []

with torch.no_grad():
    for X_batch, y_batch in train_dataloader:
        y_pred_2 = model_2(X_batch)
        scores_2.append(((y_pred_2.argmax(dim=1) == y_batch) ))

  input = module(input)


In [136]:
a = torch.concat(scores_2)
a.to(torch.float32).mean()

tensor(0.8945)

In [126]:
y_pred_2.argmax(dim=1).shape

torch.Size([64])

In [129]:
len(scores_2)

938

In [128]:
torch.tensor(scores_2[0]).to(torch.float32).mean()

  torch.tensor(scores_2[0]).to(torch.float32).mean()


tensor(0.9219)