In [None]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Compose, Normalize
import matplotlib.pyplot as plt

In [7]:
# MNIST training split: downloaded into ./data when missing, images converted by ToTensor.
training_data = datasets.MNIST("data", train=True, transform=ToTensor(), download=True)

# MNIST evaluation split, prepared exactly like the training split.
test_data = datasets.MNIST("data", train=False, transform=ToTensor(), download=True)

In [8]:
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(
    training_data,
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
# Evaluation batches keep the dataset order.
test_dataloader = DataLoader(
    test_data,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

In [9]:
# Show the class labels and the shape of the raw training image tensor, one per line.
print(training_data.classes, training_data.data.size(), sep="\n")

['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
torch.Size([60000, 28, 28])


In [10]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier mapping 28x28 inputs to 10 class logits.

    Layout: flatten -> Linear(784, 512) -> ReLU -> Linear(512, 128) -> ReLU
    -> Linear(128, 10). No softmax is applied; ``forward`` returns raw logits.
    """

    def __init__(self):
        super().__init__()
        # Collapses every dimension after the batch dimension into a single vector.
        self.flatten = nn.Flatten()
        layers = [
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` per sample and return the unnormalised class scores."""
        return self.fc(self.flatten(x))

In [11]:
# Use the current accelerator when one is available, otherwise fall back to the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"
print(device)

# Build the network and place its parameters on the selected device.
model = NeuralNetwork().to(device)
print(model)

cuda
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [12]:
# Smoke test: push one random 28x28 input through the network.
x = torch.rand((1, 28, 28), device=device)
logits = model(x)

In [13]:
# Display the raw (unnormalised) model output for the random input.
logits

tensor([[ 0.1705,  0.0160,  0.0126,  0.0451, -0.0722,  0.0665, -0.0425,  0.0126,
          0.0569, -0.0431]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [14]:
# Normalise the logits along the class dimension so each row sums to 1.
pred_probab = torch.softmax(logits, dim=1)
pred_probab

tensor([[0.1157, 0.0992, 0.0988, 0.1021, 0.0908, 0.1043, 0.0935, 0.0988, 0.1033,
         0.0935]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [15]:
# Predicted class = index of the largest probability in each row.
y_pred = torch.argmax(pred_probab, dim=1)
y_pred

tensor([0], device='cuda:0')

In [16]:
# Cross-entropy on the raw logits, optimised with plain SGD.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

In [17]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass of ``model`` over every batch in ``dataloader``.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches. It must expose a
            sized ``.dataset`` attribute, which is used only for progress output.
        model: Module to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. The loss of every 100th batch is printed as a progress line.
    """
    size = len(dataloader.dataset)
    # Follow the model instead of a notebook-level `device` variable: that
    # variable goes stale as soon as any cell moves the model (for example
    # `model.to("cpu")`), which would make every batch land on the wrong device.
    # A model without parameters falls back to the CPU.
    model_device = next((p.device for p in model.parameters()), torch.device("cpu"))
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(model_device), y.to(model_device)

        pred = model(X)
        loss = loss_fn(pred, y)

        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss : {loss_value:>7f}, [{current:>5d}/{size:>5}]")



In [18]:
def test(dataloader, model, loss_fn):
    size= len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct= 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            test_loss+= loss_fn(pred, y).item()
            correct+= (pred.argmax(1) == y).type(torch.float).sum().item()
    
    test_loss/=num_batches
    correct/=size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}\n")



In [19]:
# Alternate one training pass with one evaluation pass for a fixed number of epochs.
epochs = 5
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n--------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)


Epoch 1
--------------------------------
loss : 2.301265, [   64/60000]
loss : 2.312831, [ 6464/60000]
loss : 2.290009, [12864/60000]
loss : 2.294548, [19264/60000]
loss : 2.292637, [25664/60000]
loss : 2.280230, [32064/60000]
loss : 2.273149, [38464/60000]
loss : 2.272494, [44864/60000]
loss : 2.271684, [51264/60000]
loss : 2.276307, [57664/60000]
Test Error: 
 Accuracy: 19.4%, Avg loss: 2.264792

Epoch 2
--------------------------------
loss : 2.260891, [   64/60000]
loss : 2.257542, [ 6464/60000]
loss : 2.255117, [12864/60000]
loss : 2.253279, [19264/60000]
loss : 2.247385, [25664/60000]
loss : 2.232584, [32064/60000]
loss : 2.232953, [38464/60000]
loss : 2.223917, [44864/60000]
loss : 2.221425, [51264/60000]
loss : 2.214407, [57664/60000]
Test Error: 
 Accuracy: 48.9%, Avg loss: 2.207281

Epoch 3
--------------------------------
loss : 2.222402, [   64/60000]
loss : 2.206892, [ 6464/60000]
loss : 2.205179, [12864/60000]
loss : 2.187918, [19264/60000]
loss : 2.171692, [25664/60000]


In [24]:
# Export the network to ONNX from the CPU, traced with a single random 1x1x28x28 example.
# NOTE(review): the recorded run of this cell ended in a TypeError raised during
# export; that looks like an exporter/dependency version problem rather than an
# issue with these arguments — confirm the installed onnx / onnxscript versions.
example_inputs = (torch.randn(1, 1, 28, 28),)
model.to("cpu")
# Export in evaluation mode so the graph reflects inference behaviour.
model.eval()
onnx_program = torch.onnx.export(model, example_inputs, dynamo=True)

TypeError: Expecting a type not f<class 'typing.Union'> for typeinfo.