In [12]:
#import necessary libraries 
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets 
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [13]:
#define the hyperparameters
learning_rate = 0.001
num_epochs = 10
batch_size = 64

In [14]:
#use torchvision to load the MNIST dataset
transform = transforms.ToTensor()
train_dataset = datasets.MNIST('./data', train = True, transform = transform, download = True)
test_dataset = datasets.MNIST(root = './data', train = False, transform= transform)
train_loader = DataLoader(dataset= train_dataset, batch_size= batch_size, shuffle = True)
test_loader= DataLoader(dataset = test_dataset, batch_size=batch_size, shuffle = False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:02<00:00, 3.53MB/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 104kB/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 948kB/s] 


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.57MB/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [15]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linearRelu = nn.Sequential(
            nn.Linear(28*28, 512), 
            nn.ReLU(),
            nn.Linear(512, 500),
            nn.ReLU(),
            nn.Linear(500, 10)
        )

    def forward(self, x):  # Make sure indentation is correct
        x = self.flatten(x)
        logits = self.linearRelu(x)
        return logits


In [16]:
#instantiate the model and move it to gpu if available 
device = "cuda" if torch.cuda.is_available() else "cpu"
model = NeuralNetwork().to(device)
print(f"Using {device} device")

Using cpu device


In [17]:
#define loss function and optimizer 
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)


In [18]:
#Training loop 
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        #forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        #backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")
        

Epoch [1/10], Loss: 0.2172
Epoch [2/10], Loss: 0.0982
Epoch [3/10], Loss: 0.0153
Epoch [4/10], Loss: 0.0468
Epoch [5/10], Loss: 0.1342
Epoch [6/10], Loss: 0.0174
Epoch [7/10], Loss: 0.0012
Epoch [8/10], Loss: 0.0051
Epoch [9/10], Loss: 0.0050
Epoch [10/10], Loss: 0.0001


In [22]:
#Model Evaluation
def check_accuracy(loader, model):
    if loader.dataset.train:
        print("Checking accuracy on training data.")
    else:
        print("Checking accuracy on test data.")
    
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x,y in loader:
            x, y = x.to(device), y.to(device)
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    print(f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}")
    
    model.train()

check_accuracy(train_loader, model)
check_accuracy(test_loader, model)



Checking accuracy on training data.
Got 59857 / 60000 with accuracy 99.76
Checking accuracy on test data.
Got 9824 / 10000 with accuracy 98.24
