In [1]:
import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import numpy as np

### Loading the data

In [2]:
# Both splits live under ./data (fetched on first use); ToTensor converts each
# image to a tensor when it is accessed.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# train=False selects the held-out split, used here for validation.
validation_dataset = dsets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:03<00:00, 2894350.61it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 225205.33it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1453341.54it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4545580.71it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [3]:
# Training batches are reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=2000,
    shuffle=True,
)
# Validation order is irrelevant for accuracy, so it is left unshuffled.
validation_loader = torch.utils.data.DataLoader(
    dataset=validation_dataset,
    batch_size=5000,
    shuffle=False,
)

In [44]:
# Inspect one training sample to find the input size the network must accept.
image, label = train_dataset[0]
img_shape = image.shape
flattened_image = image.view(-1)  # collapse every dimension into one vector
# First: size of the image's leading dimension. Second: values per flattened image.
print(image.size(0))
print(flattened_image.numel())

1
784


### Neural Network with one hidden layer

In [80]:
class NN(nn.Module):
    """Fully connected classifier with one sigmoid hidden layer.

    ``forward`` returns the raw scores of the output layer; no softmax is
    applied, which is what ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, D_in, H, D_out):
        """Create the two linear layers.

        Args:
            D_in: number of input features per sample.
            H: number of hidden units.
            D_out: number of output scores per sample.
        """
        super(NN, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        """Return the output-layer scores for a batch ``x`` of flattened inputs."""
        x = torch.sigmoid(self.linear1(x))
        x = self.linear2(x)
        return x

    def train(self, model=True, train_data=None, criterion=None, optimizer=None, epochs=None):
        """Run the training loop, or switch training mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` calls
        internally as ``self.train(False)``. A boolean first argument is
        therefore forwarded to the base class so ``model.train()`` and
        ``model.eval()`` keep working.

        Args:
            model: the module to optimise (normally this instance), or a bool
                to set training/evaluation mode.
            train_data: iterable of ``(x, y)`` batches.
            criterion: loss called as ``criterion(scores, y)``.
            optimizer: optimizer that owns ``model``'s parameters.
            epochs: number of full passes over ``train_data``.

        Returns:
            ``self`` when used as a mode switch, otherwise ``None``.

        Raises:
            TypeError: if a training argument is missing.
        """
        if isinstance(model, bool):
            return super(NN, self).train(model)
        if train_data is None or criterion is None or optimizer is None or epochs is None:
            raise TypeError(
                "train() requires model, train_data, criterion, optimizer and epochs"
            )

        for _ in range(epochs):
            # Iterate the loader that was passed in, not a notebook global.
            for x, y in train_data:
                optimizer.zero_grad()
                # Flatten everything but the batch dimension for the linear layer.
                z = model(x.view(x.size(0), -1))
                loss = criterion(z, y)
                loss.backward()
                optimizer.step()

    def val_acc(self, model, val_data):
        """Return the accuracy of ``model`` on ``val_data`` as a percentage.

        Args:
            model: the module to evaluate (normally this instance).
            val_data: iterable of ``(x, y)`` batches.

        Returns:
            ``100 * correct / total`` over the samples actually seen, or
            ``0.0`` when ``val_data`` yields no samples.
        """
        correct = 0
        total = 0
        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for x, y in val_data:
                z = model(x.view(x.size(0), -1))
                _, label = torch.max(z, 1)
                correct += (label == y).sum().item()
                # Count the evaluated samples instead of trusting a global dataset.
                total += y.size(0)
        if total == 0:
            return 0.0
        return 100 * (correct / total)

In [83]:
# Seed PyTorch's global RNG so the weight initialisation and the shuffled batch
# order are the same every time this cell is run.
torch.manual_seed(0)

input_dim = len(flattened_image)  # 784 for the sample inspected earlier
hidden_dim = 100
output_dim = 10  # one score per class

model1 = NN(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model1.parameters(), lr=learning_rate)
model1.train(model1, train_loader, criterion, optimizer, epochs=30)

In [84]:
# Validation accuracy of the sigmoid model, in percent.
model1_accuracy = model1.val_acc(model1, validation_loader)
print(model1_accuracy)

60.150000000000006


### Improving the model's accuracy and training on the GPU

In [85]:
# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [86]:
# Report whether PyTorch can use CUDA in this environment.
cuda_ok = torch.cuda.is_available()
print(cuda_ok)

True


In [89]:
class NNGPU(nn.Module):
    """Fully connected classifier with one ReLU hidden layer and device handling.

    ``forward`` returns the raw scores of the output layer. ``train`` and
    ``val_acc`` move the model and every batch to the selected device.
    """

    def __init__(self, D_in, H, D_out):
        """Create the layers.

        Args:
            D_in: number of input features per sample.
            H: number of hidden units.
            D_out: number of output scores per sample.
        """
        super(NNGPU, self).__init__()
        self.linear1 = nn.Linear(D_in, H)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(H, D_out)

    def forward(self, x):
        """Return the output-layer scores for a batch ``x`` of flattened inputs."""
        x = self.relu(self.linear1(x))
        x = self.linear2(x)
        return x

    @staticmethod
    def _resolve_device(device):
        """Return ``device`` as a ``torch.device``; default to CUDA when available, else CPU."""
        if device is None:
            return torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return torch.device(device)

    def train(self, train_data=True, criterion=None, optimizer=None, epochs=100, device=None):
        """Run the training loop, or switch training mode when given a bool.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` calls
        internally as ``self.train(False)``. A boolean first argument is
        therefore forwarded to the base class so ``model.train()`` and
        ``model.eval()`` keep working.

        Args:
            train_data: iterable of ``(x, y)`` batches, or a bool to set
                training/evaluation mode.
            criterion: loss called as ``criterion(scores, y)``.
            optimizer: optimizer that owns this model's parameters.
            epochs: number of full passes over ``train_data``.
            device: device to train on; ``None`` picks CUDA when available,
                otherwise CPU.

        Returns:
            ``self`` when used as a mode switch, otherwise ``None``.

        Raises:
            TypeError: if ``criterion`` or ``optimizer`` is missing.
        """
        if isinstance(train_data, bool):
            return super(NNGPU, self).train(train_data)
        if criterion is None or optimizer is None:
            raise TypeError("train() requires train_data, criterion and optimizer")

        target = self._resolve_device(device)
        self.to(target)
        for _ in range(epochs):
            for x, y in train_data:
                x, y = x.to(target), y.to(target)

                optimizer.zero_grad()
                # Call the module (not .forward) so registered hooks still run;
                # flatten everything but the batch dimension first.
                z = self(x.view(x.size(0), -1))
                loss = criterion(z, y)
                loss.backward()
                optimizer.step()

    def val_acc(self, val_data, device=None):
        """Return this model's accuracy on ``val_data`` as a percentage.

        Args:
            val_data: iterable of ``(x, y)`` batches.
            device: device to evaluate on; ``None`` picks CUDA when available,
                otherwise CPU.

        Returns:
            ``100 * correct / total`` over the samples seen, or ``0.0`` when
            ``val_data`` yields no samples.
        """
        target = self._resolve_device(device)
        self.to(target)
        correct = 0
        total = 0
        # Gradients are not needed for evaluation.
        with torch.no_grad():
            for x, y in val_data:
                x, y = x.to(target), y.to(target)
                z = self(x.view(x.size(0), -1))
                _, predicted = torch.max(z, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
        if total == 0:
            return 0.0
        return 100 * (correct / total)

In [92]:
# Seed PyTorch's global RNG so the weight initialisation and the shuffled batch
# order are repeatable when this cell is re-run.
torch.manual_seed(0)

input_dim = len(flattened_image)  # 784 for the sample inspected earlier
hidden_dim = 100
output_dim = 10  # one score per class

model2 = NNGPU(input_dim, hidden_dim, output_dim)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer2 = torch.optim.SGD(model2.parameters(), lr=learning_rate)
model2.train(train_loader, criterion, optimizer2, epochs=30)

In [94]:
# Validation accuracy of the ReLU model, in percent.
model2_accuracy = model2.val_acc(validation_loader)
print(model2_accuracy)

85.92999999999999
