In [1]:
import torch 
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import numpy as np

In [2]:
# Both MNIST splits use the same storage folder, download flag and
# image-to-tensor conversion; only the `train` flag differs.
mnist_options = dict(root='./data', download=True, transform=transforms.ToTensor())
train_dataset = dsets.MNIST(train=True, **mnist_options)
validation_dataset = dsets.MNIST(train=False, **mnist_options)

In [3]:
# Training batches are reshuffled on every pass; validation keeps dataset order.
DataLoader = torch.utils.data.DataLoader
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=2000,
    shuffle=True,
)
validation_loader = DataLoader(
    dataset=validation_dataset,
    batch_size=5000,
    shuffle=False,
)

In [4]:
# Inspect the first training sample to find out how many values the
# network receives once an image is flattened into a vector.
image, label = train_dataset[0]
img_shape = image.shape
flattened_image = image.view(image.numel())
print(image.size(0))            # size of the leading dimension of the image tensor
print(flattened_image.numel())  # number of values after flattening

1
784


In [5]:
class DeepNN(nn.Module):
    """Fully connected feed-forward network built from a list of layer widths.

    ``Layers`` is a sequence such as ``[784, 100, 50, 10]``; every pair of
    consecutive entries becomes one ``nn.Linear`` layer.  ReLU follows every
    layer except the last one, so the network returns raw scores.
    """

    def __init__(self, Layers):
        """Create one ``nn.Linear`` per consecutive pair of widths in ``Layers``."""
        super(DeepNN, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))

    def forward(self, activation):
        """Run ``activation`` through every layer and return the final output.

        ReLU is applied after each layer except the last; the output of the
        last linear layer is returned unchanged.
        """
        last = len(self.hidden) - 1
        for index, linear_transform in enumerate(self.hidden):
            activation = linear_transform(activation)
            if index < last:
                activation = torch.relu(activation)
        return activation

    def fit(self, model, train_data, criterion, optimizer, epochs):
        """Optimise ``model`` on ``train_data`` for ``epochs`` full passes.

        Args:
            model: Network that is called on each batch (the notebook passes
                the instance itself).
            train_data: Iterable yielding ``(x, y)`` batches; ``x`` is
                flattened to ``(batch, -1)`` before the forward pass.
            criterion: Callable ``criterion(output, y)`` returning the loss.
            optimizer: Optimizer whose ``zero_grad``/``step`` are called
                around each backward pass.
            epochs: Number of passes over ``train_data``.
        """
        for _ in range(epochs):
            # Iterate the data that was passed in, not a notebook global.
            for x, y in train_data:
                optimizer.zero_grad()
                z = model(x.view(x.size(0), -1))
                loss = criterion(z, y)
                loss.backward()
                optimizer.step()

    def train(self, model=True, train_data=None, criterion=None, optimizer=None, epochs=None):
        """Run the training loop, or toggle training mode like ``nn.Module.train``.

        This method shares its name with ``nn.Module.train(mode)``, which
        PyTorch itself calls from ``eval()`` and when switching the mode of
        child modules.  To keep both uses working:

        * ``train()`` / ``train(True)`` / ``train(False)`` (a bool passed
          positionally and nothing else) sets the training mode and returns
          ``self``.
        * ``train(model, train_data, criterion, optimizer, epochs)`` runs
          ``fit`` with those arguments and returns ``None``.

        Raises:
            TypeError: if a training call omits any of ``train_data``,
                ``criterion``, ``optimizer`` or ``epochs``.
        """
        loop_args = (
            ("train_data", train_data),
            ("criterion", criterion),
            ("optimizer", optimizer),
            ("epochs", epochs),
        )
        if isinstance(model, bool) and all(value is None for _, value in loop_args):
            return super(DeepNN, self).train(model)

        missing = [name for name, value in loop_args if value is None]
        if missing:
            raise TypeError("train() missing required arguments: " + ", ".join(missing))
        self.fit(model, train_data, criterion, optimizer, epochs)

    def val_acc(self, model, val_data):
        """Return the accuracy of ``model`` on ``val_data`` in percent.

        ``val_data`` is an iterable of ``(x, y)`` batches.  The percentage is
        computed over the samples actually seen, so any loader or subset can
        be passed.  Returns ``0.0`` when ``val_data`` yields no samples.
        """
        correct = 0
        total = 0
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for x, y in val_data:
                z = model(x.view(x.size(0), -1))
                label = z.argmax(dim=1)
                correct += (label == y).sum().item()
                total += y.size(0)
        if total == 0:
            return 0.0
        return 100 * (correct / total)

In [11]:
# Seed PyTorch's global RNG before building the model so that the weight
# initialisation and the shuffle order of the training loader are the same
# on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Network: flattened image -> 100 -> 50 -> 10 output scores.
input_dim = len(flattened_image)
Layers = [input_dim, 100, 50, 10]
model = DeepNN(Layers)

learning_rate = 0.10
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
model.train(model, train_loader, criterion, optimizer, epochs=30)

In [13]:
# Accuracy (in percent) of the CPU-trained model on the validation loader.
cpu_accuracy = model.val_acc(model, validation_loader)
print(cpu_accuracy)

93.26


### Deep Neural Network using GPU

In [14]:
# Use the CUDA device when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [15]:
# Report whether PyTorch can see a CUDA device in this runtime.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)

True


In [16]:
class GPUDeepNN(nn.Module):
    """Fully connected feed-forward network that trains on a chosen device.

    ``Layers`` is a sequence such as ``[784, 100, 50, 10]``; every pair of
    consecutive entries becomes one ``nn.Linear`` layer.  ReLU follows every
    layer except the last one, so the network returns raw scores.
    """

    def __init__(self, Layers):
        """Create one ``nn.Linear`` per consecutive pair of widths in ``Layers``."""
        super(GPUDeepNN, self).__init__()
        self.hidden = nn.ModuleList()
        for input_size, output_size in zip(Layers, Layers[1:]):
            self.hidden.append(nn.Linear(input_size, output_size))

    @staticmethod
    def _resolve_device(device=None):
        """Return ``device`` as a ``torch.device``; default to CUDA when available, else CPU."""
        if device is not None:
            return torch.device(device)
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def forward(self, activation):
        """Run ``activation`` through every layer and return the final output.

        ReLU is applied after each layer except the last; the output of the
        last linear layer is returned unchanged.
        """
        last = len(self.hidden) - 1
        for index, linear_transform in enumerate(self.hidden):
            activation = linear_transform(activation)
            if index < last:
                activation = torch.relu(activation)
        return activation

    def fit(self, train_data, criterion, optimizer, epochs=100, device=None):
        """Move the network to ``device`` and optimise it on ``train_data``.

        Args:
            train_data: Iterable yielding ``(x, y)`` batches; each batch is
                moved to the target device and ``x`` is flattened to
                ``(batch, -1)`` before the forward pass.
            criterion: Callable ``criterion(output, y)`` returning the loss.
            optimizer: Optimizer whose ``zero_grad``/``step`` are called
                around each backward pass.
            epochs: Number of passes over ``train_data``.
            device: Target device (``torch.device`` or string).  ``None``
                selects CUDA when available and the CPU otherwise.
        """
        target = self._resolve_device(device)
        self.to(target)
        for _ in range(epochs):
            for x, y in train_data:
                x, y = x.to(target), y.to(target)

                optimizer.zero_grad()
                z = self(x.view(x.size(0), -1))
                loss = criterion(z, y)
                loss.backward()
                optimizer.step()

    def train(self, train_data=True, criterion=None, optimizer=None, epochs=100, device=None):
        """Run the training loop, or toggle training mode like ``nn.Module.train``.

        This method shares its name with ``nn.Module.train(mode)``, which
        PyTorch itself calls from ``eval()`` and when switching the mode of
        child modules.  To keep both uses working:

        * ``train()`` / ``train(True)`` / ``train(False)`` (a bool passed
          positionally, no criterion and no optimizer) sets the training
          mode and returns ``self``.
        * ``train(train_data, criterion, optimizer, epochs=..., device=...)``
          runs ``fit`` with those arguments and returns ``None``.

        Raises:
            TypeError: if a training call omits ``criterion`` or ``optimizer``.
        """
        if isinstance(train_data, bool) and criterion is None and optimizer is None:
            return super(GPUDeepNN, self).train(train_data)

        missing = [
            name
            for name, value in (("criterion", criterion), ("optimizer", optimizer))
            if value is None
        ]
        if missing:
            raise TypeError("train() missing required arguments: " + ", ".join(missing))
        self.fit(train_data, criterion, optimizer, epochs=epochs, device=device)

    def val_acc(self, val_data, device=None):
        """Return the accuracy of the network on ``val_data`` in percent.

        ``val_data`` is an iterable of ``(x, y)`` batches.  The network and
        each batch are moved to ``device`` (``None`` selects CUDA when
        available, else CPU).  Returns ``0.0`` when ``val_data`` yields no
        samples instead of dividing by zero.
        """
        target = self._resolve_device(device)
        self.to(target)
        correct = 0
        total = 0
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for x, y in val_data:
                x, y = x.to(target), y.to(target)
                z = self(x.view(x.size(0), -1))
                predicted = z.argmax(dim=1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
        if total == 0:
            return 0.0
        return 100 * (correct / total)

In [18]:
# Seed PyTorch's global RNG before building the model so that the weight
# initialisation and the shuffle order of the training loader are the same
# on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Network: flattened image -> 100 -> 50 -> 10 output scores.
input_dim = len(flattened_image)
Layers = [input_dim, 100, 50, 10]
# Move the model to the target device *before* creating the optimizer, as the
# torch.optim documentation recommends.
model2 = GPUDeepNN(Layers).to(device)

learning_rate = 0.10
optimizer2 = torch.optim.SGD(model2.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
model2.train(train_loader, criterion, optimizer2, epochs=30)

In [19]:
# Accuracy (in percent) of the device-trained model on the validation loader.
gpu_accuracy = model2.val_acc(validation_loader)
print(gpu_accuracy)

93.69
