In [1]:
!nvidia-smi

Wed Sep 23 11:14:35 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 450.66       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce GTX 105...  Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   43C    P3    N/A /  N/A |    234MiB /  2002MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
import torch 
import sklearn

In [2]:
print(torch.__version__)
print(sklearn.__version__)

1.6.0+cu101
0.23.2


In [3]:
# set device to gpu if it's available 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.current_device())

cuda
0


Use ```torch.rand(10).to(device)```

In [7]:
a = torch.rand(10).to(device)

In [10]:
print(a)
print(a.device)

tensor([0.1507, 0.2810, 0.4432, 0.0280, 0.6526, 0.2904, 0.2309, 0.3560, 0.9437,
        0.6874], device='cuda:0')
cuda:0


## Construct a simple NN with two hidden layers using Pytorch

### Load MNIST datasets from pytorch 

In [4]:
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, random_split
from sklearn.model_selection import train_test_split

# MNIST dataset 
total_datasets = torchvision.datasets.MNIST(root='./', train=True, transform=transforms.ToTensor())
test_dataset = torchvision.datasets.MNIST(root='./', train=False, transform=transforms.ToTensor())

# Split into validation and test
train_dataset, val_dataset = random_split(total_datasets, [50000, 10000])

print(f"Total dataset:\n{len(total_datasets)}\n")
print(f"Training dataset, m:\n{len(train_dataset)}\n")
print(f"Validation dataset:\n{len(val_dataset)}\n")
print(f"Test dataset:\n{len(test_dataset)}\n")

Total dataset:
60000

Training dataset, m:
50000

Validation dataset:
10000

Test dataset:
10000



In [34]:
train_loader = DataLoader(dataset=train_dataset, batch_size=250, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=1000, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

images, labels = next(iter(train_loader))
print(images.shape)

n = images.shape[2] * images.shape[3]
output_size = len(total_datasets.classes)
hidden_size = 60

print(f"number of features, n:\n{n}\n")
print(f"hidden size;\n{hidden_size}\n")
print(f"output size:\n{output_size}\n")

torch.Size([250, 1, 28, 28])
number of features, n:
784

hidden size;
60

output size:
10



### Logistic Regression of 2 hidden layers

In [29]:
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.L1 = nn.Linear(input_size, hidden_size)
        self.L2 = nn.Linear(hidden_size, hidden_size)
        self.L3 = nn.Linear(hidden_size, output_size)
        self.tanh = nn.Tanh()
        
    def forward(self, A0):
        Z1 = self.L1(A0)
        A1 = self.tanh(Z1)
        Z2 = self.L2(A1)
        A2 = self.tanh(Z2)
        Z3 = self.L2(A2)
        A3 = self.tanh(Z3)
        Y_hat = self.L3(A3)
        return Y_hat

In [35]:
# First, create model
model = Model(n, hidden_size, output_size)

# upload model to gpu
model = model.to(device)

In [36]:
# Define loss function
loss_fn = nn.CrossEntropyLoss()

# Define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.075, momentum=0.9)

total_iterations = len(train_loader)
print(f"total iterations:\n{total_iterations}\n")

total iterations:
200



In [37]:
for epoch in range(20):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        
        # forward propagation
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        
        # backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # print loss for every 100 steps
        if (i+1) % 100 == 0:
            print(f"Epoch [{epoch+1}]; Step[{i+1}/{total_iterations}]; Loss: {loss.item()}")
    
    with torch.no_grad():
        wrong = 0
        total = 0
        for images, labels in val_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            wrong += (predicted != labels).sum().item()
            
    print(f"Cross Validation Error: {100*wrong/total}")

Epoch [1]; Step[100/200]; Loss: 0.4104525148868561
Epoch [1]; Step[200/200]; Loss: 0.14551442861557007
Cross Validation Error: 6.86
Epoch [2]; Step[100/200]; Loss: 0.18161030113697052
Epoch [2]; Step[200/200]; Loss: 0.10409998893737793
Cross Validation Error: 4.75
Epoch [3]; Step[100/200]; Loss: 0.11801781505346298
Epoch [3]; Step[200/200]; Loss: 0.12576176226139069
Cross Validation Error: 4.1
Epoch [4]; Step[100/200]; Loss: 0.08139833807945251
Epoch [4]; Step[200/200]; Loss: 0.05503647029399872
Cross Validation Error: 3.45
Epoch [5]; Step[100/200]; Loss: 0.06068909168243408
Epoch [5]; Step[200/200]; Loss: 0.09272748976945877
Cross Validation Error: 3.42
Epoch [6]; Step[100/200]; Loss: 0.05771825462579727
Epoch [6]; Step[200/200]; Loss: 0.04894643649458885
Cross Validation Error: 3.13
Epoch [7]; Step[100/200]; Loss: 0.03518432751297951
Epoch [7]; Step[200/200]; Loss: 0.06492739170789719
Cross Validation Error: 2.92
Epoch [8]; Step[100/200]; Loss: 0.02612335793673992
Epoch [8]; Step[200

In [39]:
# Check test accuracy
with torch.no_grad():
    test_correct = 0
    test_total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

print(f"Accuracy of model: {100*test_correct/test_total}%")

Accuracy of model: 97.3%


## ConvNet

In [41]:
# Convolutional Neural Network with 2 layers
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7*7*32, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

In [42]:
# First, create model
model = ConvNet()

# upload model to gpu
model = model.to(device)

In [43]:
# Define loss function
loss_fn = nn.CrossEntropyLoss()

# Define optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.075, momentum=0.9)

total_iterations = len(train_loader)
print(f"total iterations:\n{total_iterations}\n")

total iterations:
200



In [45]:
for epoch in range(20):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # forward propagation
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        
        # backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # print loss for every 100 steps
        if (i+1) % 100 == 0:
            print(f"Epoch [{epoch+1}]; Step[{i+1}/{total_iterations}]; Loss: {loss.item()}")
    
    with torch.no_grad():
        wrong = 0
        total = 0
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            wrong += (predicted != labels).sum().item()
            
    print(f"Cross Validation Error: {100*wrong/total}")

Epoch [1]; Step[100/200]; Loss: 0.8110878467559814
Epoch [1]; Step[200/200]; Loss: 0.3705999255180359
Cross Validation Error: 11.48
Epoch [2]; Step[100/200]; Loss: 0.25753629207611084
Epoch [2]; Step[200/200]; Loss: 0.22113129496574402
Cross Validation Error: 6.31
Epoch [3]; Step[100/200]; Loss: 0.2028256058692932
Epoch [3]; Step[200/200]; Loss: 0.17020109295845032
Cross Validation Error: 5.04
Epoch [4]; Step[100/200]; Loss: 0.09933213144540787
Epoch [4]; Step[200/200]; Loss: 0.12741895020008087
Cross Validation Error: 4.04
Epoch [5]; Step[100/200]; Loss: 0.11212226748466492
Epoch [5]; Step[200/200]; Loss: 0.12445715069770813
Cross Validation Error: 3.48
Epoch [6]; Step[100/200]; Loss: 0.12142695486545563
Epoch [6]; Step[200/200]; Loss: 0.07061251252889633
Cross Validation Error: 3.2
Epoch [7]; Step[100/200]; Loss: 0.09561962634325027
Epoch [7]; Step[200/200]; Loss: 0.08732737600803375
Cross Validation Error: 3.1
Epoch [8]; Step[100/200]; Loss: 0.11328326910734177
Epoch [8]; Step[200/2

In [48]:
# Check test accuracy
with torch.no_grad():
    test_correct = 0
    test_total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

print(f"Accuracy of model: {100*test_correct/test_total}%")

Accuracy of model: 98.01%


## Conclusion

As we can see from the above examples, ConvNet performs better than multilayer perceptron (multiple layers of logistic regression).