In [1]:
# Shell escape: report the GPU, driver version and memory usage via the NVIDIA CLI.
!nvidia-smi

Tue Sep 22 10:57:59 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66       Driver Version: 450.66       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce GTX 105...  Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   52C    P3    N/A /  N/A |    493MiB /  2002MiB |     10%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import torch 
import sklearn

In [3]:
# Record the library versions this notebook was executed with (one per line).
print(torch.__version__, sklearn.__version__, sep="\n")

1.6.0+cu101
0.23.2


In [6]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# torch.cuda.current_device() raises on a machine without CUDA, so only
# report the active GPU index when the CUDA device was actually selected.
if device.type == "cuda":
    print(torch.cuda.current_device())

cuda
0


Use `torch.rand(10).to(device)` to create a tensor and move it onto the selected device.

In [7]:
# Create a 10-element random tensor, then place it on the selected device.
a = torch.rand(10).to(device)

In [8]:
# Show which device the tensor is stored on.
print(a.device)

cuda:0


## Construct a simple NN with two hidden layers using Pytorch

### Load the MNIST dataset with torchvision

In [90]:
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, random_split
from sklearn.model_selection import train_test_split

# MNIST dataset. download=True fetches the files into `root` when they are
# missing, so this cell also works on a fresh checkout with no local copy.
total_datasets = torchvision.datasets.MNIST(
    root='./', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(
    root='./', train=False, transform=transforms.ToTensor(), download=True)

# Split the official training set into training and validation subsets.
# The seeded generator keeps the split identical across kernel restarts.
val_size = 10000
train_size = len(total_datasets) - val_size
train_dataset, val_dataset = random_split(
    total_datasets, [train_size, val_size],
    generator=torch.Generator().manual_seed(42))
assert len(train_dataset) + len(val_dataset) == len(total_datasets)

print(f"Total dataset:\n{len(total_datasets)}\n")
print(f"Training dataset, m:\n{len(train_dataset)}\n")
print(f"Validation dataset:\n{len(val_dataset)}\n")
print(f"Test dataset:\n{len(test_dataset)}\n")

Total dataset:
60000

Training dataset, m:
50000

Validation dataset:
10000

Test dataset:
10000



In [91]:
# Training batches are shuffled; the evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=100, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

# Pull a single training batch to sanity-check the tensor layout.
images, labels = next(iter(train_loader))
print(images.size())

torch.Size([128, 1, 28, 28])


### Feed-forward neural network with 2 hidden layers

In [86]:
import torch.nn as nn

class Model(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        input_size: Number of features per sample fed to the first layer.
        hidden_size: Width of both hidden layers.
        output_size: Number of values produced per sample. The output is the
            raw result of the last linear layer; no softmax is applied here.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.L1 = nn.Linear(input_size, hidden_size)
        self.L2 = nn.Linear(hidden_size, hidden_size)
        self.L3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, A0):
        """Run the forward pass and return the output-layer activations.

        Args:
            A0: Input batch. Either already shaped so that its last dimension
                equals ``input_size``, or a higher-rank batch (e.g. images)
                whose non-batch dimensions flatten to ``input_size``.

        Returns:
            Tensor produced by the final linear layer.
        """
        # Flatten everything after the batch dimension only when the input is
        # not already compatible with L1, so inputs that worked before this
        # check was added are passed through untouched.
        if A0.dim() > 2 and A0.size(-1) != self.L1.in_features:
            A0 = A0.flatten(start_dim=1)
        A1 = self.relu(self.L1(A0))
        A2 = self.relu(self.L2(A1))
        return self.L3(A2)