<a href="https://colab.research.google.com/github/noallynoclan/colab/blob/master/pytorch_tutorials_02_simple_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from time import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
# Mini-batch size shared by both loaders. It is assigned in this cell so the
# cell runs on a fresh kernel (Restart & Run All) without relying on a value
# set by a later cell.
batch_size = 64

# MNIST is downloaded into dataset/ on first use; ToTensor converts each image
# to a float tensor.
train_dataset = datasets.MNIST(root='dataset/', train=True, transform=transforms.ToTensor(), download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)
# Accuracy does not depend on sample order, so the test split is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
def accuracy(model, data_loader):
    """Return the fraction of samples in ``data_loader`` that ``model`` classifies correctly.

    Args:
        model: Module mapping a batch of inputs to per-sample class scores with
            the classes along dimension 1. Its ``device`` attribute is used
            when present; otherwise the device of its first parameter is used.
        data_loader: Iterable yielding ``(data, targets)`` batches.

    Returns:
        float: Number of correct predictions divided by the number of samples.

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no samples.

    Note:
        The model is switched to evaluation mode and left in that mode.
    """
    # Work with plain modules too, not only ones that define a ``device`` attribute.
    device = getattr(model, 'device', None)
    if device is None:
        device = next(model.parameters()).device

    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for data, targets in data_loader:
            data = data.to(device)
            targets = targets.to(device)
            predictions = model(data).argmax(1)
            num_correct += (predictions == targets).sum()
            num_samples += predictions.size(0)
    # Convert once at the end so callers get a Python float rather than a
    # 0-dim tensor that still lives on the compute device.
    return float(num_correct) / num_samples

def train(model, data_loader, num_epochs, learning_rate):
    """Fit ``model`` on ``data_loader`` with Adam, printing a summary line per epoch.

    Args:
        model: Module exposing ``device`` (where batches are moved) and
            ``loss`` (called as ``model.loss(scores, targets)``).
        data_loader: Iterable yielding ``(data, targets)`` batches.
        num_epochs: Number of passes over ``data_loader``.
        learning_rate: Learning rate passed to ``optim.Adam``.

    After each epoch the elapsed training time and the accuracy on
    ``data_loader`` itself (as computed by ``accuracy``) are printed.
    """
    adam = optim.Adam(model.parameters(), lr=learning_rate)
    for epoch in range(num_epochs):
        model.train()
        epoch_started = time()
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(model.device), labels.to(model.device)
            # Clear gradients left over from the previous step before backprop.
            adam.zero_grad()
            batch_loss = model.loss(model(inputs), labels)
            batch_loss.backward()
            adam.step()
        # Stop the clock before evaluating so only training time is reported.
        elapsed = time() - epoch_started
        epoch_accuracy = accuracy(model, data_loader)
        print(f'epoch {epoch}, {elapsed:.1f}s: {epoch_accuracy:.1%}')

In [None]:
class NN(nn.Module):
    """Fully connected classifier with one 64-unit hidden layer.

    The instance also carries its loss function (``loss``) and the device it
    was moved to at construction time (``device``).
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=num_classes)
        self.loss = nn.CrossEntropyLoss()
        # Use the GPU when available and move the parameters there right away.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):
        """Map a batch ``x`` (e.g. [batch, 1, 28, 28]) to scores [batch, num_classes]."""
        flat = x.reshape(x.shape[0], -1)  # one feature row per sample
        hidden = F.relu(self.fc1(flat))   # [batch, 64]
        return self.fc2(hidden)           # [batch, num_classes]

# Hyperparameters for the fully connected baseline.
input_size, num_classes = 784, 10
learning_rate = 1e-3
batch_size, num_epochs = 64, 2

# Build the network, fit it on the training loader, then score the test loader.
model = NN(input_size=input_size, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 5.6s: 93.4%
epoch 1, 5.8s: 95.5%
test: 95.2%


In [None]:
class CNN(nn.Module):
    """Two conv + max-pool stages followed by a linear classifier.

    Each 3x3 convolution uses stride 1 and padding 1, so it keeps the spatial
    size (n_out = (n_in + 2 * padding - kernel) / stride + 1); each 2x2 pool
    halves it. The linear layer expects 16 * 7 * 7 features, i.e. 28x28 inputs.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 8, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)
        self.loss = nn.CrossEntropyLoss()
        # Use the GPU when available and move the parameters there right away.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):
        """Map images [batch, in_channels, 28, 28] to scores [batch, num_classes]."""
        # [batch, 8, 28, 28] -> [batch, 8, 14, 14] -> [batch, 16, 14, 14] -> [batch, 16, 7, 7]
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        features = x.reshape(x.shape[0], -1)  # [batch, 16 * 7 * 7]
        return self.fc1(features)

# Hyperparameters for the convolutional model.
in_channels, num_classes = 1, 10
learning_rate, num_epochs = 1e-3, 2

# Build the network, fit it on the training loader, then score the test loader.
model = CNN(in_channels=in_channels, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 5.8s: 96.6%
epoch 1, 6.2s: 97.6%
test: 97.6%


In [None]:
class RNN(nn.Module):
    """Multi-layer Elman RNN classifier that reads an image row by row.

    The outputs of every time step are concatenated and fed to a linear
    classifier, so the number of time steps must be known at construction.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores.
        sequence_length: Number of time steps in every input. Defaults to 28;
            it is a constructor argument so the class does not depend on a
            module-level variable being assigned before instantiation.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
        self.loss = nn.CrossEntropyLoss()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):  # [batch, 1, seq, input_size]
        """Return class scores of shape [batch, num_classes]."""
        x = x.squeeze(1)  # [batch, seq, input_size]
        # Zero initial state created with the input's device and dtype, so the
        # module keeps working after being moved or cast post-construction.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # [layers, batch, hidden]
        out, _ = self.rnn(x, h0)  # [batch, seq, hidden]
        out = out.reshape(out.shape[0], -1)  # [batch, seq * hidden]
        return self.fc(out)  # [batch, num_classes]

# Hyperparameters for the row-by-row RNN: each 28x28 image is treated as a
# sequence of 28 rows with 28 features each.
input_size, sequence_length = 28, 28
num_layers, hidden_size = 2, 256
num_classes = 10
learning_rate = 0.001
batch_size, num_epochs = 64, 2

# Build the network, fit it on the training loader, then score the test loader.
model = RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 8.8s: 95.6%
epoch 1, 8.2s: 97.1%
test: 97.1%


In [None]:
class GRU(nn.Module):
    """Multi-layer GRU classifier that reads an image row by row.

    The outputs of every time step are concatenated and fed to a linear
    classifier, so the number of time steps must be known at construction.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores.
        sequence_length: Number of time steps in every input. Defaults to 28;
            it is a constructor argument so the class does not depend on a
            module-level variable being assigned before instantiation.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
        self.loss = nn.CrossEntropyLoss()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):  # [batch, 1, seq, input_size]
        """Return class scores of shape [batch, num_classes]."""
        x = x.squeeze(1)  # [batch, seq, input_size]
        # Zero initial state created with the input's device and dtype, so the
        # module keeps working after being moved or cast post-construction.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # [layers, batch, hidden]
        out, _ = self.gru(x, h0)  # [batch, seq, hidden]
        out = out.reshape(out.shape[0], -1)  # [batch, seq * hidden]
        return self.fc(out)  # [batch, num_classes]

# Hyperparameters for the row-by-row GRU: each 28x28 image is treated as a
# sequence of 28 rows with 28 features each.
input_size, sequence_length = 28, 28
num_layers, hidden_size = 2, 256
num_classes = 10
learning_rate = 0.001
batch_size, num_epochs = 64, 2

# Build the network, fit it on the training loader, then score the test loader.
model = GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 133.2s: 97.9%
epoch 1, 134.8s: 97.2%
test: 97.2%


In [None]:
class LSTM(nn.Module):
    """Multi-layer LSTM classifier that uses the outputs of every time step.

    The outputs of all time steps are concatenated and fed to a linear
    classifier, so the number of time steps must be known at construction.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden and cell states.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores.
        sequence_length: Number of time steps in every input. Defaults to 28;
            it is a constructor argument so the class does not depend on a
            module-level variable being assigned before instantiation.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, sequence_length=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
        self.loss = nn.CrossEntropyLoss()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):  # [batch, 1, seq, input_size]
        """Return class scores of shape [batch, num_classes]."""
        x = x.squeeze(1)  # [batch, seq, input_size]
        # Zero initial states created with the input's device and dtype, so the
        # module keeps working after being moved or cast post-construction.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # [layers, batch, hidden]
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # [layers, batch, hidden]
        out, _ = self.lstm(x, (h0, c0))  # [batch, seq, hidden]
        out = out.reshape(out.shape[0], -1)  # [batch, seq * hidden]
        return self.fc(out)  # [batch, num_classes]

# Hyperparameters for the row-by-row LSTM: each 28x28 image is treated as a
# sequence of 28 rows with 28 features each.
input_size, sequence_length = 28, 28
num_layers, hidden_size = 2, 256
num_classes = 10
learning_rate = 0.001
batch_size, num_epochs = 64, 2

# Build the network, fit it on the training loader, then score the test loader.
model = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 167.3s: 97.6%
epoch 1, 163.7s: 98.7%
test: 98.4%


In [None]:
class LSTM(nn.Module):
    """Multi-layer LSTM classifier that uses only the last time step's output.

    NOTE: this definition reuses the name ``LSTM`` and therefore replaces the
    variant defined earlier in the notebook that classifies from all time steps.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden and cell states.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        # Only one time step is classified, so the input width is hidden_size.
        self.fc = nn.Linear(hidden_size, num_classes)
        self.loss = nn.CrossEntropyLoss()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):  # [batch, 1, seq, input_size]
        """Return class scores of shape [batch, num_classes]."""
        x = x.squeeze(1)  # [batch, seq, input_size]
        # Zero initial states created with the input's device and dtype, so the
        # module keeps working after being moved or cast post-construction.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # [layers, batch, hidden]
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)  # [layers, batch, hidden]
        out, _ = self.lstm(x, (h0, c0))  # [batch, seq, hidden]
        out = out[:, -1, :]  # [batch, hidden] - integer indexing drops the time axis
        return self.fc(out)  # [batch, num_classes]

# Hyperparameters for the last-step LSTM: each 28x28 image is treated as a
# sequence of 28 rows with 28 features each.
input_size, sequence_length = 28, 28
num_layers, hidden_size = 2, 256
num_classes = 10
learning_rate = 0.001
batch_size, num_epochs = 64, 2

# Build the network, fit it on the training loader, then score the test loader.
model = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 9.1s: 97.1%
epoch 1, 9.3s: 97.7%
test: 97.8%


In [None]:
class BLSTM(nn.Module):
    """Bidirectional multi-layer LSTM classifier over image rows.

    The classifier sees, for the last layer, the forward direction's output at
    the final time step concatenated with the backward direction's output at
    the first time step, i.e. the point where each direction has consumed the
    whole sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden and cell states per direction.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        # Forward and backward features are concatenated, hence hidden_size * 2.
        self.fc = nn.Linear(hidden_size * 2, num_classes)
        self.loss = nn.CrossEntropyLoss()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, x):  # [batch, 1, seq, input_size]
        """Return class scores of shape [batch, num_classes]."""
        x = x.squeeze(1)  # [batch, seq, input_size]
        # One zero initial state per layer and per direction, created with the
        # input's device and dtype so the module survives being moved or cast.
        h0 = x.new_zeros(self.num_layers * 2, x.size(0), self.hidden_size)  # [2 * layers, batch, hidden]
        c0 = x.new_zeros(self.num_layers * 2, x.size(0), self.hidden_size)  # [2 * layers, batch, hidden]
        out, _ = self.lstm(x, (h0, c0))  # [batch, seq, 2 * hidden]
        # The backward direction runs from the last step to the first, so at
        # t = -1 it has seen a single step; its full-sequence summary is at t = 0.
        forward_last = out[:, -1, :self.hidden_size]   # [batch, hidden]
        backward_last = out[:, 0, self.hidden_size:]   # [batch, hidden]
        out = torch.cat((forward_last, backward_last), dim=1)  # [batch, 2 * hidden]
        return self.fc(out)  # [batch, num_classes]


# Hyperparameters for the bidirectional LSTM: each 28x28 image is treated as a
# sequence of 28 rows with 28 features each.
input_size, sequence_length = 28, 28
num_layers, hidden_size = 2, 256
num_classes = 10
learning_rate = 0.001
batch_size, num_epochs = 64, 2

# Build the network, fit it on the training loader, then score the test loader.
model = BLSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, num_classes=num_classes)
train(model, data_loader=train_loader, num_epochs=num_epochs, learning_rate=learning_rate)
print(f'test: {accuracy(model, data_loader=test_loader):.1%}')

epoch 0, 12.9s: 95.3%
epoch 1, 12.8s: 97.4%
test: 97.0%


In [None]:
# Display the module summary of the most recently trained model.
model

BLSTM(
  (lstm): LSTM(28, 256, num_layers=2, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=512, out_features=10, bias=True)
  (loss): CrossEntropyLoss()
)