In [1]:
import torch
import torch.nn as nn

import torch.optim as optim

import torchvision
from torchvision import datasets
from torchvision import transforms

from torch.utils.data import DataLoader

In [21]:
# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [22]:
class LeNet5(nn.Module):
    """LeNet-5 style convolutional classifier with ReLU activations.

    Three 5x5 convolutions (1 -> 6 -> 16 -> 120 channels), with 2x2 average
    pooling after the first two, followed by two fully connected layers
    (120 -> 84 -> 10).

    Input:  (N, 1, 32, 32)
    Output: (N, 10) raw class scores (no softmax is applied).
    """

    def __init__(self) -> None:
        super().__init__()

        # One pooling module, used at both down-sampling stages of the network.
        self.AvgPool = nn.AvgPool2d(kernel_size=(2, 2), stride=(2, 2))

        # Unpadded, stride-1 convolutions: spatial size 32 -> 28, 14 -> 10, 5 -> 1.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.conv3 = nn.Conv2d(16, 120, kernel_size=5)

        # Classifier head on the flattened 120-dimensional feature vector.
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, 10)

        self.net()

    def net(self):
        """Assemble the layers into `self.network`, the pipeline run by `forward`."""
        pipeline = [
            self.conv1, nn.ReLU(), self.AvgPool,
            self.conv2, nn.ReLU(), self.AvgPool,
            self.conv3, nn.ReLU(),
            nn.Flatten(),
            self.fc1, nn.ReLU(),
            self.fc2,
        ]
        self.network = nn.Sequential(*pipeline)

    def forward(self, x):
        """Run `x` through the sequential pipeline and return the class scores."""
        return self.network(x)
## input: (N, 1, 32, 32)
## output: (N, 10)
            

In [23]:
# Instantiate the network, move it to the selected device, and print its layer structure.
lenet = LeNet5().to(device)
print(lenet)

LeNet5(
  (AvgPool): AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=120, out_features=84, bias=True)
  (fc2): Linear(in_features=84, out_features=10, bias=True)
  (network): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
    (6): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
    (7): ReLU()
    (8): Flatten(start_dim=1, end_dim=-1)
    (9): Linear(in_features=120, out_features=84, bias=True)
    (10): ReLU()
    (11): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [48]:
# Build a VGG-19 from torchvision (no arguments passed); its module tree is printed in the following cells.
vgg = torchvision.models.vgg19()

In [50]:
# Walk the VGG-19 module tree and print every module it yields.
# NOTE(review): the recorded output starts with the complete VGG(...) repr, i.e. the
# top-level model itself is among the modules printed, so the output is very long.
for m in vgg.modules():
    print(m)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [49]:
# Print the full VGG-19 module tree in one go.
print(vgg)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [47]:
# Re-initialise the learnable layers of LeNet-5 and list the network's building blocks.
# Only Conv2d / Linear modules own `weight` and `bias`; ReLU, AvgPool2d and Flatten
# have no parameters, so they are printed but never passed to an initialiser.
for m in lenet.modules():
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        # Kaiming (He) uniform initialisation with the gain for the ReLU activations used here.
        nn.init.kaiming_uniform_(m.weight, nonlinearity="relu")
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    if isinstance(m, (nn.ReLU, nn.AvgPool2d, nn.Conv2d, nn.Flatten)):
        print(m)

AvgPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0)
Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
ReLU()
ReLU()
ReLU()
Flatten(start_dim=1, end_dim=-1)
ReLU()


In [24]:
# Smoke test: push a random batch of 64 single-channel 32x32 inputs through the
# network and print the output shape (one row of 10 class scores per input).
x = torch.rand(64, 1, 32, 32).to(device)
print(lenet(x).shape)

torch.Size([64, 10])


In [5]:
# Preprocessing applied to every image of the dataset, in order:
#   1. ToTensor  - convert the image to a tensor
#   2. Resize    - resize to 32x32, the input size LeNet5 is built for
#   3. Normalize - standardise the single channel with mean 0.1307 and std 0.3081
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32, 32)),
    # Both statistics are one-element tuples (one entry per channel); note that
    # `(0.3081)` without the trailing comma would be a bare float, not a tuple.
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

In [16]:
# Training hyper-parameters, looked up by key in the cells below.
config = dict(
    num_epoch=1,     # number of passes over the training loader
    batch_size=32,   # batch size used by both data loaders
    l_r=1e-3,        # learning rate handed to the optimizer
)

In [26]:
# Load MNIST (downloaded into dataset/ when not already present), apply the
# preprocessing pipeline, and split it into 50,000 training / 10,000 validation samples.
dataset = datasets.MNIST(root="dataset/", transform=transform, download=True)
train_set, valid_set = torch.utils.data.random_split(
    dataset,
    [50000, 10000],
    # Fixed seed: the same samples land in the validation set on every run,
    # so accuracy numbers are comparable across kernel restarts.
    generator=torch.Generator().manual_seed(0),
)
train_dataloader = DataLoader(train_set, batch_size=config["batch_size"], shuffle=True)
# Evaluation does not depend on sample order, so the validation loader is not shuffled.
valid_dataloader = DataLoader(valid_set, batch_size=config["batch_size"], shuffle=False)

In [27]:
# Adam over all parameters of `lenet`, with the learning rate taken from config["l_r"].
optimizer = optim.Adam(lenet.parameters(), lr=config["l_r"])

In [28]:
# Cross-entropy training loss; it is applied directly to the network's final
# linear-layer outputs (LeNet5 has no softmax layer of its own).
criterion = nn.CrossEntropyLoss()

In [29]:
def check_acc(model, x, y):
    """Return the fraction of samples in one batch that `model` classifies correctly.

    Args:
        model: callable mapping `x` to per-class scores, classes along dim 1.
        x: batch of inputs; `len(x)` is used as the batch size.
        y: target class indices, compared element-wise with the predictions.

    Returns:
        Tensor holding (number of correct predictions) / len(x).
    """
    scores = model(x)
    # The predicted class is the index of the highest score along the class dimension.
    predicted = scores.argmax(dim=1).squeeze()
    n_correct = (predicted == y).sum()
    return n_correct / len(x)


In [38]:
def valid(model, valid_dataloader):
    """Compute the classification accuracy of `model` over a whole data loader.

    The model is evaluated in eval mode with gradient tracking disabled, and
    its previous train/eval mode is restored before returning.

    Args:
        model: an `nn.Module` mapping a batch of inputs to per-class scores,
            classes along dim 1.
        valid_dataloader: iterable yielding `(x, y)` batches, `y` holding the
            target class indices.

    Returns:
        0-d tensor with (correctly classified samples) / (samples seen),
        i.e. a value in [0, 1].

    Raises:
        ZeroDivisionError: if the loader yields no batches.
    """
    # Evaluate on the device the model's parameters live on, so the function does
    # not depend on a module-level `device` variable. A module without parameters
    # gets the batches exactly as the loader yields them.
    first_param = next(model.parameters(), None)
    was_training = model.training
    model.eval()
    num_right = 0
    num_seen = 0
    try:
        with torch.no_grad():
            for x, y in valid_dataloader:
                if first_param is not None:
                    x = x.to(first_param.device)
                    y = y.to(first_param.device)
                out = model(x)
                pred = torch.argmax(out, dim=1)
                num_right += torch.sum(pred == y)
                # Count samples, not batches: len(dataloader) is the number of
                # batches and would inflate the result by roughly the batch size.
                num_seen += y.numel()
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)
    return num_right / num_seen

In [33]:
# Grab a single training batch to use for the overfitting sanity check.
x, y = next(iter(train_dataloader))
def overfit_small_batch(x, y, model):
    """Sanity check: fit `model` on one batch for 100 steps and return its accuracy on it.

    A model and training setup that work should be able to memorise a single
    batch, so the returned accuracy is expected to approach 1.

    Uses the module-level `device`, `criterion` and `optimizer`; the optimizer
    must have been created over the parameters of the `model` passed in.

    Args:
        x: batch of inputs.
        y: target class indices for `x`.
        model: the network to train (modified in place).

    Returns:
        Accuracy of `model` on `(x, y)` after training, as computed by `check_acc`.
    """
    x = x.to(device)
    y = y.to(device)
    iter_num = 100
    for _ in range(iter_num):
        pred = model(x)
        loss = criterion(pred, y)

        # Operate on the model that was passed in, not on a global instance.
        model.zero_grad()
        loss.backward()
        optimizer.step()

    acc = check_acc(model, x, y)
    return acc
 
overfit_small_batch(x, y, lenet)

tensor(1., device='cuda:0')

In [36]:
from torch.utils.tensorboard import SummaryWriter


# TensorBoard writer; its log directory is run/LeNet5.
writer = SummaryWriter(log_dir="run/LeNet5")

In [39]:
# Train LeNet-5 for config["num_epoch"] epochs and log to TensorBoard:
#   - "loss": training loss of every batch, indexed by batch_step
#   - "acc" : validation accuracy every 20 batches, indexed by valid_step
batch_step = 0
valid_step = 0
lenet.train()
for epoch in range(config["num_epoch"]):
    for x, y in train_dataloader:
        batch_step += 1

        x = x.to(device)
        y = y.to(device)

        pred = lenet(x)
        loss = criterion(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log a plain Python float so the logged value does not hold on to the graph.
        writer.add_scalar(tag="loss", scalar_value=loss.item(), global_step=batch_step)

        if batch_step % 20 == 0:
            acc = valid(lenet, valid_dataloader)
            # valid() calls model.eval(); make sure the next training step runs in train mode.
            lenet.train()
            writer.add_scalar(tag="acc", scalar_value=acc, global_step=valid_step)
            # Advance the accuracy axis; otherwise every point is written at step 0.
            valid_step += 1

# Make sure all pending events are written to disk before TensorBoard reads them.
writer.flush()

In [41]:
! tensorboard --logdir=run

TensorFlow installation not found - running with reduced feature set.

NOTE: Using experimental fast data loading logic. To disable, pass
    "--load_fast=false" and report issues on GitHub. More details:
    https://github.com/tensorflow/tensorboard/issues/4784

I0111 21:35:42.016285 139886829111040 plugin.py:346] Monitor runs begin
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.7.0 at http://localhost:6006/ (Press CTRL+C to quit)
^C
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
  File "/home/starfish/anaconda3/envs/snake/lib/python3.7/multiprocessing/managers.py", line 811, in _callmethod
    conn = self._tls.connection
AttributeError: 'ForkAwareLocal' object has no attribute 'connection'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/starfish/anaconda3/envs/snake/lib/python3.7/multiprocessing/connection.py", line 620, in SocketClient


In [None]:
# Once the model performs well, we can visualize the images it classifies incorrectly.
