In [0]:
import torch

In [2]:
device = torch.device("cuda:0")
device

device(type='cuda', index=0)

In [0]:
N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in, device = device)
y = torch.randn(N, D_out, device = device)
w1 = torch.randn(D_in, H, device = device, requires_grad = True)
w2 = torch.randn(H, D_out, device = device, requires_grad = True)

eps = 1e-6
for t in range(500):
  y_pred = x.mm(w1).clamp(min = 0).mm(w2)
  loss = (y_pred - y).pow(2).sum()

  loss.backward()

  with torch.no_grad():
    w1 -= eps * w1.grad
    w2 -= eps * w2.grad

    w1.grad.zero_()
    w2.grad.zero_()

In [0]:
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H), 
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out)
).cuda()

for t in range(500):
  y_pred = model(x)
  loss = torch.nn.functional.mse_loss(y_pred, y)

  loss.backward()

  with torch.no_grad():
    for param in model.parameters():
      param -= eps * param.grad

    model.zero_grad()

In [0]:
# lets try a new block!! :)

class ParallelBlock(torch.nn.Module):
  def __init__(self, D_in, D_out):
    super(ParallelBlock, self).__init__()
    self.linear1 = torch.nn.Linear(D_in, D_out)
    self.linear2 = torch.nn.Linear(D_in, D_out)

  def forward(self, x):
    h1 = self.linear1(x)
    h2 = self.linear2(x)
    return (h1 * h2).clamp(min = 0)

model = torch.nn.Sequential(
    ParallelBlock(D_in, H),
    ParallelBlock(H, H),
    torch.nn.Linear(H, D_out)
).cuda()

optimizer = torch.optim.Adam(model.parameters(), lr = eps)

for t in range(500):
  y_pred = model(x)
  loss = torch.nn.functional.mse_loss(y_pred, y)

  loss.backward()

  optimizer.step()
  optimizer.zero_grad()

In [0]:
!tar -xvzf mnist_png.tar.gz

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
mnist_png/training/1/34777.png
mnist_png/training/1/35995.png
mnist_png/training/1/2552.png
mnist_png/training/1/32153.png
mnist_png/training/1/4486.png
mnist_png/training/1/59521.png
mnist_png/training/1/44454.png
mnist_png/training/1/26096.png
mnist_png/training/1/29840.png
mnist_png/training/1/47153.png
mnist_png/training/1/12226.png
mnist_png/training/1/36185.png
mnist_png/training/1/18781.png
mnist_png/training/1/28804.png
mnist_png/training/1/8578.png
mnist_png/training/1/7693.png
mnist_png/training/1/58481.png
mnist_png/training/1/1627.png
mnist_png/training/1/27256.png
mnist_png/training/1/32038.png
mnist_png/training/1/21424.png
mnist_png/training/1/19966.png
mnist_png/training/1/48611.png
mnist_png/training/1/10346.png
mnist_png/training/1/7317.png
mnist_png/training/1/22632.png
mnist_png/training/1/25327.png
mnist_png/training/1/56750.png
mnist_png/training/1/1601.png
mnist_png/training/1/32378.png
mnist_png/tr

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import glob
import os.path as osp
import numpy as np
from PIL import Image

In [0]:
class MNIST(Dataset):
  def __init__(self, root, preload = False, transform = None):
    self.root = root
    self.filenames = []
    self.images = None
    self.labels = None
    self.transform = transform

    # read filenames
    for i in range(10):
      filenames = glob.glob(osp.join(root, str(i), '*.png'))
      for fn in filenames:
        self.filenames.append((fn, i))

    # preload dataset into memory
    if preload:
      self._preload()

    # length of dataset
    self.len = len(filenames)

  def _preload(self):
    self.labels = []
    self.images = []

    for image_fn, label in self.filenames:
      image = Image.open(image_fn)
      self.images.append(image.copy())
      image.close()
      self.labels.append(label)

  def __getitem__(self, index):
    if self.images is not None:
      image = self.images[index]
      label = self.labels[index]
    else:
      # on demand data loading
      image_fn, label = self.filenames[index]
      image = Image.open(image_fn)

    if self.transform is not None:
      image = self.transform(image)
    
    return image, label

  def __len__(self):
    return self.len

In [0]:
trainset = MNIST(
    root = "mnist_png/training",
    transform = transforms.ToTensor(),
    preload = True
)

In [0]:
trainset_loader = DataLoader(trainset, batch_size = 64, shuffle = True, num_workers = 1)

In [0]:
testset = MNIST(
    root = "mnist_png/testing",
    transform = transforms.ToTensor(),
    preload = True
)

testset_loader = DataLoader(testset, batch_size = 1000, shuffle = False, num_workers = 1)

In [0]:
print(len(trainset))
print(len(testset))

5949
1009


In [0]:
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device("cuda:0" if use_cuda else "cpu")
print(device)

cuda:0


In [0]:
dataiter = iter(trainset_loader)
images, labels = dataiter.next()

labels # gives out 64 stuffs
images[0].shape

torch.Size([1, 28, 28])

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [0]:
def train(epoch, log_interval=100):
    model.train()  # set training mode
    iteration = 0
    for ep in range(epoch):
        for batch_idx, (data, target) in enumerate(trainset_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            if iteration % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    ep, batch_idx * len(data), len(trainset_loader.dataset),
                    100. * batch_idx / len(trainset_loader), loss.item()))
            iteration += 1
        test()

In [0]:
def test():
    model.eval()  # set evaluation mode
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in testset_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, size_average=False).item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(testset_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(testset_loader.dataset),
        100. * correct / len(testset_loader.dataset)))

In [0]:
train(5)


Test set: Average loss: 0.2661, Accuracy: 980/1009 (97%)






Test set: Average loss: 0.2025, Accuracy: 980/1009 (97%)


Test set: Average loss: 0.1593, Accuracy: 980/1009 (97%)


Test set: Average loss: 0.1408, Accuracy: 980/1009 (97%)


Test set: Average loss: 0.1343, Accuracy: 980/1009 (97%)



In [0]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'state_dict': model.state_dict(),
             'optimizer' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to %s' % checkpoint_path)
    
def load_checkpoint(checkpoint_path, model, optimizer):
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)

In [0]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
test()


Test set: Average loss: 2.2348, Accuracy: 0/1009 (0%)





In [0]:
# but now, i shall utilise checkpointing :)

def train_save(epoch, save_interval, log_interval):
  model.train() # trainmode
  iteration = 0
  for ep in range(epoch):
    for batch_idx, (data, target) in enumerate(trainset_loader):
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()
      op = model(data)
      loss = F.nll_loss(op, target)
      loss.backward()
      optimizer.step()
      if iteration % log_interval == 0:
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(ep, batch_idx * len(data), len(trainset_loader.dataset), 
                      100. * batch_idx / len(trainset_loader), loss.item()))
      if iteration % save_interval == 0:
        save_checkpoint("mnist-%i.pth" % iteration, model, optimizer)

      iteration += 1
    
    test()

  save_checkpoint("mnist-%i.pth" % iteration, model, optimizer)

In [0]:
train_save(2, 500, 100)

model saved to mnist-0.pth

Test set: Average loss: 0.2523, Accuracy: 980/1009 (97%)






Test set: Average loss: 0.1928, Accuracy: 980/1009 (97%)

model saved to mnist-186.pth


In [0]:
# create a new model
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# load from the final checkpoint
load_checkpoint('mnist-186.pth', model, optimizer)
# should give you the final model accuracy
test()

model loaded from mnist-186.pth

Test set: Average loss: 0.1928, Accuracy: 980/1009 (97%)





In [0]:
print(model.state_dict().keys())

odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias'])


In [0]:
checkpoint = torch.load('mnist-186.pth')
states_to_load = {}
for name, param in checkpoint['state_dict'].items():
    if name.startswith('conv'):
        states_to_load[name] = param

In [0]:
states_to_load.keys()

dict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias'])

In [0]:
model_state = model.state_dict()

model_state.update(states_to_load)

In [0]:
model = Net().to(device)
model.load_state_dict(model_state)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [0]:
model.state_dict().keys()

odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias'])

In [0]:
train(1)  # training 1 epoch will get you to 93%!


Test set: Average loss: 0.2155, Accuracy: 980/1009 (97%)





In [0]:
!wget http://www.cs.toronto.edu/~faghri/vsepp/runs.tar

--2020-05-08 20:52:42--  http://www.cs.toronto.edu/~faghri/vsepp/runs.tar
Resolving www.cs.toronto.edu (www.cs.toronto.edu)... 128.100.3.30
Connecting to www.cs.toronto.edu (www.cs.toronto.edu)|128.100.3.30|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7657175040 (7.1G) [application/x-tar]
Saving to: ‘runs.tar’

runs.tar             20%[===>                ]   1.44G  29.2MB/s    eta 3m 17s ^C
