In [1]:
import json
import time

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from IPython.core.display_functions import clear_output
from torch.utils import data
from collections import OrderedDict
from collections import namedtuple
from itertools import product
from PIL import Image
from torch.utils.data import DataLoader

torch.set_printoptions(linewidth=120)  # Display options for output
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter
# from tensorboardX import SummaryWriter
# import sys
# print(sys.executable)


In [2]:
# Report the library versions and CUDA status of this environment, one value per line.
print(
    torch.__version__,
    torchvision.__version__,
    torch.cuda.is_available(),
    torch.version.cuda,
    torch.cuda.device_count(),
    sep='\n',
)

1.5.1+cu101
0.6.1+cu101
True
10.1
1


In [3]:
def get_num_correct(preds, labels):
    """Count how many rows of `preds` have their largest entry at the index given in `labels`.

    Args:
        preds: 2-D tensor of scores; the arg-max is taken along dim 1.
        labels: tensor compared element-wise against the arg-max indices.

    Returns:
        The number of matches as a Python number.
    """
    predicted_classes = preds.argmax(dim=1)
    hits = predicted_classes.eq(labels)
    return hits.sum().item()

In [4]:
class Network(nn.Module):
    """Small CNN: two conv -> ReLU -> max-pool stages followed by three linear layers.

    With 1-channel 28x28 inputs the conv stack yields 12 feature maps of
    size 4x4, which is the 12 * 4 * 4 input width of `fc1`. The output has
    10 columns and is returned as raw scores (no softmax applied).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run the forward pass.

        Args:
            t: batch of images shaped (batch, 1, height, width); the linear
                layers require the conv stack to produce 12 * 4 * 4 features
                per sample (true for 28x28 inputs).

        Returns:
            Tensor of shape (batch, 10) with un-normalised class scores.

        Raises:
            RuntimeError: if the flattened feature width does not match `fc1`.
        """
        # (1) input layer: the input tensor is used as-is

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Flatten everything except the batch dimension. Unlike
        # reshape(-1, 12 * 4 * 4), this never folds surplus features into the
        # batch dimension: a wrongly sized input fails loudly at fc1 instead of
        # silently producing a different number of rows.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer: raw scores, no softmax here
        t = self.out(t)

        return t

In [5]:
# FashionMNIST training split, downloaded to ./data/FashionMNIST if missing;
# each image is converted to a tensor on access.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [6]:
# Shuffled loader over the training set, 100 samples per batch.
train_loader = DataLoader(train_set, batch_size=100, shuffle=True)

In [7]:
# Write one sample batch (as an image grid) and the model graph to TensorBoard.
network = Network()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb = SummaryWriter()
tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()

In [8]:
from itertools import product

In [9]:
# Hyperparameter grid: every combination of these values is one candidate run.
parameters = {
    'lr': [.01, .001],
    'batch_size': [10, 100, 1000],
    'shuffle': [True, False],
}

In [10]:
# The value lists of the grid, in the same order as the keys of `parameters`.
para_values = list(parameters.values())
para_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [11]:
# Show every combination in the Cartesian product of the parameter value lists.
for lr, batch_size, shuffle in product(*para_values):
    print(f'{lr} {batch_size} {shuffle}')

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [12]:
for lr, batch_size, shuffle in product(*para_values):
    # Human-readable label for this hyperparameter combination
    # (key spelled `batch_size`; it was previously misspelled `batch_zize`).
    comment = f'batch_size={batch_size} lr={lr} shuffle={shuffle}'

    # Placeholder: the training procedure for this parameter set goes here.

In [13]:
# Create a fresh Network instance and rebind the global `network` to it.
network = Network()

In [14]:

# batch_size = 100
# lr = 0.01
# batch_size_list = [100, 1000, 10000]
# lt_list = [.01, .001, .0001, .00001]
# for lr, batch_size, shuffle in product(*para_values):
#     comment = f'batch_zize={batch_size} lr={lr} shuffle={shuffle}'
#     network = Network()
#     train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
#     optimizer = optim.Adam(network.parameters(), lr=lr)
# 
#     images, labels = next(iter(train_loader))
#     grid = torchvision.utils.make_grid(images)
# 
#     # comment = f'batch_zize={batch_size} lr={lr}'
#     tb = SummaryWriter(comment=comment)
#     tb.add_image('images', grid)
#     tb.add_graph(network, images)
# 
#     for epoch in range(10):
#         total_loss = 0
#         total_correct = 0
# 
#         for batch in train_loader:  # get batch
#             images, labels = batch
#             preds = network(images)  #  pass batch
#             loss = F.cross_entropy(preds, labels)  # calculate loss
# 
#             optimizer.zero_grad()
#             loss.backward()  # calculate gradients
#             optimizer.step()  # update weights
# 
#             total_loss += loss.item() * batch_size
#             total_correct += get_num_correct(preds, labels)
# 
#         tb.add_scalar('Loss', total_loss, epoch)
#         tb.add_scalar('Number_correct', total_correct, epoch)
#         tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)
# 
#         tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
#         tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
#         tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)
# 
#         print("epoch: ", epoch, "total_correct:", total_correct, "loss", total_loss)
# 
#     tb.close()


In [15]:
class RunBuilder:
    """Expands a mapping of parameter names to value lists into individual runs."""

    @staticmethod
    def get_runs(params):
        """Return one `Run` namedtuple per combination of parameter values.

        Args:
            params: mapping whose keys become the namedtuple fields and whose
                values are iterables of candidate values for that field.

        Returns:
            List of `Run` tuples, in `itertools.product` order.
        """
        Run = namedtuple('Run', params.keys())
        return [Run(*combination) for combination in product(*params.values())]

In [16]:
class RunManager:
    """Tracks loss, accuracy and timings per epoch across several training runs.

    Call order per run: `begin_run`, then for every epoch `begin_epoch`,
    `track_loss` / `track_num_correct` per batch, `end_epoch`; finally
    `end_run`. Results of all epochs accumulate in `run_data` and can be
    written to disk with `save`.
    """

    def __init__(self):
        # Per-epoch accumulators (reset in begin_epoch).
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = 0

        # Per-run state (set in begin_run); run_data collects one row per epoch.
        self.run_params = 0
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a run: remember its objects and open a TensorBoard writer.

        Args:
            run: namedtuple of hyperparameters (its `_asdict()` is used in
                `end_epoch`); its string form becomes the writer comment.
            network: model whose graph and parameters are logged.
            loader: data loader; one batch is drawn from it for logging.
        """
        self.run_start_time = time.time()
        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'{run}')

        # Log one sample batch as an image grid plus the model graph.
        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and zero the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Log the epoch's metrics, append them to `run_data` and display the table."""
        # Function-scope import so `display` is defined even where it is not
        # injected as a builtin by the notebook front end.
        from IPython.display import display

        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # Both metrics are normalised by the size of the loader's dataset.
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # Parameters that received no gradient (frozen or unused) have
            # grad None, which add_histogram cannot convert; skip them.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration

        # Append the run's hyperparameters as extra columns.
        results.update(self.run_params._asdict())
        self.run_data.append(results)
        df = pd.DataFrame(self.run_data)

        # Replace the previously displayed table with the updated one.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add a batch's loss to the epoch total, weighted by the batch size.

        Args:
            loss: scalar tensor; assumed to be the mean loss over the batch
                (as produced by a mean-reduced loss function).
            batch_size: number of samples in this batch. Defaults to
                `self.loader.batch_size`; pass the actual size (e.g.
                `len(labels)`) when the last batch can be smaller, otherwise
                that batch is over-weighted.
        """
        if batch_size is None:
            batch_size = self.loader.batch_size
        self.epoch_loss += loss.item() * batch_size

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of `preds` whose arg-max along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, filename):
        """Write `run_data` to `<filename>.csv` and `<filename>.json`."""
        pd.DataFrame(self.run_data).to_csv(f'{filename}.csv')

        with open(f'{filename}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [17]:
# Grid for this experiment: one learning rate, three batch sizes and five
# DataLoader worker counts.
params = OrderedDict(
    lr=[.01],
    batch_size=[100, 1000, 10000],
    num_workers=[0, 1, 2, 4, 8],
)

m = RunManager()
for run in RunBuilder.get_runs(params):
    # Every run starts from a new model, loader and optimizer.
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            images, labels = batch[0], batch[1]

            # Forward pass and loss.
            preds = network(images)
            loss = F.cross_entropy(preds, labels)

            # Backward pass and weight update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate the per-epoch statistics.
            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()

# Writes results.csv and results.json.
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,num_workers
0,1,1,0.526139,0.8039,11.224319,11.322321,0.01,100,0
1,2,1,0.563833,0.789667,8.349673,9.733183,0.01,100,1
2,3,1,0.561812,0.786983,7.539605,8.906035,0.01,100,2
3,4,1,0.561955,0.788817,7.824914,9.203557,0.01,100,4
4,5,1,0.575652,0.785967,7.565039,9.374189,0.01,100,8
5,6,1,0.953393,0.646583,7.218504,7.758574,0.01,1000,0
6,7,1,1.024711,0.598767,5.394361,6.994442,0.01,1000,1
7,8,1,1.039252,0.594833,4.189223,5.800005,0.01,1000,2
8,9,1,0.936998,0.633567,4.387878,6.061383,0.01,1000,4
9,10,1,0.917071,0.6524,4.896918,6.898695,0.01,1000,8


# PyTorch and the GPU: CUDA
## Moving to the GPU

In [18]:
# A tensor of ones shaped (1, 1, 28, 28) and a fresh Network instance.
t = torch.ones(1, 1, 28, 28)
network = Network()

In [19]:
# Move both the input tensor and the model's parameters to the CUDA device.
t = t.to('cuda')
network = network.to('cuda')

In [20]:
# Run the forward pass and show which device the result is on.
gpu_pred = network(t)
gpu_pred.device

device(type='cuda', index=0)

# Working with Tensors

In [23]:
# Two small 2x2 integer tensors used for the device experiments.
t1 = torch.tensor([[1, 2], [3, 4]])
t2 = torch.tensor([[5, 6], [7, 8]])

In [24]:
# Show the device each tensor currently lives on.
t1.device, t2.device

(device(type='cpu'), device(type='cpu'))

In [25]:
# Rebind t1 to the result of moving it to the CUDA device, then show its device.
t1 = t1.to('cuda')
t1.device

device(type='cuda', index=0)

In [26]:
# Deliberate failure: t1 is on the GPU while t2 is still on the CPU, so the
# addition raises; the error is caught and its message printed.
try:
    t1 + t2
except Exception as e:
    print(e)

expected device cuda:0 but got device cpu


In [27]:
# Same deliberate failure with the operands swapped; the captured message
# names the devices in the opposite order.
try:
    t2 + t1
except Exception as e:
    print(e)

expected device cpu but got device cuda:0


In [28]:
# Move t2 to the CUDA device as well.
t2 = t2.to('cuda')

In [29]:
# t1 and t2 have both been moved to 'cuda' above, so the addition is now attempted on one device.
t1 + t2

tensor([[ 6,  8],
        [10, 12]], device='cuda:0')

In [1]:
# NOTE(review): this cell has execution count 1 and raised NameError, i.e. it
# ran before the cell that defines the Network class. Run that cell first.
network=Network()

NameError: name 'Network' is not defined