In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils import data
from PIL import Image

torch.set_printoptions(linewidth=120)  # Display options for output
torch.set_grad_enabled(True)

from torch.utils.tensorboard import SummaryWriter
# from tensorboardX import SummaryWriter
# import sys
# print(sys.executable)


In [2]:
# Report the library versions and CUDA setup this notebook runs against,
# one value per line.
print(
    torch.__version__,
    torchvision.__version__,
    torch.cuda.is_available(),
    torch.version.cuda,
    torch.cuda.device_count(),
    sep='\n',
)

1.5.1+cu101
0.6.1+cu101
True
10.1
1


In [3]:
def get_num_correct(preds, labels):
    """Count the predictions whose highest-scoring class equals the label.

    Args:
        preds: 2-D tensor of per-class scores; the predicted class for each
            row is the index of its maximum along dim 1.
        labels: tensor of class indices compared element-wise against the
            predicted classes.

    Returns:
        The number of matching entries as a plain Python number.
    """
    predicted_classes = torch.argmax(preds, dim=1)
    matches = torch.eq(predicted_classes, labels)
    return matches.sum().item()

In [4]:
class Network(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The flatten size 12 * 4 * 4 corresponds to a 1-channel 28x28 input:
    28 -> conv(5) -> 24 -> pool(2) -> 12 -> conv(5) -> 8 -> pool(2) -> 4,
    with 12 feature maps coming out of the second convolution.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Fully connected classifier head ending in 10 class scores.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run the forward pass and return raw (un-normalised) class scores.

        Args:
            t: input batch fed to the first convolution (1 input channel).

        Returns:
            Tensor with 10 scores per flattened sample; no softmax is applied.
        """
        # Hidden conv layer 1: convolution -> ReLU -> 2x2 max-pool.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Hidden conv layer 2: convolution -> ReLU -> 2x2 max-pool.
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # Hidden linear layer 1: flatten each sample to fc1's input width first.
        t = t.reshape(-1, self.fc1.in_features)
        t = F.relu(self.fc1(t))

        # Hidden linear layer 2.
        t = F.relu(self.fc2(t))

        # Output layer: logits only (softmax is intentionally left out).
        return self.out(t)

In [5]:
# FashionMNIST training split, downloaded on first use; each image is
# converted to a tensor by ToTensor.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [6]:
# Shuffled loader over the training set, 100 samples per batch.
train_loader = data.DataLoader(
    train_set,
    batch_size=100,
    shuffle=True,
)

In [7]:
# Write one batch of images and the model graph to a default TensorBoard run.
tb = SummaryWriter()

network = Network()
images, labels = next(iter(train_loader))  # first batch from the loader
grid = torchvision.utils.make_grid(images)  # tile the batch into one image

tb.add_image(tag='images', img_tensor=grid)
tb.add_graph(model=network, input_to_model=images)
tb.close()

In [6]:
from itertools import product

In [8]:
# Hyperparameter grid: every combination of these values is swept.
parameters = {
    'lr': [.01, .001],
    'batch_size': [10, 100, 1000],
    'shuffle': [True, False],
}

In [9]:
# Candidate value lists, in the same order as the keys of `parameters`.
para_values = list(parameters.values())
para_values

[[0.01, 0.001], [10, 100, 1000], [True, False]]

In [11]:
# Show every hyperparameter combination produced by the Cartesian product.
for lr, batch_size, shuffle in product(*para_values):
    print(f'{lr} {batch_size} {shuffle}')

0.01 10 True
0.01 10 False
0.01 100 True
0.01 100 False
0.01 1000 True
0.01 1000 False
0.001 10 True
0.001 10 False
0.001 100 True
0.001 100 False
0.001 1000 True
0.001 1000 False


In [12]:
# Skeleton of the sweep: builds the run label for each hyperparameter
# combination. No training happens in this cell.
for lr, batch_size, shuffle in product(*para_values):
    # Label describing this combination ("batch_size" was misspelled "batch_zize").
    comment = f'batch_size={batch_size} lr={lr} shuffle={shuffle}'

    #  Training progress given the set of parameters

In [13]:
# Create a new Network instance and bind it to the module-level name `network`.
network = Network()

In [None]:

# Hyperparameter sweep: for every (lr, batch_size, shuffle) combination, train
# a fresh Network for 10 epochs and log the run to its own TensorBoard writer.
for lr, batch_size, shuffle in product(*para_values):
    # Run label passed to SummaryWriter so each combination is identifiable.
    comment = f'batch_size={batch_size} lr={lr} shuffle={shuffle}'
    network = Network()
    # Pass the swept `shuffle` value through; hard-coding True would make the
    # shuffle=True and shuffle=False runs identical in configuration.
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    # One sample batch, used only for the image grid and graph logging.
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)

    tb = SummaryWriter(comment=comment)
    try:
        tb.add_image('images', grid)
        tb.add_graph(network, images)

        for epoch in range(10):
            total_loss = 0
            total_correct = 0

            for images, labels in train_loader:  # get batch
                preds = network(images)  # pass batch
                loss = F.cross_entropy(preds, labels)  # calculate loss

                optimizer.zero_grad()
                loss.backward()  # calculate gradients
                optimizer.step()  # update weights

                # cross_entropy returns the batch mean, so weight it by the
                # number of samples actually in this batch (the final batch
                # may be smaller than `batch_size`).
                total_loss += loss.item() * images.shape[0]
                total_correct += get_num_correct(preds, labels)

            tb.add_scalar('Loss', total_loss, epoch)
            tb.add_scalar('Number_correct', total_correct, epoch)
            tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

            # Parameter and gradient distributions of the first conv layer.
            tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
            tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
            tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

            print("epoch: ", epoch, "total_correct:", total_correct, "loss", total_loss)
    finally:
        # Close the writer even if training raises, so logged events are flushed.
        tb.close()


epoch:  0 total_correct: 46214 loss 37052.890387880616
epoch:  1 total_correct: 48392 loss 31638.234238872537
epoch:  2 total_correct: 48874 loss 30788.25586864259
epoch:  3 total_correct: 48265 loss 32969.816212486476
epoch:  4 total_correct: 48509 loss 31854.83602920547
epoch:  5 total_correct: 48894 loss 31208.60786990961
epoch:  6 total_correct: 49040 loss 30533.553142123856
epoch:  7 total_correct: 48983 loss 31055.837457957387
epoch:  8 total_correct: 49071 loss 30932.74570671376
epoch:  9 total_correct: 49001 loss 30960.79490360804
epoch:  0 total_correct: 46549 loss 36736.30680216942
epoch:  1 total_correct: 48975 loss 30994.755089245737
epoch:  2 total_correct: 48973 loss 30882.59115791414
epoch:  3 total_correct: 49415 loss 30047.73499167408
epoch:  4 total_correct: 49011 loss 31396.26949705649
epoch:  5 total_correct: 48229 loss 33073.97709154524
epoch:  6 total_correct: 48427 loss 32384.98503467068
epoch:  7 total_correct: 48735 loss 31267.208658065647
epoch:  8 total_corre