In [1]:
%config Completer.use_jedi = False

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import transforms, datasets

In [3]:
train_data_augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5)
    ])

val_data_augmentation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5)
    ])

In [6]:
train_dataset = datasets.FashionMNIST(root='./', train=True, download=True, transform=train_data_augmentation)
val_dataset = datasets.FashionMNIST('./', train=False, download=True, transform=val_data_augmentation)

In [27]:
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=4)
val_loader = torch.utils.data.DataLoader(val_dataset, shuffle=False, batch_size=4)

In [20]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()  
        self.conv = nn.Conv2d(1, 10, 3)
        self.conv_bn = nn.BatchNorm2d(10)
        self.dropout = torch.nn.Dropout2d(0.5)
        self.fc1 = torch.nn.Linear(1690, 100)
        self.fc1_bn = nn.BatchNorm1d(100)
        self.fc2 = torch.nn.Linear(100, 10)
        
    def forward(self, x):
        x = self.conv(x)
        x = F.relu(x)
        x = F.max_pool2d(self.conv_bn(x), 2)
        x = self.dropout(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(self.fc1_bn(x))
        output = F.log_softmax(x, dim=1)
        return output

net = Net()
print(net)

Net(
  (conv): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv_bn): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=1690, out_features=100, bias=True)
  (fc1_bn): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


In [21]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [22]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in net.state_dict():
    print(param_tensor, "\t", net.state_dict()[param_tensor].size())

print()

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
conv.weight 	 torch.Size([10, 1, 3, 3])
conv.bias 	 torch.Size([10])
conv_bn.weight 	 torch.Size([10])
conv_bn.bias 	 torch.Size([10])
conv_bn.running_mean 	 torch.Size([10])
conv_bn.running_var 	 torch.Size([10])
conv_bn.num_batches_tracked 	 torch.Size([])
fc1.weight 	 torch.Size([100, 1690])
fc1.bias 	 torch.Size([100])
fc1_bn.weight 	 torch.Size([100])
fc1_bn.bias 	 torch.Size([100])
fc1_bn.running_mean 	 torch.Size([100])
fc1_bn.running_var 	 torch.Size([100])
fc1_bn.num_batches_tracked 	 torch.Size([])
fc2.weight 	 torch.Size([10, 100])
fc2.bias 	 torch.Size([10])

Optimizer's state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}]


In [23]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

Net(
  (conv): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv_bn): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout2d(p=0.5, inplace=False)
  (fc1): Linear(in_features=1690, out_features=100, bias=True)
  (fc1_bn): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)

In [29]:
for epoch in range(1):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,  2000] loss: 0.794
[1,  4000] loss: 0.778
[1,  6000] loss: 0.763
[1,  8000] loss: 0.800
[1, 10000] loss: 0.742
[1, 12000] loss: 0.762
[1, 14000] loss: 0.752
Finished Training


In [30]:
correct = 0
total = 0
with torch.no_grad():
    for data in val_loader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 72 %
