In [1]:
%config IPCompleter.greedy=True

In [2]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms.transforms as transforms
import torch.nn.functional as func

print(torch.cuda.is_available())
print(torch.__version__)

True
0.4.1


In [3]:
train_set= torchvision.datasets.FashionMNIST(
    train= True,
    root= './data/FashionMNIST/train/',
    download= True,
    transform= transforms.Compose([transforms.ToTensor()])
)
test_set= torchvision.datasets.FashionMNIST(
    train= False,
    root= './data/FashionMNIST/test/',
    download= True,
    transform= transforms.Compose([transforms.ToTensor()])
)
print("train set:", len(train_set))
print("test set:", len(test_set))

train set: 60000
test set: 10000


In [4]:
# Network
# formula for channels 

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        # Convolutional layer1
        self.conv1= nn.Sequential()
        self.conv1.add_module("conv1", nn.Conv2d(in_channels= 1, out_channels= 6, kernel_size= 5))
        self.conv1.add_module("bn1", nn.BatchNorm2d(num_features= 6, eps= 1e-05, momentum= 0.1, affine= True))
        self.conv1.add_module("relu", nn.ReLU(inplace= False))
        self.conv1.add_module("pool", nn.MaxPool2d(kernel_size= 2, stride= 2))

        # Convolutional layer2
        self.conv2= nn.Sequential()
        self.conv2.add_module("conv2", nn.Conv2d(in_channels= 6, out_channels= 12, kernel_size= 5))
        self.conv2.add_module("bn2", nn.BatchNorm2d(num_features= 12, eps= 1e-05, momentum= 0.1, affine= True))
        self.conv2.add_module("relu", nn.ReLU(inplace= False))
        self.conv2.add_module("pool", nn.MaxPool2d(kernel_size= 2, stride= 2))

        # Linear layer1
        self.fc1= nn.Sequential()
        self.fc1.add_module("linear", nn.Linear(in_features= 12*4*4, out_features= 120))
        #  self.fc1.add_module("bn3", nn.BatchNorm1d(num_features= 120, eps= 1e-05, momentum= 0.1, affine= True))
        self.fc1.add_module("relu", nn.ReLU(inplace= False))
        
        # Linear layer2
        self.fc2= nn.Sequential()
        self.fc2.add_module("linear", nn.Linear(in_features= 120, out_features= 60))
        #self.fc2.add_module("bn4", nn.BatchNorm1d(num_features= 60, eps= 1e-5, momentum= 0.1, affine= True))
        self.fc2.add_module("relu", nn.ReLU(inplace= False))

        # Output layer
        self.out= nn.Sequential()
        self.out.add_module("output", nn.Linear(in_features= 60, out_features= 10))
        # by default activation function is softmax

    def forward(self, t):
        t= t
        t= self.conv1(t)
        t= self.conv2(t)
        t= t.reshape(-1, 12*4*4)
        t= self.fc1(t)
        t= self.fc2(t)
        t= self.out(t)
        return t

In [5]:
# Instance of Network
network= Network()
network

Network(
  (conv1): Sequential(
    (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (bn1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
    (bn2): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Sequential(
    (linear): Linear(in_features=192, out_features=120, bias=True)
    (relu): ReLU()
  )
  (fc2): Sequential(
    (linear): Linear(in_features=120, out_features=60, bias=True)
    (relu): ReLU()
  )
  (out): Sequential(
    (output): Linear(in_features=60, out_features=10, bias=True)
  )
)

In [6]:
train_loader= torch.utils.data.DataLoader(train_set, batch_size= 10)

In [7]:
network= Network()

train_loader= torch.utils.data.DataLoader(train_set, batch_size= 1000)
test_loader= torch.utils.data.DataLoader(test_set, batch_size= 1000)

train_batch= next(iter(train_loader))
images, labels= train_batch
preds= network(images)
train_loss= func.cross_entropy(preds, labels)

test_batch= next(iter(test_loader))
images, labels= test_batch
preds= network(images)
test_loss= func.cross_entropy(preds, labels)

optimizer= optim.Adam(network.parameters(), lr= 0.01)

print('train_loss:', train_loss.item())
print('test_loss:', test_loss.item())

train_loss: 2.3143973350524902
test_loss: 2.3147315979003906


In [39]:
# train test epochs
device= torch.device('cuda')
network= Network()
train_loader= torch.utils.data.DataLoader(train_set, batch_size= 100)
test_loader= torch.utils.data.DataLoader(test_set, batch_size= 10000)
optimizer= optim.Adam(network.parameters(), lr= 0.1)
test_batch= next(iter(test_loader))

# gpu specific
network.cuda()

epochs= 10
for epoch in range(1, epochs):
    i= 0
    for batch in train_loader:
        i+=1
        images, labels= batch
        # .to(cuda) for GPU
        images= images.to('cuda')
        labels= labels.to('cuda')
        preds= network(images)
        train_loss= func.cross_entropy(preds, labels)

        optimizer.zero_grad()
        train_loss.backward(retain_graph=True)
        optimizer.step()
        
        images, labels= test_batch
        # .to(cuda) for GPU
        images= images.to('cuda')
        labels= labels.to('cuda')
        preds= network(images)
        test_loss= func.cross_entropy(preds, labels)
        print('training on epoch: {} and batch: {}, train loss: {:.2f}, test loss: {:.2f}'.format(epoch, i, train_loss.item(), test_loss.item()))



training on epoch: 1 and batch: 1, train loss: 2.31, test loss: 13.34
training on epoch: 1 and batch: 2, train loss: 14.34, test loss: 6.23
training on epoch: 1 and batch: 3, train loss: 6.84, test loss: 3.68
training on epoch: 1 and batch: 4, train loss: 3.76, test loss: 2.56
training on epoch: 1 and batch: 5, train loss: 2.57, test loss: 2.31
training on epoch: 1 and batch: 6, train loss: 2.35, test loss: 2.31
training on epoch: 1 and batch: 7, train loss: 2.33, test loss: 2.31
training on epoch: 1 and batch: 8, train loss: 2.26, test loss: 2.32
training on epoch: 1 and batch: 9, train loss: 2.34, test loss: 2.32
training on epoch: 1 and batch: 10, train loss: 2.28, test loss: 2.29
training on epoch: 1 and batch: 11, train loss: 2.32, test loss: 2.32
training on epoch: 1 and batch: 12, train loss: 2.28, test loss: 2.20
training on epoch: 1 and batch: 13, train loss: 2.23, test loss: 2.23
training on epoch: 1 and batch: 14, train loss: 2.22, test loss: 2.21
training on epoch: 1 and ba

KeyboardInterrupt: 

In [38]:
# final testing:
def get_num_correct(preds, labels):
    return torch.argmax(preds, dim= 1).eq(labels).sum()
test_loader= torch.utils.data.DataLoader(test_set, batch_size= 1)
images, labels= test_batch
images= images.to('cuda')
labels= labels.to('cuda')
preds= network(images)
print('correctly classified from 10000 images:', get_num_correct(preds, labels).item())

RuntimeError: CUDA error: out of memory

In [13]:
# saving the torch model
PATH= 'model'
torch.save(network.state_dict(), PATH)

OSError: [Errno 22] Invalid argument: 'model/'

In [None]:
# plotting the confusion matrix:


In [0]:
# Summary
from torch import optim

# train dataset download
train_set= torchvision.datasets.FashionMNIST(
    root= './data/FashionMNIST',
    train= True,
    download= True,
    transform= transforms.Compose([
        transforms.ToTensor()
    ])
)

# network instance
network= Network()

# train dataset loader
train_loader= torch.utils.data.DataLoader(train_set, batch_size= 10)

# optimizer function
optimizer= optim.Adam(network.parameters(), lr=0.01)

# load the first batch and iterate there after
batch = next(iter(data_loader))
images, labels= batch

# prediction before anything
preds= network(images)
print('previous prediction', accuracy(preds, labels))

# initialize the loss function
loss= nn.functional.cross_entropy(preds, labels)
print('previous loss:', loss.item())

# initialize backward propagation
loss.backward() # calculate gradients
optimizer.step() # update weights

# prediction after updating weights
preds= network(images)
loss= nn.functional.cross_entropy(preds, labels)
print('new loss:', loss.item())
print('new prediction:', accuracy(preds, labels))
