In [21]:
import torch
from torch import nn

from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [22]:
training_data = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())

test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor())

In [23]:
batch_size = 100
# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [24]:
class FashionNet(nn.Module):
    
    def __init__(self,):
        super(FashionNet, self).__init__()
        self.flatten = nn.Flatten()
        
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512, bias=False),
            nn.ReLU(), 
            # nn.Linear(512, 512),
            # nn.ReLU(),
            nn.Linear(512, 10, bias=False),
            nn.LogSoftmax(dim=1)
        )
        
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [25]:
class FashionCNN(nn.Module):
    
    def __init__(self):
        super(FashionCNN, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        self.fc1 = nn.Linear(in_features=64*6*6, out_features=600)
        self.drop = nn.Dropout(0.25)
        self.fc2 = nn.Linear(in_features=600, out_features=120)
        self.fc3 = nn.Linear(in_features=120, out_features=10)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.drop(out)
        out = self.fc2(out)
        out = self.fc3(out)
        
        return out

In [26]:
# model = FashionNet().to(device)
model = FashionCNN().to(device)
print(model)

FashionCNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2304, out_features=600, bias=True)
  (drop): Dropout(p=0.25, inplace=False)
  (fc2): Linear(in_features=600, out_features=120, bias=True)
  (fc3): Linear(in_features=120, out_features=10, bias=True)
)


In [27]:
loss_function = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0)

In [28]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # back-propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [29]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [30]:
epochs = 10
from tqdm import trange

for t in trange(epochs):
    print(f'Epoch {t+1} \n----------------------------------------')
    train(train_dataloader, model, loss_function, optim)
    test(test_dataloader, model, loss_function)
    
print('Done \n')

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1 
----------------------------------------
loss: 2.348854  [    0/60000]
loss: 1.874952  [10000/60000]
loss: 1.532957  [20000/60000]
loss: 1.223015  [30000/60000]
loss: 1.048707  [40000/60000]
loss: 0.991906  [50000/60000]


 10%|█         | 1/10 [00:06<00:59,  6.66s/it]

Test Error: 
 Accuracy: 74.9%, Avg loss: 0.885910 

Epoch 2 
----------------------------------------
loss: 0.840822  [    0/60000]
loss: 0.749423  [10000/60000]
loss: 0.761563  [20000/60000]
loss: 0.735106  [30000/60000]
loss: 0.630117  [40000/60000]
loss: 0.719002  [50000/60000]


 20%|██        | 2/10 [00:13<00:53,  6.65s/it]

Test Error: 
 Accuracy: 77.8%, Avg loss: 0.669315 

Epoch 3 
----------------------------------------
loss: 0.571054  [    0/60000]
loss: 0.557339  [10000/60000]
loss: 0.558707  [20000/60000]
loss: 0.609897  [30000/60000]
loss: 0.545361  [40000/60000]
loss: 0.630174  [50000/60000]


 30%|███       | 3/10 [00:19<00:46,  6.64s/it]

Test Error: 
 Accuracy: 79.4%, Avg loss: 0.589404 

Epoch 4 
----------------------------------------
loss: 0.464663  [    0/60000]
loss: 0.487718  [10000/60000]
loss: 0.482583  [20000/60000]
loss: 0.566325  [30000/60000]
loss: 0.506508  [40000/60000]
loss: 0.591364  [50000/60000]


 40%|████      | 4/10 [00:26<00:39,  6.63s/it]

Test Error: 
 Accuracy: 80.9%, Avg loss: 0.542144 

Epoch 5 
----------------------------------------
loss: 0.421303  [    0/60000]
loss: 0.461275  [10000/60000]
loss: 0.458662  [20000/60000]
loss: 0.545169  [30000/60000]
loss: 0.498443  [40000/60000]
loss: 0.573027  [50000/60000]


 50%|█████     | 5/10 [00:33<00:33,  6.63s/it]

Test Error: 
 Accuracy: 82.2%, Avg loss: 0.508486 

Epoch 6 
----------------------------------------
loss: 0.363762  [    0/60000]
loss: 0.439028  [10000/60000]
loss: 0.440961  [20000/60000]
loss: 0.533592  [30000/60000]
loss: 0.489181  [40000/60000]
loss: 0.543454  [50000/60000]


 60%|██████    | 6/10 [00:39<00:26,  6.65s/it]

Test Error: 
 Accuracy: 83.2%, Avg loss: 0.482595 

Epoch 7 
----------------------------------------
loss: 0.324518  [    0/60000]
loss: 0.414605  [10000/60000]
loss: 0.409528  [20000/60000]
loss: 0.509687  [30000/60000]
loss: 0.467854  [40000/60000]
loss: 0.520250  [50000/60000]


 70%|███████   | 7/10 [00:46<00:19,  6.64s/it]

Test Error: 
 Accuracy: 83.8%, Avg loss: 0.461689 

Epoch 8 
----------------------------------------
loss: 0.311366  [    0/60000]
loss: 0.391724  [10000/60000]
loss: 0.381753  [20000/60000]
loss: 0.507518  [30000/60000]
loss: 0.480034  [40000/60000]
loss: 0.499158  [50000/60000]


 80%|████████  | 8/10 [00:53<00:13,  6.64s/it]

Test Error: 
 Accuracy: 84.6%, Avg loss: 0.444239 

Epoch 9 
----------------------------------------
loss: 0.281784  [    0/60000]
loss: 0.380443  [10000/60000]
loss: 0.371230  [20000/60000]
loss: 0.495362  [30000/60000]
loss: 0.481678  [40000/60000]
loss: 0.482577  [50000/60000]


 90%|█████████ | 9/10 [00:59<00:06,  6.62s/it]

Test Error: 
 Accuracy: 85.1%, Avg loss: 0.429582 

Epoch 10 
----------------------------------------
loss: 0.275389  [    0/60000]
loss: 0.368399  [10000/60000]
loss: 0.375664  [20000/60000]
loss: 0.481281  [30000/60000]
loss: 0.459809  [40000/60000]
loss: 0.463594  [50000/60000]


100%|██████████| 10/10 [01:06<00:00,  6.63s/it]

Test Error: 
 Accuracy: 85.5%, Avg loss: 0.417044 

Done 




