In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.nn import Linear, ReLU, Sequential, Conv2d, MaxPool2d, Module,Flatten,  BatchNorm2d, CrossEntropyLoss
from torch.optim import Adam, SGD
import torch.optim as optim
import numpy as np
from torch.optim.lr_scheduler import *
from tqdm import tqdm
import torch.nn.functional as F
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Specify the old classes (`oldClasses`) below


In [None]:
# Labels of the "old" classes: used below to filter the train/test sets and
# to size the output layer of the CNN.
oldClasses = [0,1,2,3,4,5]

# The train set and test set below contain only the K old classes (a subset of all labels)

In [None]:
# Images are only converted to tensors; no further preprocessing is applied.
transform = transforms.Compose([transforms.ToTensor()])
batch_size = 64

# Keyword arguments shared by the train and test variants.
dataset_kwargs = dict(root='./data', download=True, transform=transform)
loader_kwargs = dict(batch_size=batch_size, num_workers=2)

# Training split: reshuffled every epoch.
trainsetOld = torchvision.datasets.FashionMNIST(train=True, **dataset_kwargs)
trainloaderOld = torch.utils.data.DataLoader(trainsetOld, shuffle=True, **loader_kwargs)

# Test split: fixed order.
testsetOld = torchvision.datasets.FashionMNIST(train=False, **dataset_kwargs)
testloaderOld = torch.utils.data.DataLoader(testsetOld, shuffle=False, **loader_kwargs)

# Labels present before any filtering.
print(np.unique(testsetOld.targets))

[0 1 2 3 4 5 6 7 8 9]


In [None]:
# Restrict both the train and the test split to the labels in oldClasses.
# The datasets are modified in place (data and targets are re-assigned).
for subset in (trainsetOld, testsetOld):
    # Boolean mask that is True wherever the label is one of the old classes.
    idx = torch.zeros(subset.targets.shape, dtype=torch.bool)
    for k in oldClasses:
        idx = idx | (subset.targets == k)
    subset.targets = subset.targets[idx]
    subset.data = subset.data[idx]

In [None]:
# Sanity check after filtering: sizes of both splits and the labels left in the train split.
for subset in (trainsetOld, testsetOld):
    print(subset.data.shape, subset.targets.shape)
print(np.unique(trainsetOld.targets))

torch.Size([36000, 28, 28]) torch.Size([36000])
torch.Size([6000, 28, 28]) torch.Size([6000])
[0 1 2 3 4 5]


In [None]:
class CNN(Module):
    """VGG-style convolutional classifier for 1 x 28 x 28 images.

    Three blocks of two 3x3 convolutions followed by 2x2 max-pooling, then a
    three-layer fully connected head.

    Args:
        num_classes: Number of output logits. When ``None`` (the default) the
            head is sized to ``len(oldClasses)``, read from the notebook-level
            ``oldClasses`` list, which matches the previous behaviour.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: one logit per old class.
            num_classes = len(oldClasses)
        self.network = Sequential(
            Conv2d(1, 32, kernel_size=3, padding=1),
            ReLU(),
            Conv2d(32, 64, kernel_size=3, padding=1),
            ReLU(),
            MaxPool2d(2, 2),  # output: 64 x 14 x 14

            Conv2d(64, 128, kernel_size=3, padding=1),
            ReLU(),
            Conv2d(128, 128, kernel_size=3, padding=1),
            ReLU(),
            MaxPool2d(2, 2),  # output: 128 x 7 x 7

            Conv2d(128, 256, kernel_size=3, padding=1),
            ReLU(),
            Conv2d(256, 256, kernel_size=3, padding=1),
            ReLU(),
            MaxPool2d(2, 2),  # output: 256 x 3 x 3 (7 // 2 == 3)

            Flatten(),
            Linear(256 * 3 * 3, 1024),  # 2304 flattened features
            ReLU(),
            Linear(1024, 512),
            ReLU(),
            Linear(512, num_classes))

    def forward(self, xb):
        """Return the class logits for a batch of images ``xb``."""
        return self.network(xb)

In [None]:
# Place the model on the selected device (`.cuda()` would fail on CPU-only machines).
model = CNN().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 once 30 and again once 80 scheduler steps have been taken.
scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

In [None]:
def train(model, train_loader, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    The model is moved to the notebook-level ``device`` and put in training
    mode. For every batch the cross-entropy loss is back-propagated and
    ``optimizer`` takes one step.

    Args:
        model: Network mapping a batch of inputs to class logits.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        ``(train_loss, train_acc)``: the mean cross-entropy per example and
        the accuracy in percent, both measured on the forward passes made
        during this epoch (i.e. before each parameter update).
    """
    model = model.to(device)
    model.train()
    num_examples = 0
    correct = 0
    train_loss = 0.0
    # Wrap the loader itself (not enumerate) so tqdm can read its length and
    # display a real progress bar instead of a bare iteration counter.
    for data, target in tqdm(train_loader):
        data, target = data.to(device), target.to(device)
        # Clear gradients before the backward pass so nothing left over from
        # earlier code can leak into the first update.
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        batch_len = len(data)
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()
        # `loss` is the batch mean; scale it back to a per-example sum rather
        # than computing the cross-entropy a second time.
        train_loss += loss.item() * batch_len
        num_examples += batch_len

    train_loss /= num_examples
    train_acc = 100. * correct / num_examples
    print(f'Train set: Average loss: {train_loss:.4f}, '
          f'Accuracy: {correct}/{num_examples} ({train_acc:.2f}%)')
    return train_loss, train_acc
def test(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Batches are moved to whichever device the model's parameters live on.

    Args:
        model: Network mapping a batch of inputs to class logits.
        test_loader: Iterable yielding ``(data, target)`` batches.

    Returns:
        ``(test_loss, test_acc)``: mean cross-entropy per example and
        accuracy in percent.
    """
    device = next(model.parameters()).device

    model.eval()
    loss_sum, hits, seen = 0, 0, 0

    with torch.no_grad():
        for data, target in tqdm(test_loader):
            data = data.to(device)
            target = target.to(device)
            logits = model(data)
            # Sum (not mean) per batch so the final average is per example.
            loss_sum += F.cross_entropy(logits, target, reduction='sum').item()
            # Index of the largest logit, kept as a column vector.
            pred = logits.max(1, keepdim=True)[1]
            hits += pred.eq(target.view_as(pred)).sum().item()
            seen += len(data)

    test_loss = loss_sum / seen
    test_acc = 100. * hits / seen

    print(f'Test set: Average loss: {test_loss:.4f}, '
          f'Accuracy: {hits}/{seen} ({test_acc:.2f}%)')

    return test_loss, test_acc

In [None]:
# Print a per-layer table of output shapes and parameter counts for a single
# 1 x 28 x 28 input.
from torchsummary import summary
summary(model, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
              ReLU-2           [-1, 32, 28, 28]               0
            Conv2d-3           [-1, 64, 28, 28]          18,496
              ReLU-4           [-1, 64, 28, 28]               0
         MaxPool2d-5           [-1, 64, 14, 14]               0
            Conv2d-6          [-1, 128, 14, 14]          73,856
              ReLU-7          [-1, 128, 14, 14]               0
            Conv2d-8          [-1, 128, 14, 14]         147,584
              ReLU-9          [-1, 128, 14, 14]               0
        MaxPool2d-10            [-1, 128, 7, 7]               0
           Conv2d-11            [-1, 256, 7, 7]         295,168
             ReLU-12            [-1, 256, 7, 7]               0
           Conv2d-13            [-1, 256, 7, 7]         590,080
             ReLU-14            [-1, 25

In [None]:
# Train on the old classes only, keeping one history entry per epoch.
Epochs = 100
train_loss_lst, train_acc_lst = [], []
val_loss_lst, val_acc_lst = [], []
for epoch in range(Epochs):
    print(f"\nEpoch: {epoch}")
    train_loss, train_acc = train(model, trainloaderOld, optimizer)
    test_loss, test_acc = test(model, testloaderOld)
    # Record each metric and echo the value just stored, in the order
    # train loss, train accuracy, validation loss, validation accuracy.
    for history, value in ((train_loss_lst, train_loss),
                           (train_acc_lst, train_acc),
                           (val_loss_lst, test_loss),
                           (val_acc_lst, test_acc)):
        history.append(value)
        print(history[-1])
    # One scheduler step per epoch.
    scheduler.step()


Epoch: 0


563it [00:18, 30.53it/s]


Train set: Average loss: 1.7918, Accuracy: 6017/36000 (16.71%)


100%|██████████| 94/94 [00:01<00:00, 65.48it/s]

Test set: Average loss: 1.7916, Accuracy: 1000/6000 (16.67%)
1.7918403951856825
16.71388888888889
1.7916202672322592
16.666666666666668

Epoch: 1



563it [00:18, 31.14it/s]


Train set: Average loss: 1.7916, Accuracy: 6112/36000 (16.98%)


100%|██████████| 94/94 [00:01<00:00, 64.45it/s]

Test set: Average loss: 1.7915, Accuracy: 1000/6000 (16.67%)
1.7915954383214314
16.977777777777778
1.791451727549235
16.666666666666668

Epoch: 2



563it [00:17, 31.28it/s]


Train set: Average loss: 1.7914, Accuracy: 6290/36000 (17.47%)


100%|██████████| 94/94 [00:01<00:00, 65.18it/s]

Test set: Average loss: 1.7912, Accuracy: 1635/6000 (27.25%)
1.7914159545898438
17.47222222222222
1.7912364832560221
27.25

Epoch: 3



563it [00:18, 31.19it/s]


Train set: Average loss: 1.7911, Accuracy: 7152/36000 (19.87%)


100%|██████████| 94/94 [00:01<00:00, 64.87it/s]

Test set: Average loss: 1.7909, Accuracy: 1003/6000 (16.72%)
1.7911398760477701
19.866666666666667
1.7908511505126954
16.716666666666665

Epoch: 4



563it [00:18, 30.98it/s]


Train set: Average loss: 1.7905, Accuracy: 8113/36000 (22.54%)


100%|██████████| 94/94 [00:01<00:00, 64.14it/s]

Test set: Average loss: 1.7899, Accuracy: 2371/6000 (39.52%)
1.790538558324178
22.53611111111111
1.7899163347880045
39.516666666666666

Epoch: 5



563it [00:18, 31.24it/s]


Train set: Average loss: 1.7888, Accuracy: 9825/36000 (27.29%)


100%|██████████| 94/94 [00:01<00:00, 66.14it/s]

Test set: Average loss: 1.7867, Accuracy: 2746/6000 (45.77%)
1.7887595387564765
27.291666666666668
1.7866722361246745
45.766666666666666

Epoch: 6



563it [00:17, 31.37it/s]


Train set: Average loss: 1.7774, Accuracy: 13710/36000 (38.08%)


100%|██████████| 94/94 [00:01<00:00, 64.46it/s]

Test set: Average loss: 1.7503, Accuracy: 1966/6000 (32.77%)
1.7773750517103406
38.083333333333336
1.7503491414388022
32.766666666666666

Epoch: 7



563it [00:17, 31.38it/s]


Train set: Average loss: 1.0805, Accuracy: 21151/36000 (58.75%)


100%|██████████| 94/94 [00:01<00:00, 65.49it/s]

Test set: Average loss: 0.5487, Accuracy: 4768/6000 (79.47%)
1.0805469979974958
58.75277777777778
0.5486924085617065
79.46666666666667

Epoch: 8



563it [00:18, 31.27it/s]


Train set: Average loss: 0.5075, Accuracy: 29291/36000 (81.36%)


100%|██████████| 94/94 [00:01<00:00, 63.88it/s]

Test set: Average loss: 0.4766, Accuracy: 4989/6000 (83.15%)
0.507474880112542
81.3638888888889
0.4765944604873657
83.15

Epoch: 9



563it [00:17, 31.37it/s]


Train set: Average loss: 0.4435, Accuracy: 30262/36000 (84.06%)


100%|██████████| 94/94 [00:01<00:00, 66.66it/s]

Test set: Average loss: 0.4747, Accuracy: 4889/6000 (81.48%)
0.44351986241340635
84.06111111111112
0.47472569354375205
81.48333333333333

Epoch: 10



563it [00:18, 30.98it/s]


Train set: Average loss: 0.3958, Accuracy: 30787/36000 (85.52%)


100%|██████████| 94/94 [00:01<00:00, 65.92it/s]

Test set: Average loss: 0.3892, Accuracy: 5127/6000 (85.45%)
0.3957715063624912
85.51944444444445
0.3891851512591044
85.45

Epoch: 11



563it [00:18, 31.18it/s]


Train set: Average loss: 0.3608, Accuracy: 31243/36000 (86.79%)


100%|██████████| 94/94 [00:01<00:00, 66.81it/s]

Test set: Average loss: 0.3724, Accuracy: 5157/6000 (85.95%)
0.36081197693612843
86.78611111111111
0.3723902781804403
85.95

Epoch: 12



563it [00:18, 31.15it/s]


Train set: Average loss: 0.3325, Accuracy: 31587/36000 (87.74%)


100%|██████████| 94/94 [00:01<00:00, 65.32it/s]

Test set: Average loss: 0.3394, Accuracy: 5242/6000 (87.37%)
0.33247783275445303
87.74166666666666
0.3394018065134684
87.36666666666666

Epoch: 13



563it [00:17, 31.36it/s]


Train set: Average loss: 0.3078, Accuracy: 31902/36000 (88.62%)


100%|██████████| 94/94 [00:01<00:00, 66.59it/s]

Test set: Average loss: 0.3266, Accuracy: 5247/6000 (87.45%)
0.30781253544489545
88.61666666666666
0.3265546243985494
87.45

Epoch: 14



563it [00:18, 31.21it/s]


Train set: Average loss: 0.2872, Accuracy: 32167/36000 (89.35%)


100%|██████████| 94/94 [00:01<00:00, 66.25it/s]

Test set: Average loss: 0.3180, Accuracy: 5278/6000 (87.97%)
0.28719553849432206
89.35277777777777
0.3180377586682637
87.96666666666667

Epoch: 15



563it [00:17, 31.28it/s]


Train set: Average loss: 0.2774, Accuracy: 32257/36000 (89.60%)


100%|██████████| 94/94 [00:01<00:00, 64.98it/s]

Test set: Average loss: 0.3170, Accuracy: 5248/6000 (87.47%)
0.2774363271660275
89.60277777777777
0.31704275449117025
87.46666666666667

Epoch: 16



563it [00:18, 30.91it/s]


Train set: Average loss: 0.2629, Accuracy: 32489/36000 (90.25%)


100%|██████████| 94/94 [00:01<00:00, 65.60it/s]

Test set: Average loss: 0.2750, Accuracy: 5369/6000 (89.48%)
0.26285962580309974
90.24722222222222
0.2750286622842153
89.48333333333333

Epoch: 17



563it [00:17, 31.42it/s]


Train set: Average loss: 0.2551, Accuracy: 32596/36000 (90.54%)


100%|██████████| 94/94 [00:01<00:00, 66.47it/s]

Test set: Average loss: 0.2863, Accuracy: 5339/6000 (88.98%)
0.25507710401217143
90.54444444444445
0.28627689719200133
88.98333333333333

Epoch: 18



563it [00:17, 31.42it/s]


Train set: Average loss: 0.2455, Accuracy: 32649/36000 (90.69%)


100%|██████████| 94/94 [00:01<00:00, 66.17it/s]

Test set: Average loss: 0.2738, Accuracy: 5399/6000 (89.98%)
0.24553300676080916
90.69166666666666
0.27381358822186785
89.98333333333333

Epoch: 19



563it [00:18, 31.10it/s]


Train set: Average loss: 0.2375, Accuracy: 32784/36000 (91.07%)


100%|██████████| 94/94 [00:01<00:00, 65.29it/s]

Test set: Average loss: 0.2585, Accuracy: 5414/6000 (90.23%)
0.23745366793870926
91.06666666666666
0.2584817158381144
90.23333333333333

Epoch: 20



493it [00:15, 31.19it/s]


KeyboardInterrupt: ignored

# Get the new model: only the last layer is modified

In [None]:
import copy
# Deep copy, so that changes made to newModel do not touch `model`.
newModel = copy.deepcopy(model)

In [None]:
# Display the module tree of the copied model.
newModel

CNN(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Flatten(start_dim=1, end_dim=-1)
    (16): Linear(in_features=2304, out_features=1024, bias=True)
    (17): ReLU()
    (18): Linear(in_features=1024, o

In [None]:
# Assigning to `out_features` only changes an attribute: the weight matrix
# keeps its old shape, so the layer would still emit the old number of logits.
# Replace the layer with a real 10-way Linear instead.
old_head = newModel.network[-1]
new_head = Linear(old_head.in_features, 10).to(old_head.weight.device)
with torch.no_grad():
    # Keep what was already learned: copy the rows of the old classes into
    # the matching rows of the new head (logit i corresponds to label i).
    num_old = old_head.weight.shape[0]
    new_head.weight[:num_old] = old_head.weight
    new_head.bias[:num_old] = old_head.bias
newModel.network[-1] = new_head

In [None]:
# Display the module tree again after the change to the last layer.
newModel

CNN(
  (network): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Flatten(start_dim=1, end_dim=-1)
    (16): Linear(in_features=2304, out_features=1024, bias=True)
    (17): ReLU()
    (18): Linear(in_features=1024, o

In [None]:
# Unfiltered FashionMNIST splits: every label is kept.
full_dataset_kwargs = dict(root='./data', download=True, transform=transform)
full_loader_kwargs = dict(batch_size=batch_size, num_workers=2)

# Training split: reshuffled every epoch.
trainsetFull = torchvision.datasets.FashionMNIST(train=True, **full_dataset_kwargs)
trainloaderFull = torch.utils.data.DataLoader(trainsetFull, shuffle=True, **full_loader_kwargs)

# Test split: fixed order.
testsetFull = torchvision.datasets.FashionMNIST(train=False, **full_dataset_kwargs)
testloaderFull = torch.utils.data.DataLoader(testsetFull, shuffle=False, **full_loader_kwargs)

for subset in (trainsetFull, testsetFull):
    print(subset.data.shape, subset.targets.shape)
In [None]:
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

In [None]:
Epochs = 100
train_loss_lst=[]
train_acc_lst=[]
val_loss_lst=[]
val_acc_lst=[]
for epoch in range(0, Epochs):
    print(f"\nEpoch: {epoch}")
    train_loss, train_acc = train(model, trainloaderFull, optimizer)
    test_loss, test_acc = test(model, testloaderFull)
    train_loss_lst.append(train_loss)
    train_acc_lst.append(train_acc)
    val_loss_lst.append(test_loss)
    val_acc_lst.append(test_acc)
    print(train_loss_lst[-1])
    print(train_acc_lst[-1])
    print(val_loss_lst[-1])
    print(val_acc_lst[-1])
    scheduler.step()