In [1]:
# !pip install pyitlib
# from pyitlib import discrete_random_variable as drv
import numpy as np
from glob import glob
import os
from PIL import Image
import operator
from shutil import copyfile
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import math
from skimage.measure import shannon_entropy
from google.colab import output
# Turn on Colab's custom widget manager (presumably needed for the
# tqdm.notebook progress bars to render — TODO confirm).
output.enable_custom_widget_manager()

# Global switch consulted by the training functions to decide whether the
# model and batches are moved to CUDA.
train_on_gpu = torch.cuda.is_available()
print(f'Run on GPU: {train_on_gpu!s}')

Run on GPU: True


In [2]:
# create Dataset object to support batch training
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset pairing an indexable of features with an indexable of labels.

    Args:
        features: Indexable collection of samples.
        labels: Indexable collection of targets; its length defines the
            dataset size.
        transform: Callable applied to a sample each time it is accessed,
            or None to return samples untouched.
    """

    def __init__(self, features, labels, transform):
        self.features, self.labels, self.transform = features, labels, transform

    def __len__(self):
        """Return the number of labels (used as the dataset length)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair at ``idx``, transformed if configured."""
        sample = self.features[idx]
        if self.transform is not None:
            sample = self.transform(sample)
        return (sample, self.labels[idx])

            
class Cutout(object):
    """Zero out randomly placed square patches of an image tensor.

    Args:
        n_holes (int): How many patches are cut from each image.
        length (int): Nominal side length of each patch, in pixels. The patch
            spans ``length // 2`` pixels on either side of a random centre
            and is clipped at the image border.
    """

    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        """Apply the cutout mask.

        Args:
            img (Tensor): Image tensor laid out as (C, H, W).
        Returns:
            Tensor: A new tensor equal to ``img`` with the patches set to 0
            in every channel.
        """
        height, width = img.size(1), img.size(2)
        half = self.length // 2

        # Single-channel mask: 1 keeps a pixel, 0 removes it.
        keep = np.ones((height, width), np.float32)

        for _ in range(self.n_holes):
            # Draw the row first, then the column (the order matters for
            # reproducibility under a fixed NumPy seed).
            centre_y = np.random.randint(height)
            centre_x = np.random.randint(width)

            rows = slice(np.clip(centre_y - half, 0, height),
                         np.clip(centre_y + half, 0, height))
            cols = slice(np.clip(centre_x - half, 0, width),
                         np.clip(centre_x + half, 0, width))
            keep[rows, cols] = 0.

        # Broadcast the (H, W) mask across all channels of the image.
        keep_tensor = torch.from_numpy(keep).expand_as(img)
        return img * keep_tensor

In [3]:
# Per-channel normalisation; the statistics are given on the 0-255 scale and
# divided by 255 so they apply to tensors in [0, 1].
normalize = torchvision.transforms.transforms.Normalize(
    mean=[125.3 / 255.0, 123.0 / 255.0, 113.9 / 255.0],
    std=[63.0 / 255.0, 62.1 / 255.0, 66.7 / 255.0],
)

In [37]:
def calculate_entropy(img):
    """Return the Shannon entropy (in bits) of an image's grayscale histogram.

    Args:
        img: Object exposing ``convert('L')`` whose result can be turned into
            a NumPy array (e.g. a PIL image).

    Returns:
        Entropy of the 256-bin intensity histogram, as a NumPy float.
    """
    # Grayscale intensities rescaled by 1/255.
    intensities = np.array(img.convert('L')) / 255.0

    # 256-bin histogram of the intensities; only the counts are needed.
    counts, _ = np.histogram(intensities, bins=256)
    probabilities = counts / np.sum(counts)

    # Machine epsilon keeps log2 finite for empty bins.
    eps = np.finfo(float).eps
    return -np.sum(probabilities * np.log2(probabilities + eps))

def sort_by_entropy(origin_dataset):
    """Score every dataset item by image entropy and sort ascending.

    Args:
        origin_dataset: Iterable of items whose first element is the image
            passed to ``calculate_entropy``.

    Returns:
        tuple: ``(results, entropy_values)`` where ``results`` is a list of
        ``[item, entropy]`` pairs ordered from lowest to highest entropy and
        ``entropy_values`` lists the entropies in the original dataset order.
    """
    scored = [[item, calculate_entropy(item[0])] for item in origin_dataset]

    # Capture the scores before sorting so they stay in dataset order.
    entropy_values = [pair[1] for pair in scored]

    # list.sort is stable, so ties keep their original relative order.
    scored.sort(key=lambda pair: pair[1])
    return scored, entropy_values

In [5]:
def get_dataloader(trainset, testset, percentages, batch_size=128, test_batch_size=2500):
    """Build entropy-ordered curriculum loaders plus a test loader.

    The training samples are ordered by ascending image entropy (via
    ``sort_by_entropy``) and split into ``len(percentages)`` consecutive
    chunks. One shuffled ``DataLoader`` is created per chunk, followed by a
    final shuffled loader over the complete training set.

    Args:
        trainset: Iterable of ``(image, label)`` items; images must be
            accepted by ``calculate_entropy`` and by the training transform.
        testset: Dataset wrapped, unshuffled, into the test loader.
        percentages: Sequence of fractions that sum to 1, one per chunk.
            Every chunk but the last gets ``round(len(trainset) * fraction)``
            samples; the last chunk takes whatever remains.
        batch_size: Batch size of every training loader.
        test_batch_size: Batch size of the test loader.

    Returns:
        tuple: ``(train_dl_arr, testloader)`` where ``train_dl_arr`` holds
        ``len(percentages) + 1`` loaders (the chunks, then the full set).

    Raises:
        ValueError: If the fractions do not sum to 1.
    """
    # Compare with a tolerance: e.g. 0.1 + 0.2 + 0.7 is not exactly 1.0 in
    # binary floating point and would be rejected by a strict comparison.
    if not math.isclose(sum(percentages), 1.0, rel_tol=0.0, abs_tol=1e-9):
        raise ValueError("Percentages do not add up to 1")

    transform_train = torchvision.transforms.Compose([
        torchvision.transforms.RandomCrop(32, padding=4),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        normalize,
        Cutout(n_holes=1, length=16)
    ])

    # sort_by_entropy yields [item, entropy] pairs; only the items are needed.
    scored, _ = sort_by_entropy(trainset)
    samples = [pair[0] for pair in scored]
    total = len(samples)

    def make_loader(chunk):
        """Wrap a list of (image, label) items in a shuffled training loader."""
        images = [sample[0] for sample in chunk]
        labels = [sample[1] for sample in chunk]
        return torch.utils.data.DataLoader(
            Dataset(images, labels, transform_train),
            batch_size=batch_size, shuffle=True)

    train_dl_arr = []
    start = 0
    for fraction in percentages[:-1]:
        # round() instead of truncation: 50000 * 0.29 evaluates to
        # 14499.999999999998 and would otherwise lose a sample.
        stop = min(total, start + int(round(total * fraction)))
        train_dl_arr.append(make_loader(samples[start:stop]))
        start = stop

    # The last chunk absorbs the remainder so every sample is used exactly once.
    train_dl_arr.append(make_loader(samples[start:]))

    # Final loader covers the whole (entropy-sorted) training set.
    train_dl_arr.append(make_loader(samples))

    testloader = torch.utils.data.DataLoader(testset, batch_size=test_batch_size, shuffle=False)

    return train_dl_arr, testloader

In [6]:
# Evaluation-time preprocessing: tensor conversion followed by normalisation.
transform_test = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor(), normalize]
)

# The training split is loaded without a transform; the test split gets
# transform_test applied on access.
trainset = torchvision.datasets.CIFAR10(root='data', train=True, download=True)
testset = torchvision.datasets.CIFAR10(
    root='data', train=False, download=True, transform=transform_test
)
print(f'number of train images:{len(trainset)!s}')
print(f'number of test images:{len(testset)!s}')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:14<00:00, 11943629.66it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified
number of train images:50000
number of test images:10000


In [7]:
'''ResNet18/34/50/101/152 in Pytorch.'''
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable


def conv3x3(in_planes, out_planes, stride=1):
    """Create a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.

    Returns:
        nn.Conv2d: The configured layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is required).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        # A 1x1 projection is needed only when the shortcut's shape would
        # not match the main branch; otherwise the shortcut is the identity.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        return F.relu(main + self.shortcut(x))


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The final 1x1 convolution widens the output to ``expansion * planes``
    channels.

    Args:
        in_planes: Number of input channels.
        planes: Channel width of the first two convolutions.
        stride: Stride of the 3x3 convolution (and of the projection
            shortcut, when one is required).
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # A 1x1 projection is needed only when the shortcut's shape would
        # not match the main branch; otherwise the shortcut is the identity.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = F.relu(self.bn2(self.conv2(main)))
        main = self.bn3(self.conv3(main))
        return F.relu(main + self.shortcut(x))


class ResNet(nn.Module):
    """ResNet backbone: a 3x3 stem, four residual stages and a linear classifier.

    Args:
        block: Residual block class; must expose an ``expansion`` attribute
            and accept ``(in_planes, planes, stride)``.
        num_blocks: Sequence with the number of blocks in each of the four
            stages.
        num_classes: Size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count, updated by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)

        # Stages are registered as layer1..layer4, in that order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for stage_no, (planes, stride) in enumerate(stage_specs, start=1):
            stage = self._make_layer(block, planes, num_blocks[stage_no - 1], stride=stride)
            setattr(self, 'layer' + str(stage_no), stage)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack blocks into one stage; only the first block uses ``stride``."""
        per_block_strides = [stride] + [1] * (num_blocks - 1)
        blocks = []
        for block_stride in per_block_strides:
            blocks.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one emits.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Run the stem, the four stages, 4x4 average pooling and the classifier."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.avg_pool2d(out, 4)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        return self.linear(out)


def ResNet18(num_classes=10):
    """Build a ResNet from ``BasicBlock`` with two blocks in each of the four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, num_classes)


In [8]:
def get_predictions(model,data):
    """Return the arg-max class index for every row of ``model(data)``.

    Args:
        model: Callable returning a tensor of per-class scores with classes
            along axis 1.
        data: Input forwarded to ``model``.

    Returns:
        numpy.ndarray: Predicted class index per sample.
    """
    # detach() so the conversion to NumPy also works when the call is made
    # outside a torch.no_grad() context (Tensor.numpy() raises on tensors
    # that require grad).
    scores = model(data).detach().cpu().numpy()
    return scores.argmax(axis=1)
 
def poly_learning_rate(base_lr, curr_iter, max_iter, power=0.9):
    """Polynomial ("poly") learning-rate decay.

    Computes ``base_lr * (1 - curr_iter / max_iter) ** power``.

    Args:
        base_lr: Learning rate at iteration 0.
        curr_iter: Current iteration number.
        max_iter: Iteration at which the learning rate reaches 0.
        power: Exponent of the decay curve.

    Returns:
        float: The decayed learning rate; 0.0 once ``curr_iter`` has reached
        or passed ``max_iter``.
    """
    # Clamp at zero: a negative base raised to a fractional power evaluates
    # to a complex number in Python 3, which is not a valid learning rate.
    remaining = max(0.0, 1 - float(curr_iter) / max_iter)
    lr = base_lr * remaining ** power
    return lr

In [23]:
import copy
def log_info(i_epoch, chunk_index):
    """Format a log fragment naming the epoch and the chunk index."""
    return f' ::: epoch: {i_epoch!s} chunk: {chunk_index!s}'
      
# Seed PyTorch's RNGs with a fixed value.
# NOTE(review): NumPy's RNG (used by Cutout via np.random.randint) is not
# seeded in the visible cells, so augmentation may still vary between runs.
torch.manual_seed(5)
torch.cuda.manual_seed(5)

def trian_model(model, optimizer, learning_rate, epoch_num, train_sets, testloader, epoch_per_chunk):
    """Train ``model`` chunk by chunk and evaluate on the test set after every epoch.

    Training starts on ``train_sets[0]`` and moves to the next loader once the
    cumulative epoch budget in ``epoch_per_chunk`` is used up. The learning
    rate follows ``poly_learning_rate`` and its iteration counter restarts
    whenever the chunk changes.

    Args:
        model: Network to train; moved to CUDA when ``train_on_gpu`` is set.
        optimizer: Optimizer *class* (not an instance). It is instantiated
            with ``lr``, ``momentum=0.9``, ``nesterov=True`` and
            ``weight_decay=5e-4``.
        learning_rate: Base learning rate of the poly schedule.
        epoch_num: Total number of epochs.
        train_sets: Sequence of sized iterables yielding ``(X_batch, y_batch)``.
        testloader: Iterable yielding ``(X_val, Y_val)`` evaluation batches.
        epoch_per_chunk: Epoch budget per chunk, aligned with ``train_sets``.
            A chunk without an entry keeps training until ``epoch_num``.

    Returns:
        tuple: ``(val_accs, lr_arr)`` - a NumPy array with the test accuracy
        (in percent) after each epoch, and a list with the learning rate used
        at every optimisation step.
    """
    if train_on_gpu:
        model.cuda()

    # Per-sample losses, averaged explicitly below. Built directly with
    # reduction='none' rather than re-running __init__ with the deprecated
    # `reduce` argument.
    criterion = torch.nn.CrossEntropyLoss(reduction='none')

    optimizer = optimizer(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=5e-4)
    val_accs = np.zeros(epoch_num)

    lr_arr = []

    info_message = ''
    chunk_index = 0
    last_chunk = len(train_sets) - 1

    traversed_chuncks = [0]
    p_bar = tqdm(range(epoch_num))

    count_complext_lr = 0
    current_lr = 0
    # Iterations performed on the current chunk; drives the poly schedule.
    total_iterations = 0

    for i_epoch in p_bar:
        model.train()

        # Advance to the next chunk once the cumulative budget is exhausted.
        # The bounds keep chunk_index inside train_sets even if epoch_num
        # exceeds sum(epoch_per_chunk); `while` skips zero-epoch chunks.
        while (chunk_index < last_chunk
               and chunk_index < len(epoch_per_chunk)
               and (i_epoch + 1) > sum(epoch_per_chunk[:chunk_index + 1])):
            total_iterations = 0
            chunk_index += 1
            traversed_chuncks.append(chunk_index)
            info_message += log_info((i_epoch + 1), chunk_index)

        train_set = train_sets[chunk_index]
        # NOTE(review): the horizon is based on the total epoch count, not on
        # this chunk's own budget, so the rate does not reach 0 within a chunk.
        max_iter = epoch_num * len(train_set)
        for X_batch, y_batch in train_set:
            total_iterations += 1
            if train_on_gpu:
                X_batch, y_batch = X_batch.cuda(), y_batch.cuda()

            model.zero_grad()  # reset model gradients
            logits = model(X_batch)  # forward pass

            loss = criterion(logits, y_batch).mean()
            loss.backward()  # backpropagate to compute gradients

            new_lr = poly_learning_rate(learning_rate, total_iterations, max_iter, power=0.9)
            if isinstance(new_lr, complex):
                # The schedule ran past max_iter; keep the last valid rate.
                count_complext_lr += 1
            else:
                current_lr = new_lr

            for group in optimizer.param_groups:
                group['lr'] = current_lr
            lr_arr.append(current_lr)

            try:
                optimizer.step()  # update model weights
            except Exception:
                # Log the schedule state, then let the original error surface
                # instead of blindly retrying the same failing step.
                print('current_iter: ' + str(total_iterations))
                print('last max_iter: ' + str(max_iter))
                print('last learning: ' + str(current_lr))
                raise

        # Evaluate once per epoch on the full test loader.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():  # no gradients needed when assessing accuracy
            for X_val, Y_val in testloader:
                if train_on_gpu:
                    X_val, Y_val = X_val.cuda(), Y_val.cuda()

                pred_val = get_predictions(model, X_val)
                total += X_val.size(0)
                correct += int((pred_val == Y_val.cpu().numpy()).sum())

        val_acc = 100. * correct / total
        val_accs[i_epoch] = val_acc
        p_bar.set_description(("max accuracy: " + str(val_accs.max()) + ' accuracy: ' + str(val_acc)) +
                              ' last lr: ' + str(current_lr) + ' chunk index: ' + str(chunk_index),
                              refresh=True)

    print('Traversed chunks' + str(traversed_chuncks))
    print(info_message)
    print("count_complext_lr: " + str(count_complext_lr))
    return val_accs, lr_arr



In [35]:
# Split the entropy-sorted training data into 5% / 15% / 80% chunks; the
# fourth loader returned covers the whole training set.
train_sets, testloader = get_dataloader(trainset, testset, [0.05, 0.15, 0.80])
for chunk_id in range(4):
    print('number of remaining train images in chunk ' + str(chunk_id) + ':'
          + str(len(train_sets[chunk_id].dataset)))
print('number of test images:' + str(len(testloader.dataset)))

number of remaining train images in chunk 0:2500
number of remaining train images in chunk 1:7500
number of remaining train images in chunk 2:40000
number of remaining train images in chunk 3:50000
number of test images:10000


In [None]:
# Curriculum run: 3, 7 and 40 epochs on the three entropy chunks, then one
# epoch on the full training set (51 epochs in total).
resNet18 = ResNet18()
optimizer = torch.optim.SGD  # the optimizer class; trian_model instantiates it
epoch_per_chunk = [3, 7, 40, 1]
val_acc, lr_arr = trian_model(
    model=resNet18,
    optimizer=optimizer,
    learning_rate=0.1,
    epoch_num=51,
    train_sets=train_sets,
    testloader=testloader,
    epoch_per_chunk=epoch_per_chunk,
)

In [None]:
# Learning rate applied at every optimisation step.
plt.plot(lr_arr)
plt.gca().set_xlabel('steps')
plt.gca().set_ylabel('lr value')
plt.show()

In [None]:
# Test accuracy recorded after each epoch. val_acc holds one value per
# epoch, so the x-axis counts epochs rather than optimisation steps.
plt.plot(val_acc)
plt.xlabel('epochs')
plt.ylabel('acc value')
plt.show()

In [None]:
# run conventional model
import copy

def train_conventional_model(model, optimizer, learning_rate, epoch_num, train_set, testloader):
    """Train ``model`` on a single loader and evaluate on the test set after every epoch.

    The learning rate follows ``poly_learning_rate`` over
    ``epoch_num * len(train_set)`` iterations.

    Args:
        model: Network to train; moved to CUDA when ``train_on_gpu`` is set.
        optimizer: Optimizer *class* (not an instance). It is instantiated
            with ``lr``, ``momentum=0.9``, ``nesterov=True`` and
            ``weight_decay=5e-4``.
        learning_rate: Base learning rate of the poly schedule.
        epoch_num: Number of epochs.
        train_set: Sized iterable yielding ``(X_batch, y_batch)``.
        testloader: Iterable yielding ``(X_val, Y_val)`` evaluation batches.

    Returns:
        tuple: ``(val_accs, lr_arr)`` - a NumPy array with the test accuracy
        (in percent) after each epoch, and a list with the learning rate used
        at every optimisation step.
    """
    if train_on_gpu:
        model.cuda()

    # Per-sample losses, averaged explicitly below. Built directly with
    # reduction='none' rather than re-running __init__ with the deprecated
    # `reduce` argument.
    criterion = torch.nn.CrossEntropyLoss(reduction='none')

    optimizer = optimizer(model.parameters(), lr=learning_rate, momentum=0.9, nesterov=True, weight_decay=5e-4)
    val_accs = np.zeros(epoch_num)

    lr_arr = []

    # These three values never change here; they are kept only so the
    # summary printed at the end stays the same as before.
    info_message = ''
    traversed_chuncks = [0]
    count_complext_lr = 0

    p_bar = tqdm(range(epoch_num))

    current_lr = 0
    total_iterations = 0
    # Horizon of the poly schedule; constant for the whole run.
    max_iter = epoch_num * len(train_set)

    for i_epoch in p_bar:
        model.train()

        for X_batch, y_batch in train_set:
            total_iterations += 1
            if train_on_gpu:
                X_batch, y_batch = X_batch.cuda(), y_batch.cuda()

            model.zero_grad()  # reset model gradients
            logits = model(X_batch)  # forward pass

            loss = criterion(logits, y_batch).mean()
            loss.backward()  # backpropagate to compute gradients

            current_lr = poly_learning_rate(learning_rate, total_iterations, max_iter, power=0.9)

            for group in optimizer.param_groups:
                group['lr'] = current_lr
            lr_arr.append(current_lr)

            try:
                optimizer.step()  # update model weights
            except Exception:
                # Log the schedule state, then let the original error surface
                # instead of blindly retrying the same failing step.
                print('current_iter: ' + str(total_iterations))
                print('last max_iter: ' + str(max_iter))
                print('last learning: ' + str(current_lr))
                raise

        # Evaluate once per epoch on the full test loader.
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():  # no gradients needed when assessing accuracy
            for X_val, Y_val in testloader:
                if train_on_gpu:
                    X_val, Y_val = X_val.cuda(), Y_val.cuda()

                pred_val = get_predictions(model, X_val)
                total += X_val.size(0)
                correct += int((pred_val == Y_val.cpu().numpy()).sum())

        val_acc = 100. * correct / total
        val_accs[i_epoch] = val_acc
        p_bar.set_description(("max accuracy: " + str(val_accs.max()) + ' accuracy: ' + str(val_acc)) +
                              ' last lr: ' + str(current_lr),
                              refresh=True)

    print('Traversed chunks' + str(traversed_chuncks))
    print(info_message)
    print("count_complext_lr: " + str(count_complext_lr))
    return val_accs, lr_arr

In [None]:
# Baseline run: a fresh network trained for 51 epochs on the loader that
# covers the full training set (train_sets[3]).
resNet18 = ResNet18()
optimizer = torch.optim.SGD  # the optimizer class; instantiated by the trainer
val_acc, lr_arr = train_conventional_model(
    model=resNet18,
    optimizer=optimizer,
    learning_rate=0.1,
    epoch_num=51,
    train_set=train_sets[3],
    testloader=testloader,
)

In [None]:
# Learning rate applied at every optimisation step of the baseline run.
plt.plot(lr_arr)
plt.gca().set_xlabel('steps')
plt.gca().set_ylabel('lr value')
plt.show()