In [1]:
# Ask TensorFlow which compute devices it can see in this runtime.
from tensorflow.python.client import device_lib
# Bare last expression, so the notebook displays the returned device list.
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 13783920646984529195, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 15469833088
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 5619898055862089867
 physical_device_desc: "device: 0, name: Tesla V100-SXM2-16GB, pci bus id: 0000:00:04.0, compute capability: 7.0"]

In [27]:
import copy
import datetime
import math
import os
import subprocess
import sys
import time

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import tqdm
from matplotlib import pyplot as plt
from torch.autograd import Variable
from torch.utils.data import random_split, SequentialSampler, BatchSampler, RandomSampler
from torch.utils.data.sampler import SubsetRandomSampler

from drive.MyDrive.cords.selectionstrategies.supervisedlearning.glisterstrategy import GLISTERStrategy as Strategy
from drive.MyDrive.cords.utils.custom_dataset import load_mnist_cifar
from drive.MyDrive.cords.utils.models.resnet import ResNet18

In [9]:
def model_eval_loss(data_loader, model, criterion):
    """Return the sum of per-batch losses of ``model`` over ``data_loader``.

    Gradients are disabled for the whole pass. The function does not switch
    the model to eval mode; the caller is responsible for that. Batches are
    moved to the notebook-level ``device`` before the forward pass.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` pairs.
        model: callable mapping a batch of inputs to outputs.
        criterion: callable ``criterion(outputs, targets)`` whose result
            exposes ``.item()``.

    Returns:
        The accumulated ``loss.item()`` over all batches (``0`` when the
        loader yields nothing).
    """
    running_loss = 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)
            batch_loss = criterion(model(inputs), targets)
            running_loss += batch_loss.item()
    return running_loss

In [10]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using Device:", device)


Using Device: cuda


#Training Arguments

In [11]:
# Experiment configuration shared by every later cell.
datadir = '../../data'    # dataset root handed to load_mnist_cifar
data_name = 'cifar10'     # dataset key; also a component of the results path
fraction = 0.1            # share of the training split used as the selection budget
num_epochs = 300          # epochs per run; also T_max of the cosine LR schedule
select_every = 20         # a new subset is selected whenever (epoch + 1) is a multiple of this
feature = 'dss'           # passed to load_mnist_cifar and used in the results path
warm_method = 0           # whether to use warmstart-onestep (1) or online (0)
num_runs = 1              # number of random runs
learning_rate = 0.05      # SGD learning rate, also handed to the selection strategy


#Results Folder

In [12]:
# Results directory layout: ./results/<dataset>/<feature>/<fraction>/<select_every>
all_logs_dir = './results/' + data_name + '/' + feature + '/' + str(fraction) + '/' + str(select_every)
print(all_logs_dir)
# Create the directory tree from Python instead of shelling out to `mkdir -p`:
# this works on every platform and raises if the directory cannot be created.
os.makedirs(all_logs_dir, exist_ok=True)
path_logfile = os.path.join(all_logs_dir, data_name + '.txt')
# Opened in 'w' mode, so a log left by an earlier run with the same settings is
# truncated. The handle stays open for the logging cells and is closed in the
# notebook's final cell.
logfile = open(path_logfile, 'w')
# Human-readable tag describing this experiment's settings.
exp_name = data_name + '_fraction:' + str(fraction) + '_epochs:' + str(num_epochs) + \
           '_selEvery:' + str(select_every) + '_variant' + str(warm_method) + '_runs' + str(num_runs)
print(exp_name)


./results/cifar10/dss/0.1/20
cifar10_fraction:0.1_epochs:300_selEvery:20_variant0_runs1


#Loading CIFAR10 Dataset

In [14]:
# Write a separator line to the run log before the dataset is loaded.
print("=======================================", file=logfile)
# load_mnist_cifar returns four values, unpacked here as a full training set,
# a validation set, a test set and the number of classes.
# NOTE(review): how valset relates to fullset is decided inside the cords
# helper and is not visible here — confirm before changing the loaders.
fullset, valset, testset, num_cls = load_mnist_cifar(datadir, data_name, feature)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../data/cifar-10-python.tar.gz to ../data
Files already downloaded and verified
Files already downloaded and verified


#Splitting Training dataset to train and validation sets

In [15]:
# Share of the full training set that is held out for validation.
validation_set_fraction = 0.1
num_fulltrn = len(fullset)
num_val = int(num_fulltrn * validation_set_fraction)  # int() truncates the product
num_trn = num_fulltrn - num_val  # the remainder stays in the training split
# Random disjoint split of fullset. No generator is passed, so the split
# follows PyTorch's global RNG state and differs between kernel sessions.
trainset, validset = random_split(fullset, [num_trn, num_val])
N = len(trainset)  # size of the training split; the selection budget is derived from it
trn_batch_size = 20  # the DataLoader cell assigns this same value again


#Creating DataLoaders

In [16]:
# Mini-batch sizes: small batches for training, large ones for evaluation.
trn_batch_size = 20
val_batch_size = 1000
tst_batch_size = 1000

# Training split, visited in a fixed (unshuffled) order.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=trn_batch_size,
    shuffle=False,
    pin_memory=True,
)

# Validation batches are read from valset, restricted to the positions that
# random_split assigned to validset and visited in random order.
valloader = torch.utils.data.DataLoader(
    valset,
    batch_size=val_batch_size,
    sampler=SubsetRandomSampler(validset.indices),
    shuffle=False,
    pin_memory=True,
)

# Held-out test set, fixed order.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=tst_batch_size,
    shuffle=False,
    pin_memory=True,
)


#Budget for Data Subset Selection

In [17]:
# Selection budget: `fraction` of the N training samples, truncated to an int.
bud = int(fraction * N)
print("Budget, fraction and N:", bud, fraction, N)
# NOTE(review): the comment that used to sit here said the data is transferred
# to the GPU, but nothing in this cell moves any data.
print_every = 3  # not referenced by any other cell visible in this notebook

Budget, fraction and N: 4500 0.1 45000


#Loading ResNet Model

In [18]:
# Build a ResNet18 with one output per class and place it on the chosen device.
model = ResNet18(num_cls).to(device)

In [20]:
# Show the network's module hierarchy through its printed representation.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

#Initial Random Subset for Training

In [21]:
# Draw `bud` distinct indices from range(N) as the initial training subset.
# This uses NumPy's global RNG; no NumPy seed has been set before this cell.
start_idxs = np.random.choice(N, size=bud, replace=False)

#Loss Type, Optimizer and Learning Rate Scheduler

In [22]:
# Classification loss used for both training and evaluation.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over every model parameter.
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)

# Cosine learning-rate schedule whose period spans the whole run.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    

#Last Layer GLISTER Strategy with Stochastic Selection

In [23]:
# Build the GLISTER selection strategy from the unshuffled training loader,
# the validation loader, the model, the loss, the learning rate, the device
# and the class count.
# NOTE(review): the two trailing positional arguments (False, 'Stochastic')
# are not explained in this notebook; presumably they choose the layer mode
# and the stochastic selection variant — confirm against GLISTERStrategy.
setf_model = Strategy(trainloader, valloader, model, criterion,
                              learning_rate, device, num_cls, False, 'Stochastic')

In [30]:
# The GLISTER run starts from the random initial subset.
idxs = start_idxs
print("Starting Greedy Selection Strategy!")

# One zero-initialised slot per epoch for every tracked metric; each name gets
# its own independent array.
(substrn_losses, fulltrn_losses, val_losses, timing,
 val_acc, tst_acc, full_trn_acc, subtrn_acc) = (np.zeros(num_epochs) for _ in range(8))

# Loader restricted to the currently selected indices of trainset.
subset_trnloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=trn_batch_size,
    sampler=SubsetRandomSampler(idxs),
    shuffle=False,
    pin_memory=True,
)

Starting Greedy Selection Strategy!


#Training Loop

In [31]:
    # GLISTER training run.
    #
    # Each epoch trains on the currently selected subset, then evaluates on the
    # validation, test and full training loaders. Whenever (i + 1) is a multiple
    # of `select_every`, a new subset of `bud` indices is requested from the
    # selection strategy before training.
    #
    # timing[i] covers subset selection (when it happens) plus training on the
    # subset; the evaluation passes are not included.
    for i in tqdm.trange(num_epochs):
        subtrn_loss = 0
        subtrn_correct = 0
        subtrn_total = 0
        start_time = time.time()
        if (i + 1) % select_every == 0:
            # Keep one copy of the weights to restore afterwards and hand a
            # second, independent copy to the strategy.
            cached_state_dict = copy.deepcopy(model.state_dict())
            clone_dict = copy.deepcopy(model.state_dict())
            print("selEpoch: %d, Starting Selection:" % i, str(datetime.datetime.now()))
            subset_start_time = time.time()
            subset_idxs, grads_idxs = setf_model.select(int(bud), clone_dict)
            # Despite the name, this is the elapsed selection time in seconds.
            subset_end_time = time.time() - subset_start_time
            print("Subset Selection Time is:" + str(subset_end_time))
            idxs = subset_idxs
            print("selEpoch: %d, Selection Ended at:" % (i), str(datetime.datetime.now()))
            # Put back the weights the model had before selection ran.
            model.load_state_dict(cached_state_dict)
            # Rebuild the loader so training now draws from the new subset.
            subset_trnloader = torch.utils.data.DataLoader(
                trainset, batch_size=trn_batch_size, shuffle=False,
                sampler=SubsetRandomSampler(idxs), pin_memory=True)

        # ---- Train for one epoch on the selected subset ----
        model.train()
        for batch_idx, (inputs, targets) in enumerate(subset_trnloader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            subtrn_loss += loss.item()
            loss.backward()
            optimizer.step()
            # Running subset accuracy, measured with the weights in use for each batch.
            _, predicted = outputs.max(1)
            subtrn_total += targets.size(0)
            subtrn_correct += predicted.eq(targets).sum().item()
        scheduler.step()
        timing[i] = time.time() - start_time

        # ---- Evaluate on validation, test and the full training split ----
        val_loss = 0
        val_correct = 0
        val_total = 0
        tst_correct = 0
        tst_total = 0
        tst_loss = 0
        full_trn_loss = 0
        full_trn_correct = 0
        full_trn_total = 0
        model.eval()
        with torch.no_grad():

            for batch_idx, (inputs, targets) in enumerate(valloader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += targets.size(0)
                val_correct += predicted.eq(targets).sum().item()

            for batch_idx, (inputs, targets) in enumerate(testloader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                tst_loss += loss.item()
                _, predicted = outputs.max(1)
                tst_total += targets.size(0)
                tst_correct += predicted.eq(targets).sum().item()

            for batch_idx, (inputs, targets) in enumerate(trainloader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                full_trn_loss += loss.item()
                _, predicted = outputs.max(1)
                full_trn_total += targets.size(0)
                full_trn_correct += predicted.eq(targets).sum().item()

        # Store this epoch's metrics. Losses are sums of per-batch losses, not means.
        val_acc[i] = val_correct/val_total
        tst_acc[i] = tst_correct/tst_total
        subtrn_acc[i] = subtrn_correct/subtrn_total
        full_trn_acc[i] = full_trn_correct/full_trn_total
        substrn_losses[i] = subtrn_loss
        fulltrn_losses[i] = full_trn_loss
        val_losses[i] = val_loss
        print('Epoch:', i + 1, 'SubsetTrn,FullTrn,ValLoss,Time:', subtrn_loss, full_trn_loss, val_loss, timing[i])


  0%|          | 1/300 [00:35<2:58:55, 35.91s/it]

Epoch: 1 SubsetTrn,FullTrn,ValLoss,Time: 572.9529604911804 4572.160925388336 10.169380903244019 5.473708152770996


KeyboardInterrupt: ignored

#Results Logging

In [None]:
# Console summary of the last completed epoch of the GLISTER run.
print("SelectionRun---------------------------------")
print("Final SubsetTrn and FullTrn Loss:", subtrn_loss, full_trn_loss)
print("Validation Loss and Accuracy:", val_loss, val_acc[-1])
print("Test Data Loss and Accuracy:", tst_loss, tst_acc[-1])
print('-----------------------------------')

# Per-epoch series written to the run log as comma-separated lines.
print("GLISTER", file=logfile)
print('---------------------------------------------------------------------', file=logfile)
# Each label already ends in a comma and every value is prefixed with another,
# so the label is followed by ",,". That format is kept as it was.
val = "Validation Accuracy,"
tst = "Test Accuracy,"
time_str = "Time,"
for i in range(num_epochs):
    time_str = time_str + "," + str(timing[i])
    val = val + "," + str(val_acc[i])
    tst = tst + "," + str(tst_acc[i])
# Write the formatted timing line. Previously the raw `timing` array was
# printed here and `time_str` was built but never used.
print(time_str, file=logfile)
print(val, file=logfile)
print(tst, file=logfile)
# Push the lines to disk now; the log handle stays open for the next run.
logfile.flush()

#Full Data Training

In [33]:
# Seed NumPy and PyTorch, then build a new network on the chosen device for
# the full-data baseline.
np.random.seed(42)
torch.manual_seed(42)
model = ResNet18(num_cls).to(device)

In [34]:
# The baseline reports subset metrics on the initial random subset.
idxs = start_idxs

# New loss, optimizer and schedule bound to the re-initialised model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
print("Starting Full Training Run!")

Starting Full Training Run!


In [35]:
# Reset every per-epoch metric buffer for the full-data run; each name gets
# its own independent zero array.
(substrn_losses, fulltrn_losses, val_losses, timing,
 val_acc, tst_acc, full_trn_acc, subtrn_acc) = (np.zeros(num_epochs) for _ in range(8))

# Loader over the indices currently held in `idxs`.
subset_trnloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=trn_batch_size,
    sampler=SubsetRandomSampler(idxs),
    shuffle=False,
    pin_memory=True,
)


#Full Training Loop

In [None]:
# Full-data baseline run.
#
# Each epoch trains on every batch of `trainloader`, then evaluates on the
# validation, test, full-training and subset loaders. timing[i] covers the
# training pass only.
#
# NOTE(review): `trainloader` was created with shuffle=False, so the baseline
# sees the batches in the same order every epoch — confirm this is intended.
for i in tqdm.trange(num_epochs):
    start_time = time.time()
    model.train()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
        # The former `Variable(...)` wrapping was removed: it bound a stray
        # `target` name to the inputs and had no effect on the loss below.
        optimizer.zero_grad()  # clear gradients left from the previous batch
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    scheduler.step()
    timing[i] = time.time() - start_time

    # ---- Evaluation: every counter starts from zero each epoch ----
    val_loss = 0
    val_correct = 0
    val_total = 0
    tst_correct = 0
    tst_total = 0
    tst_loss = 0
    full_trn_loss = 0
    subtrn_loss = 0
    full_trn_correct = 0
    full_trn_total = 0
    subtrn_correct = 0
    subtrn_total = 0
    model.eval()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            val_total += targets.size(0)
            val_correct += predicted.eq(targets).sum().item()

        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            tst_loss += loss.item()
            _, predicted = outputs.max(1)
            tst_total += targets.size(0)
            tst_correct += predicted.eq(targets).sum().item()

        for batch_idx, (inputs, targets) in enumerate(trainloader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            full_trn_loss += loss.item()
            _, predicted = outputs.max(1)
            full_trn_total += targets.size(0)
            full_trn_correct += predicted.eq(targets).sum().item()

        # Unlike the selection run, subset metrics here come from a separate
        # evaluation pass over the subset loader.
        for batch_idx, (inputs, targets) in enumerate(subset_trnloader):
            inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            subtrn_loss += loss.item()
            _, predicted = outputs.max(1)
            subtrn_total += targets.size(0)
            subtrn_correct += predicted.eq(targets).sum().item()

    # Store this epoch's metrics. Losses are sums of per-batch losses, not means.
    val_acc[i] = val_correct / val_total
    tst_acc[i] = tst_correct / tst_total
    subtrn_acc[i] = subtrn_correct / subtrn_total
    full_trn_acc[i] = full_trn_correct / full_trn_total
    substrn_losses[i] = subtrn_loss
    fulltrn_losses[i] = full_trn_loss
    val_losses[i] = val_loss
    print('Epoch:', i + 1, 'SubsetTrn,FullTrn,ValLoss,Time:', subtrn_loss, full_trn_loss, val_loss, timing[i])


  0%|          | 0/300 [00:00<?, ?it/s][A

#Results and Timing Logging

In [None]:
# Console summary of the last completed epoch of the full-data run.
print("SelectionRun---------------------------------")
print("Final SubsetTrn and FullTrn Loss:", subtrn_loss, full_trn_loss)
print("Validation Loss and Accuracy:", val_loss, val_acc[-1])
print("Test Data Loss and Accuracy:", tst_loss, tst_acc[-1])
print('-----------------------------------')

# Per-epoch series written to the run log as comma-separated lines.
print("Full Training", file=logfile)
print('---------------------------------------------------------------------', file=logfile)
# Each label already ends in a comma and every value is prefixed with another,
# so the label is followed by ",,". That format is kept as it was.
val = "Validation Accuracy,"
tst = "Test Accuracy,"
time_str = "Time,"
for i in range(num_epochs):
    time_str = time_str + "," + str(timing[i])
    val = val + "," + str(val_acc[i])
    tst = tst + "," + str(tst_acc[i])
# Write the formatted timing line. Previously the raw `timing` array was
# printed here and `time_str` was built but never used.
print(time_str, file=logfile)
print(val, file=logfile)
print(tst, file=logfile)
# Push the lines to disk now rather than waiting for the handle to be closed.
logfile.flush()

In [None]:
# Close the shared run log opened in the results-folder cell; closing also
# writes out anything still buffered.
logfile.close()