# SMI AL Loop

In [1]:
import h5py
import time
import random
import datetime
import copy
import numpy as np
import os
import csv
import json
import subprocess
import sys
import PIL.Image as Image
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
from matplotlib import pyplot as plt
from torch.utils.data.sampler import SubsetRandomSampler
from cords.cords.selectionstrategies.supervisedlearning import DataSelectionStrategy
# from cords.cords.utils.models import ResNet18
from distil.distil.utils.models.resnet import ResNet18
from gable.gable.utils.custom_dataset import load_dataset_custom
from torch.utils.data import Subset
from torch.autograd import Variable
import tqdm
from math import floor
from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
# Seed the torch, NumPy and Python `random` generators with one shared value.
seed=42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed) 
# Optional CUDA seeding / cuDNN settings, currently disabled:
# torch.cuda.manual_seed_all(seed)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False
# torch.backends.cudnn.enabled = False

In [2]:
from torch.utils.data import Dataset
class custom_subset(Dataset):
    r"""
    View of a dataset restricted to given indices, with replacement targets.

    Items come from ``dataset`` at the selected ``indices``; the target
    returned with each item is taken from ``labels`` instead of from the
    wrapped dataset.

    Arguments:
        dataset (Dataset): The whole Dataset
        indices (sequence): Positions in ``dataset`` that form the subset
        labels (Tensor): Targets for the subset, cast to ``torch.long``.
            Expected to be aligned with (and as long as) ``indices``.
    """
    def __init__(self, dataset, indices, labels):
        self.targets = labels.type(torch.long)
        self.dataset = torch.utils.data.Subset(dataset, indices)

    def __len__(self):
        # The length follows the targets, not the wrapped Subset.
        return len(self.targets)

    def __getitem__(self, idx):
        sample = self.dataset[idx]
        # Keep only the first field of the wrapped sample and pair it with
        # the stored target.
        return (sample[0], self.targets[idx])

In [3]:
def model_eval_loss(data_loader, model, criterion):
    """Sum ``criterion`` over every batch of ``data_loader`` with gradients disabled.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        model: Callable mapping a batch of inputs to outputs.
        criterion: Loss callable whose result exposes ``.item()``.

    Returns:
        The sum of the per-batch loss values (``0`` for an empty loader).
    """
    running_loss = 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device)
            targets = targets.to(device, non_blocking=True)
            running_loss += criterion(model(inputs), targets).item()
    return running_loss

def init_weights(m):
    """Initialise one module in place; intended for ``model.apply(init_weights)``.

    ``nn.Conv2d`` and ``nn.Linear`` weights get Xavier-uniform initialisation.
    ``nn.Linear`` biases are filled with 0.01. Any other module type is left
    untouched, and Conv2d biases are not modified.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        torch.nn.init.xavier_uniform_(m.weight)
    # A Linear layer built with bias=False has ``bias is None``; skip the fill
    # instead of failing on ``None.data``.
    if isinstance(m, nn.Linear) and m.bias is not None:
        m.bias.data.fill_(0.01)

def weight_reset(m):
    """Call ``reset_parameters()`` on Conv2d and Linear modules; ignore all others."""
    if not isinstance(m, (nn.Conv2d, nn.Linear)):
        return
    m.reset_parameters()
                
def create_model(name, num_cls, device):
    """Build a model by name, initialise its weights and move it to ``device``.

    Args:
        name: One of ``'ResNet18'``, ``'MnistNet'`` or ``'ResNet164'``.
        num_cls: Number of output classes (not passed to ``MnistNet``).
        device: Target passed to ``model.to``.

    Returns:
        The model after ``init_weights`` has been applied to every submodule.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    # NOTE(review): only ResNet18 is imported in the visible part of this file;
    # MnistNet and ResNet164 must be in scope for those branches to work.
    if name == 'ResNet18':
        model = ResNet18(num_cls)
    elif name == 'MnistNet':
        model = MnistNet()
    elif name == 'ResNet164':
        model = ResNet164(num_cls)
    else:
        # Previously an unknown name fell through and failed later with an
        # unhelpful UnboundLocalError on ``model``.
        raise ValueError("Unsupported model name: " + repr(name))
    model.apply(init_weights)
    return model.to(device)

def loss_function():
    """Return two cross-entropy criteria.

    Returns:
        A ``(criterion, criterion_nored)`` tuple: the first uses the default
        reduction, the second is built with ``reduction='none'``.
    """
    return nn.CrossEntropyLoss(), nn.CrossEntropyLoss(reduction='none')

def optimizer_with_scheduler(model, num_epochs, learning_rate, m=0.9, wd=5e-4):
    """Create an SGD optimizer together with a cosine-annealing LR scheduler.

    Args:
        model: Module whose ``parameters()`` are optimised.
        num_epochs: Passed to the scheduler as ``T_max``.
        learning_rate: SGD learning rate.
        m: SGD momentum.
        wd: SGD weight decay.

    Returns:
        An ``(optimizer, scheduler)`` tuple.
    """
    sgd_kwargs = {"lr": learning_rate, "momentum": m, "weight_decay": wd}
    sgd = optim.SGD(model.parameters(), **sgd_kwargs)
    cosine = torch.optim.lr_scheduler.CosineAnnealingLR(sgd, T_max=num_epochs)
    return sgd, cosine

def optimizer_without_scheduler(model, learning_rate, m=0.9, wd=5e-4):
    """Create a plain SGD optimizer (no LR schedule) over ``model.parameters()``.

    Args:
        model: Module whose parameters are optimised.
        learning_rate: SGD learning rate.
        m: SGD momentum.
        wd: SGD weight decay.

    Returns:
        The configured ``optim.SGD`` instance.
    """
    # Alternative kept from the original for reference:
    #     optimizer = optim.Adam(model.parameters(),weight_decay=wd)
    sgd_kwargs = {"lr": learning_rate, "momentum": m, "weight_decay": wd}
    return optim.SGD(model.parameters(), **sgd_kwargs)

def generate_cumulative_timing(mod_timing):
    """Return the running total of ``mod_timing`` divided by 3600.

    Args:
        mod_timing: Sequence of numeric durations (presumably seconds, given
            the division by 3600 — confirm with the caller).

    Returns:
        A float NumPy array of the same length holding the cumulative sums
        scaled by 1/3600.
    """
    cumulative = np.zeros(len(mod_timing))
    running = 0
    for pos, elapsed in enumerate(mod_timing):
        running += elapsed
        cumulative[pos] = running
    return cumulative / 3600

def kernel(x, y, measure="cosine", exp=2):
    """Compute a pairwise similarity matrix between the rows of ``x`` and ``y``.

    Args:
        x: Tensor supporting ``.cpu().numpy()``; rows are the first set of
            vectors.
        y: Tensor supporting ``.cpu().numpy()``; rows are the second set.
        measure: ``"cosine"`` for scikit-learn's ``cosine_similarity``, or
            ``"eu_sim"`` for ``max(dist) - dist`` where ``dist`` comes from
            scikit-learn's ``pairwise_distances`` (default metric).
        exp: Unused; kept so existing callers keep working.

    Returns:
        A NumPy array of similarities as returned by the scikit-learn helpers.

    Raises:
        ValueError: If ``measure`` is not one of the supported names.
    """
    # Validate first: an unknown measure used to surface as an
    # UnboundLocalError on ``sim`` at the return statement.
    if measure not in ("cosine", "eu_sim"):
        raise ValueError("Unsupported similarity measure: " + repr(measure))

    # detach() lets tensors that still require grad be converted to NumPy.
    x_np = x.detach().cpu().numpy()
    y_np = y.detach().cpu().numpy()

    if measure == "eu_sim":
        dist = pairwise_distances(x_np, y_np)
        # Turn distances into similarities by subtracting from the largest one.
        return dist.max() - dist
    return cosine_similarity(x_np, y_np)


def save_kernel_hdf5(lake_kernel, lake_target_kernel, target_kernel=None, numpy=True):
    """Write the three kernels to HDF5 files in the current working directory.

    Each kernel is stored as dataset ``"kernel"`` in its own file:
    ``smi_lake_kernel.hdf5``, ``smi_lake_target_kernel.hdf5`` and
    ``smi_target_kernel.hdf5``. Existing files are overwritten.

    Args:
        lake_kernel: Kernel written to ``smi_lake_kernel.hdf5``.
        lake_target_kernel: Kernel written to ``smi_lake_target_kernel.hdf5``.
        target_kernel: Kernel written to ``smi_target_kernel.hdf5``; ``None``
            (the default) is stored as an empty list, as before.
        numpy: When ``False``, torch tensors are converted to NumPy arrays
            before writing. Non-tensor values (e.g. the empty-list
            placeholders) are written unchanged.
    """
    if target_kernel is None:
        target_kernel = []

    outputs = (
        ("smi_lake_kernel.hdf5", lake_kernel),
        ("smi_lake_target_kernel.hdf5", lake_target_kernel),
        ("smi_target_kernel.hdf5", target_kernel),
    )
    for filename, matrix in outputs:
        # Only real tensors are converted; calling .cpu() on a list placeholder
        # used to raise AttributeError when numpy=False.
        if not numpy and torch.is_tensor(matrix):
            matrix = matrix.detach().cpu().numpy()
        with h5py.File(filename, 'w') as hf:
            hf.create_dataset("kernel", data=matrix)
            
def find_err_per_class(test_set, val_set, final_val_classifications, final_val_predictions, final_tst_classifications, 
                       final_tst_predictions, saveDir, prefix):
    """Compute per-class error percentages on the validation and test sets.

    Classes ``0 .. num_cls - 1`` are iterated, where ``num_cls`` is read from
    module scope. A class with no examples gets an error of 0.

    Args:
        test_set: Object with ``targets`` accepted by ``torch.Tensor(...)``.
        val_set: Object whose ``targets`` is a tensor (``.float()`` is called).
        final_val_classifications: Per-example correctness flags for the
            validation set (falsy means misclassified).
        final_val_predictions: Unused.
        final_tst_classifications: Per-example correctness flags for the
            test set.
        final_tst_predictions: Unused.
        saveDir: Unused.
        prefix: Unused.

    Returns:
        ``(tst_err_log, val_err_log, val_class_err_idxs)``: two lists of
        per-class error percentages (rounded to 2 decimals) each followed by
        their mean, and a list holding, per class, the set of misclassified
        validation indices.
    """
    val_err_idx = set(np.where(np.array(final_val_classifications) == False)[0])
    tst_err_idx = set(np.where(np.array(final_tst_classifications) == False)[0])
    # Build the target tensors once instead of once per class.
    tst_targets = torch.Tensor(test_set.targets)
    val_targets = torch.Tensor(val_set.targets.float())

    val_class_err_idxs = []
    tst_err_log = []
    val_err_log = []
    for i in range(num_cls):
        tst_class_idxs = set(torch.where(tst_targets == i)[0].cpu().numpy())
        val_class_idxs = set(torch.where(val_targets == i)[0].cpu().numpy())
        # Misclassified examples belonging to this class.
        val_err_class_idx = val_err_idx & val_class_idxs
        tst_err_class_idx = tst_err_idx & tst_class_idxs
        if val_class_idxs:
            val_error_perc = round((len(val_err_class_idx)/len(val_class_idxs))*100,2)
        else:
            val_error_perc = 0
        # Same empty-class guard as for validation; this used to divide by zero.
        if tst_class_idxs:
            tst_error_perc = round((len(tst_err_class_idx)/len(tst_class_idxs))*100,2)
        else:
            tst_error_perc = 0
        print("val, test error% for class ", i, " : ", val_error_perc, tst_error_perc)
        val_class_err_idxs.append(val_err_class_idx)
        tst_err_log.append(tst_error_perc)
        val_err_log.append(val_error_perc)
    # The last entry of each log is the mean over classes.
    tst_err_log.append(sum(tst_err_log)/len(tst_err_log))
    val_err_log.append(sum(val_err_log)/len(val_err_log))
    return tst_err_log, val_err_log, val_class_err_idxs


def aug_train_subset(train_set, lake_set, true_lake_set, subset, lake_subset_idxs, budget, augrandom=False):
    """Move selected lake points into the training set and shrink the lake.

    When ``augrandom`` is set and ``subset`` does not contain exactly
    ``budget`` entries, ``subset`` is extended in place with randomly drawn
    lake indices that are not already selected.

    Args:
        train_set: Current training dataset.
        lake_set: Lake dataset; ``targets`` must be a tensor.
        true_lake_set: Lake dataset from which the labels of the selected
            points are taken; ``targets`` must be a tensor.
        subset: List of selected lake indices (may be extended in place).
        lake_subset_idxs: Indices removed from the lake.
        budget: Requested number of selections.
        augrandom: Whether to top up an under-filled ``subset`` at random.

    Returns:
        ``(aug_train_set, remain_lake_set, remain_true_lake_set)``.
    """
    every_lake_idx = list(range(len(lake_set)))

    if len(subset) != budget and augrandom:
        shortfall = int(budget) - len(subset)
        print("Budget not filled, adding ", str(shortfall), " randomly.")
        candidates = np.array(list(set(every_lake_idx) - set(subset)))
        subset += list(np.random.choice(candidates, size=int(shortfall), replace=False))

    true_targets = torch.Tensor(true_lake_set.targets.float())
    lake_ss = custom_subset(true_lake_set, subset, true_targets[subset])

    leftover_idx = list(set(every_lake_idx) - set(lake_subset_idxs))
    lake_targets = torch.Tensor(lake_set.targets.float())
    remain_lake_set = custom_subset(lake_set, leftover_idx, lake_targets[leftover_idx])
    remain_true_lake_set = custom_subset(true_lake_set, leftover_idx, true_targets[leftover_idx])

    # Selected and remaining points must together account for the whole lake.
    assert((len(lake_ss)+len(remain_lake_set))==len(lake_set))
    aug_train_set = torch.utils.data.ConcatDataset([train_set, lake_ss])
    return aug_train_set, remain_lake_set, remain_true_lake_set
                        
def getMisclsSet(val_set, val_class_err_idxs, imb_cls_idx):
    """Return the misclassified validation examples of the selected classes.

    Args:
        val_set: Validation dataset.
        val_class_err_idxs: Per-class collections of misclassified validation
            indices, indexed by class id.
        imb_cls_idx: Class ids whose misclassifications are kept.

    Returns:
        A ``Subset`` of ``val_set`` over the collected indices.
    """
    picked = []
    for cls_id, err_idxs in enumerate(val_class_err_idxs):
        if cls_id in imb_cls_idx:
            picked.extend(err_idxs)
    print("total misclassified ex from imb classes: ", len(picked))
    return Subset(val_set, picked)

def getMisclsSetNumpy(X_val, y_val, val_class_err_idxs, imb_cls_idx):
    """Array counterpart of ``getMisclsSet``: index ``X_val``/``y_val`` directly.

    Args:
        X_val: Validation inputs, indexable with a list of indices.
        y_val: Validation labels, indexable with a list of indices.
        val_class_err_idxs: Per-class collections of misclassified validation
            indices, indexed by class id.
        imb_cls_idx: Class ids whose misclassifications are kept.

    Returns:
        ``(X_val[idx], y_val[idx])`` for the collected indices.
    """
    picked = []
    for cls_id, err_idxs in enumerate(val_class_err_idxs):
        if cls_id in imb_cls_idx:
            picked.extend(err_idxs)
    print("total misclassified ex from imb classes: ", len(picked))
    return X_val[picked], y_val[picked]

def getPrivateSet(lake_set, subset, private_set):
    """Append the newly selected lake points to the existing private set.

    Args:
        lake_set: Lake dataset; ``targets`` must be a tensor.
        subset: Lake indices selected in the current round.
        private_set: Dataset accumulated from previous rounds.

    Returns:
        A ``ConcatDataset`` of ``private_set`` followed by the new selection.
    """
    picked_targets = torch.Tensor(lake_set.targets.float())[subset]
    fresh_selection = custom_subset(lake_set, subset, picked_targets)
    return torch.utils.data.ConcatDataset([private_set, fresh_selection])

# Recipe for the selector command line of each supported selection function:
#   (use the "cifarSubsetSelector_ng" executable instead of exePath,
#    selector mode,
#    lambda flags passed with value 1 (in this order),
#    also pass the target-vs-target kernel file)
_SMI_COMMAND_SPECS = {
    "fl1mi": (False, "query", ("-magnificationLambda",), False),
    "logdetmi": (True, "query", ("-logDetLambda", "-magnificationLambda"), True),
    "fl2mi": (False, "query", ("-queryDiversityLambda", "-magnificationLambda"), False),
    "gcmi": (False, "query", ("-magnificationLambda",), False),
    "div-gcmi": (False, "query", ("-magnificationLambda",), False),
    "gccg": (False, "private", ("-gcLambda", "-magnificationLambda"), False),
    "fl1cg": (False, "private", ("-magnificationLambda",), False),
    "logdetcg": (False, "private", ("-magnificationLambda",), True),
    "fl": (True, "generic", ("-logDetLambda",), False),
    "logdet": (True, "generic", ("-logDetLambda",), False),
    "gc": (False, "generic", ("-gcLambda",), False),
}


def _build_smi_command(datkbuildPath, exePath, hdf5Path, budget, numQueries, sf):
    """Return the selector invocation for ``sf`` as an argument list."""
    if sf not in _SMI_COMMAND_SPECS:
        raise ValueError("Unsupported selection function: " + repr(sf))
    use_ng_exe, mode, lambda_flags, with_target_kernel = _SMI_COMMAND_SPECS[sf]

    executable = "cifarSubsetSelector_ng" if use_ng_exe else exePath
    command = [os.path.join(datkbuildPath, executable),
               "-mode", mode, "-naiveOrRandom", "naive"]
    for flag in lambda_flags:
        command += [flag, "1"]
    command += ["-numSummaries", "1", "-budget", str(budget)]
    if mode == "generic":
        command += ["-genericOptimizer", sf]
    else:
        command += ["-queryPrivacyOptimizer", sf, "-numQueries", str(numQueries)]
    command += ["-dontComputeKernel", "true",
                "-imageKernelFile", os.path.join(hdf5Path, "smi_lake_kernel.hdf5")]
    if mode != "generic":
        # "-queryKernelFile" / "-privateKernelFile"
        command += ["-" + mode + "KernelFile",
                    os.path.join(hdf5Path, "smi_lake_target_kernel.hdf5")]
        if with_target_kernel:
            # "-queryqueryKernelFile" / "-privateprivateKernelFile"
            command += ["-" + mode + mode + "KernelFile",
                        os.path.join(hdf5Path, "smi_target_kernel.hdf5")]
    return command


def getSMI_ss(datkbuildPath, exePath, hdf5Path, budget, numQueries, sf):
    """Run the external subset selector and return the selected indices.

    The command line is assembled from the kernel files expected in
    ``hdf5Path`` (``smi_lake_kernel.hdf5``, ``smi_lake_target_kernel.hdf5``,
    ``smi_target_kernel.hdf5``), and the selector's standard output is parsed
    as whitespace-separated integers.

    Args:
        datkbuildPath: Directory containing the selector executables.
        exePath: Executable name inside ``datkbuildPath`` (the ``logdetmi``,
            ``fl`` and ``logdet`` functions always use
            ``cifarSubsetSelector_ng``).
        hdf5Path: Directory holding the kernel HDF5 files.
        budget: Number of points to select.
        numQueries: Number of query/private points (ignored in generic mode).
        sf: Selection function name; see ``_SMI_COMMAND_SPECS`` for the
            supported values.

    Returns:
        List of selected indices as ``int``.

    Raises:
        ValueError: If ``sf`` is unknown, or the selector output is empty or
            not made of integers.
    """
    command = _build_smi_command(datkbuildPath, exePath, hdf5Path, budget, numQueries, sf)
    print("Executing SIM command: ", " ".join(command))
    # An argument list without a shell keeps paths with spaces intact and avoids
    # shell interpretation of any argument. stderr is inherited from the parent.
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    tokens = completed.stdout.decode("utf-8").split()
    if not tokens:
        raise ValueError("Subset selector produced no output (exit code "
                         + str(completed.returncode) + ")")
    return [int(token) for token in tokens]

def remove_ood_points(lake_set, subset, idc_idx):
    """Keep only the selected indices whose lake label is in ``idc_idx``.

    The result is grouped by class in the order of ``idc_idx``; within a class
    the order of ``subset`` is preserved.

    Args:
        lake_set: Lake dataset; ``targets`` must be a tensor.
        subset: Selected lake indices.
        idc_idx: Class ids to keep.

    Returns:
        List of the retained entries of ``subset``.
    """
    picked_classes = torch.Tensor(lake_set.targets.float())[subset]
    kept = []
    for cls_id in idc_idx:
        positions = list(torch.where(picked_classes == cls_id)[0].cpu().numpy())
        kept.extend(np.array(subset)[positions])
    print(len(kept),"/",len(subset), " idc points.")
    return kept

def getPerClassSel(lake_set, subset, num_cls):
    """Count how many selected lake points fall into each class.

    Args:
        lake_set: Lake dataset; ``targets`` must be a tensor.
        subset: Selected lake indices.
        num_cls: Number of classes; classes ``0 .. num_cls - 1`` are counted.

    Returns:
        List of ``num_cls`` integer counts, one per class id.
    """
    chosen_labels = torch.Tensor(lake_set.targets.float())[subset]
    return [
        len(torch.where(chosen_labels == cls_id)[0])
        for cls_id in range(num_cls)
    ]

#check overlap with prev selections
def check_overlap(prev_idx, prev_idx_hist, idx):
    """Report how much the current selection overlaps with earlier selections.

    Every index is first mapped to ``int(x / num_rep)`` (``num_rep`` is read
    from module scope), so indices falling into the same group of ``num_rep``
    count as the same point.

    Args:
        prev_idx: Indices chosen in the previous round; ``None`` is treated as
            an empty selection.
        prev_idx_hist: Indices chosen in all earlier rounds; ``None`` is
            treated as empty.
        idx: Indices chosen in the current round.

    Returns:
        ``(overlap_with_prev, overlap_with_history)`` as fractions of
        ``len(idx)``; both are ``0.0`` when ``idx`` is empty.
    """
    prev_idx = [] if prev_idx is None else prev_idx
    prev_idx_hist = [] if prev_idx_hist is None else prev_idx_hist
    prev_idx = [int(x/num_rep) for x in prev_idx]
    prev_idx_hist = [int(x/num_rep) for x in prev_idx_hist]
    idx = [int(x/num_rep) for x in idx]

    # Sets give O(1) membership tests; the lists below still count duplicates
    # in ``idx`` exactly as before.
    prev_lookup = set(prev_idx)
    hist_lookup = set(prev_idx_hist)
    overlap = [value for value in idx if value in prev_lookup]
    overlap_hist = [value for value in idx if value in hist_lookup]
    new_points = set(idx) - hist_lookup
    total_unique_points = set(idx) | hist_lookup

    # An empty selection used to raise ZeroDivisionError here.
    overlap_frac = len(overlap)/len(idx) if idx else 0.0
    overlap_hist_frac = len(overlap_hist)/len(idx) if idx else 0.0

    print("New unique points: ", len(new_points))
    print("Total unique points: ", len(total_unique_points))
    print("overlap % of sel with prev idx: ", overlap_frac)
    print("overlap % of sel with all prev idx: ", overlap_hist_frac)
    return overlap_frac, overlap_hist_frac


In [4]:
# ---- Data and experiment settings ----
datadir = 'data/'
data_name = 'cifar10'
num_cls = 10
fraction = 0.1
budget = 30
num_epochs = 20
num_rep = 10
model_name = 'ResNet18'
learning_rate = 0.01

# ---- Feature (scenario) selection; alternatives are kept commented out ----
# feature='vanilla'
# split_cfg = {"train_size":500, "val_size":1000, "lake_size":5000, "num_rep":num_rep, "lake_subset_repeat_size":1000}
# feature = 'duplicate'
feature = 'classimb'
split_cfg = {  # cifar10
    "num_cls_imbalance": 2,
    "per_imbclass_train": 10,
    "per_imbclass_val": 5,
    "per_imbclass_lake": 150,
    "per_class_train": 200,
    "per_class_val": 5,
    "per_class_lake": 3000,
}
# split_cfg = {"num_cls_imbalance":2, "per_imbclass_train":10, "per_imbclass_val":5, "per_imbclass_lake":75, "per_class_train":200, "per_class_val":5, "per_class_lake":295} #cifar100

# Path of the initial model weights, derived from the settings above.
initModelPath = "_".join([
    data_name,
    model_name,
    str(learning_rate),
    str(split_cfg["per_imbclass_train"]),
    str(split_cfg["per_class_train"]),
    str(split_cfg["num_cls_imbalance"]),
])
# feature = 'ood'
# split_cfg = {'num_cls_idc':5, 'per_idc_train':100, 'per_idc_val':10, 'per_idc_lake':500, 'per_ood_train':0, 'per_ood_val':0, 'per_ood_lake':5000}#cifar10
# split_cfg = {'num_cls_idc':50, 'per_idc_train':100, 'per_idc_val':2, 'per_idc_lake':100, 'per_ood_train':0, 'per_ood_val':0, 'per_ood_lake':500}#cifar100
# initModelPath = "weights/"+data_name + "_" + feature + "_" + model_name + "_" + str(learning_rate) + "_" + str(split_cfg["per_idc_train"]) + "_" + str(split_cfg["per_idc_val"]) + "_" + str(split_cfg["num_cls_idc"])

# ---- Run control ----
num_runs = 1  # number of random runs
computeClassErrorLog = True
run = 2
magnification = 1

# ---- Execution environment ----
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
datkbuildPath = "/home/snk170001/bioml/dss/notebooks/datk/build"
exePath = "cifarSubsetSelector"
print("Using Device:", device)

# ---- Kernel / gradient options ----
doublePrecision = True
linearLayer = True

Using Device: cuda


In [5]:
from distil.distil.active_learning_strategies import BADGE, EntropySampling, GLISTER, GradMatchActive
from distil.distil.utils.DataHandler import DataHandler_CIFAR10

# AL-Like Training Loop

In [9]:
def train_model_al(datkbuildPath, exePath, num_epochs, dataset_name, datadir, feature, model_name, budget, split_cfg, learning_rate, run,
                device, computeErrorLog, strategy="SIM", sf=""):
#     torch.manual_seed(42)
#     np.random.seed(42)
    print(strategy, sf)
    #load the dataset based on type of feature
    if(feature=="classimb" or feature=="ood"):
        if(strategy == "SIM" or strategy == "SF" or strategy=="random"):
            if(strategy == "SF" or strategy=="random"):
                train_set, val_set, test_set, lake_set, sel_cls_idx, num_cls = load_dataset_custom(datadir, dataset_name, feature, split_cfg, False, True)
            else:
                train_set, val_set, test_set, lake_set, sel_cls_idx, num_cls = load_dataset_custom(datadir, dataset_name, feature, split_cfg, False, False)
        elif(strategy=="AL"):
            if(sf=="badge" or sf=="us"):
                X_tr, y_tr, X_val, y_val, X_unlabeled, y_unlabeled, train_set, val_set, test_set, lake_set, sel_cls_idx, num_cls = load_dataset_custom(datadir, dataset_name, feature, split_cfg, True, True)
            else: #dont augment train with valid
                X_tr, y_tr, X_val, y_val, X_unlabeled, y_unlabeled, train_set, val_set, test_set, lake_set, sel_cls_idx, num_cls = load_dataset_custom(datadir, dataset_name, feature, split_cfg, True, False)
        print("selected classes are: ", sel_cls_idx)
    if(feature=="duplicate" or feature=="vanilla"):
        sel_cls_idx = None
        if(strategy == "SIM" or strategy=="random"):
            train_set, val_set, test_set, lake_set, num_cls = load_dataset_custom(datadir, dataset_name, feature, split_cfg)
        elif(strategy=="AL"):
            X_tr, y_tr, X_val, y_val, X_unlabeled, y_unlabeled, train_set, val_set, test_set, lake_set, num_cls = load_dataset_custom(datadir, dataset_name, feature, split_cfg, True)
        
    N = len(train_set)
    trn_batch_size = 20
    val_batch_size = 10
    tst_batch_size = 100

    trainloader = torch.utils.data.DataLoader(train_set, batch_size=trn_batch_size,
                                              shuffle=True, pin_memory=True)

    valloader = torch.utils.data.DataLoader(val_set, batch_size=val_batch_size, 
                                            shuffle=False, pin_memory=True)

    tstloader = torch.utils.data.DataLoader(test_set, batch_size=tst_batch_size,
                                             shuffle=False, pin_memory=True)
    
    lakeloader = torch.utils.data.DataLoader(lake_set, batch_size=tst_batch_size,
                                         shuffle=False, pin_memory=True)
    true_lake_set = copy.deepcopy(lake_set)
    # Budget for subset selection
    bud = budget
   
    # Variables to store accuracies
    fulltrn_losses = np.zeros(num_epochs)
    val_losses = np.zeros(num_epochs)
    tst_losses = np.zeros(num_epochs)
    timing = np.zeros(num_epochs)
    val_acc = np.zeros(num_epochs)
    full_trn_acc = np.zeros(num_epochs)
    tst_acc = np.zeros(num_epochs)
    final_tst_predictions = []
    final_tst_classifications = []
    best_val_acc = -1
    csvlog = []
    val_csvlog = []
    # Results logging file
    print_every = 3
    all_logs_dir = 'SMI_active_learning_results_woVal/' + dataset_name  + '/' + feature + '/'+  sf + '/' + str(bud) + '/' + str(run)
    print("Saving results to: ", all_logs_dir)
    subprocess.run(["mkdir", "-p", all_logs_dir])
    exp_name = dataset_name + "_" + feature +  "_" + strategy + "_" + str(len(sel_cls_idx))  +"_" + sf +  '_budget:' + str(bud) + '_epochs:' + str(num_epochs) + '_linear:'  + str(linearLayer) + '_runs' + str(run)
    print(exp_name)
    res_dict = {"dataset":data_name, "feature":feature, "sel_func":sf, "sel_budget":budget, "num_selections":num_epochs, "model":model_name, "learning_rate":learning_rate, "setting":split_cfg, "all_class_acc":None, "test_acc":[],"sel_per_cls":[], "sel_cls_idx":sel_cls_idx.tolist()}
    # Model Creation
    model = create_model(model_name, num_cls, device)
    model1 = create_model(model_name, num_cls, device)
    if(strategy == "AL"):
        strategy_args = {'batch_size' : budget, 'lr':float(0.001)}
        if(sf=="badge"):
            strategy_sel = BADGE(X_tr.astype(np.float64), y_tr, X_unlabeled.astype(np.float64), model, DataHandler_CIFAR10, num_cls, device, strategy_args)
        elif(sf=="us"):
            strategy_sel = EntropySampling(X_tr, y_tr, X_unlabeled, model, DataHandler_CIFAR10, num_cls, device, strategy_args)
        elif(sf=="glister" or sf=="glister-tss"):
            strategy_sel = GLISTER(X_tr, y_tr, X_unlabeled, model, DataHandler_CIFAR10, num_cls, device, strategy_args, valid=True, X_val=X_val, Y_val=y_val, typeOf='rand', lam=0.1)
        elif(sf=="gradmatch-tss"):
            strategy_args = {'batch_size' : 1, 'lr':float(0.01)}
            strategy_sel = GradMatchActive(X_tr, y_tr, X_unlabeled, model, F.cross_entropy, DataHandler_CIFAR10, num_cls, strategy_args["lr"], "PerBatch", False, strategy_args, valid=True, X_val=X_val, Y_val=y_val)
    # Loss Functions
    criterion, criterion_nored = loss_function()

    # Getting the optimizer and scheduler
#     optimizer, scheduler = optimizer_with_scheduler(model, num_epochs, learning_rate)
    optimizer = optimizer_without_scheduler(model, learning_rate)
    private_set = []
    #overlap vars
    prev_idx = None
    prev_idx_hist = []
    sel_hist = []
    per_ep_overlap = []
    overall_overlap = []
    idx_tracker = np.array(list(range(len(lake_set))))

    for i in range(num_epochs):
        print("AL epoch: ", i)
        tst_loss = 0
        tst_correct = 0
        tst_total = 0
        val_loss = 0
        val_correct = 0
        val_total = 0
        
        if(i==0):
            print("initial training epoch")
            if(os.path.exists(initModelPath)):
                model.load_state_dict(torch.load(initModelPath, map_location=device))
                print("Init model loaded from disk, skipping init training: ", initModelPath)
                with torch.no_grad():
                    final_val_predictions = []
                    final_val_classifications = []
                    for batch_idx, (inputs, targets) in enumerate(valloader):
                        inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)
                        val_loss += loss.item()
                        _, predicted = outputs.max(1)
                        val_total += targets.size(0)
                        val_correct += predicted.eq(targets).sum().item()
                        final_val_predictions += list(predicted.cpu().numpy())
                        final_val_classifications += list(predicted.eq(targets).cpu().numpy())
  
                    if((val_correct/val_total) > best_val_acc):
                        final_tst_predictions = []
                        final_tst_classifications = []
                    for batch_idx, (inputs, targets) in enumerate(tstloader):
                        inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                        outputs = model(inputs)
                        loss = criterion(outputs, targets)
                        tst_loss += loss.item()
                        _, predicted = outputs.max(1)
                        tst_total += targets.size(0)
                        tst_correct += predicted.eq(targets).sum().item()
                        if((val_correct/val_total) > best_val_acc):
                            final_tst_predictions += list(predicted.cpu().numpy())
                            final_tst_classifications += list(predicted.eq(targets).cpu().numpy())                
                    if((val_correct/val_total) > best_val_acc):
                        best_val_acc = (val_correct/val_total)
                    val_acc[i] = val_correct / val_total
                    tst_acc[i] = tst_correct / tst_total
                    val_losses[i] = val_loss
                    tst_losses[i] = tst_loss
                    res_dict["test_acc"].append(tst_acc[i])
                continue
        else:
#             if(full_trn_acc[i-1] >= 0.99): #The model has already trained on the seed dataset
            #use misclassifications on validation set as queries
            #compute hypothesized labels
            hyp_lake_labels = []
            for batch_idx, (inputs, _) in enumerate(lakeloader):
                inputs = inputs.to(device)
                outputs = model(inputs)
                _, predicted = outputs.max(1)
                hyp_lake_labels += list(predicted)
            print(len(hyp_lake_labels))
            lake_set = custom_subset(lake_set, list(range(len(hyp_lake_labels))), torch.Tensor(hyp_lake_labels))
            lakeloader = torch.utils.data.DataLoader(lake_set, batch_size=tst_batch_size, shuffle=False, pin_memory=True)
#             sys.exit()
            #compute the error log before every selection
            if(computeErrorLog):
                tst_err_log, val_err_log, val_class_err_idxs = find_err_per_class(test_set, val_set, final_val_classifications, final_val_predictions, final_tst_classifications, final_tst_predictions, all_logs_dir, sf+"_"+str(bud))
                csvlog.append(tst_err_log)
                val_csvlog.append(val_err_log)
            ####SIM####
            if(strategy=="SIM" or strategy=="SF"):
                if(sf.endswith("mi")):
                    if(feature=="classimb"):
                        #make a dataloader for the misclassifications - only for experiments with targets
                        miscls_set = getMisclsSet(val_set, val_class_err_idxs, sel_cls_idx)
                        misclsloader = torch.utils.data.DataLoader(miscls_set, batch_size=1, shuffle=False, pin_memory=True)
                        setf_model = DataSelectionStrategy(lakeloader, misclsloader, model1, num_cls, linearLayer, device) #set last arg to true for linear layer
                    else:
                        setf_model = DataSelectionStrategy(lakeloader, valloader, model1, num_cls, linearLayer, device)
                elif(sf.endswith("cg")): #atleast one selection must be done for private set in cond gain functions
                    if(len(private_set)!=0):
                        privateSetloader = torch.utils.data.DataLoader(private_set, batch_size=1, shuffle=False, pin_memory=True)
                        setf_model = DataSelectionStrategy(lakeloader, privateSetloader, model1, num_cls, linearLayer, device) #set last arg to true for linear layer
                    else:
                        #compute subset with private set a NULL
                        setf_model = DataSelectionStrategy(lakeloader, valloader, model1, num_cls, linearLayer, device)
                else:
                    setf_model = DataSelectionStrategy(lakeloader, valloader, model1, num_cls, linearLayer, device)
                start_time = time.time()
                cached_state_dict = copy.deepcopy(model.state_dict())
                clone_dict = copy.deepcopy(model.state_dict())
                #update the selection strategy model with new params for gradient computation
                setf_model.update_model(clone_dict)
                if(sf.endswith("mi")): #SMI functions need the target set gradients
                    setf_model.compute_gradients(valid=True, batch=False, perClass=False)
                    print("train minibatch gradients shape ", setf_model.grads_per_elem.shape)
#                     print(setf_model.grads_per_elem)
                    print("val minibatch gradients shape ", setf_model.val_grads_per_elem.shape)
#                     print(setf_model.val_grads_per_elem)
                    if(doublePrecision):
                        train_val_kernel = kernel(setf_model.grads_per_elem.double(), setf_model.val_grads_per_elem.double())#img_query_kernel
                    else:
                        train_val_kernel = kernel(setf_model.grads_per_elem, setf_model.val_grads_per_elem)#img_query_kernel
                    numQueryPrivate = train_val_kernel.shape[1]
                elif(sf.endswith("cg")):
                    if(len(private_set)!=0):
                        setf_model.compute_gradients(valid=True, batch=False, perClass=False)
                        print("train minibatch gradients shape ", setf_model.grads_per_elem.shape)
                        print("val minibatch gradients shape ", setf_model.val_grads_per_elem.shape)
                        if(doublePrecision):
                            train_val_kernel = kernel(setf_model.grads_per_elem.double(), setf_model.val_grads_per_elem.double())#img_query_kernel
                        else:
                            train_val_kernel = kernel(setf_model.grads_per_elem, setf_model.val_grads_per_elem)#img_query_kernel
                        numQueryPrivate = train_val_kernel.shape[1]
                    else:
#                         assert(((i + 1)/select_every)==1)
                        setf_model.compute_gradients(valid=False, batch=False, perClass=False)
                        train_val_kernel = []
                        numQueryPrivate = 0
                else: # For other submodular functions needing only image kernel
                    setf_model.compute_gradients(valid=False, batch=False, perClass=False)
                    train_val_kernel = []
                    numQueryPrivate = 0

                kernel_time = time.time()
                if(doublePrecision):
                    train_kernel = kernel(setf_model.grads_per_elem.double(), setf_model.grads_per_elem.double()) #img_img_kernel
                else:
                    train_kernel = kernel(setf_model.grads_per_elem, setf_model.grads_per_elem) #img_img_kernel
                if(sf=="logdetmi" or sf=="logdetcg"):
                    if(sf=="logdetcg"):
                        if(len(private_set)!=0):
                            val_kernel = kernel(setf_model.val_grads_per_elem, setf_model.val_grads_per_elem)#query_query_kernel
                        else:
                            val_kernel = []
                    if(sf=="logdetmi"):
                        val_kernel = kernel(setf_model.val_grads_per_elem, setf_model.val_grads_per_elem)#query_query_kernel
                    save_kernel_hdf5(train_kernel, train_val_kernel, val_kernel)
                else:
                    save_kernel_hdf5(train_kernel, train_val_kernel)
                print("kernel compute time: ", time.time()-kernel_time)
                #call the c++ exec to read kernel and compute subset of selected minibatches
                subset = getSMI_ss(datkbuildPath, exePath, os.getcwd(), budget, str(numQueryPrivate), sf)
                print(subset[:5])
                model.load_state_dict(cached_state_dict)
                if(sf.endswith("cg")): #for first selection
                    if(len(private_set)==0):
                        private_set = custom_subset(lake_set, subset, torch.Tensor(lake_set.targets.float())[subset])
                    else:
                        private_set = getPrivateSet(lake_set, subset, private_set)
                    print("size of private set: ", len(private_set))

    #           temp = np.array(list(trainloader.batch_sampler))[subset] #if per batch
            ###AL###
            elif(strategy=="AL"):
                if(sf=="glister-tss" or sf=="gradmatch-tss"):
                    miscls_X_val, miscls_y_val = getMisclsSetNumpy(X_val, y_val, val_class_err_idxs, sel_cls_idx)
                    if(sf=="glister-tss"): strategy_sel = GLISTER(X_tr, y_tr, X_unlabeled, model, DataHandler_CIFAR10, num_cls, device, strategy_args, valid=True, X_val=miscls_X_val, Y_val=miscls_y_val, typeOf='rand', lam=0.1)
                    if(sf=="gradmatch-tss"): strategy_sel = GradMatchActive(X_tr, y_tr, X_unlabeled, model, F.cross_entropy, DataHandler_CIFAR10, num_cls, strategy_args["lr"], "PerBatch", False, strategy_args, valid=True, X_val=miscls_X_val, Y_val=miscls_y_val)
                    print("reinit AL with targeted miscls samples")
                strategy_sel.update_model(model)
                if(sf=="badge" or sf=="glister" or sf=="glister-tss"):
                    subset = strategy_sel.select(budget)
                if(sf=="us"):
                    subset = list(strategy_sel.select(budget).cpu().numpy())
                if(sf=="gradmatch-tss"):
                    subset = strategy_sel.select(budget, False) #Fixed weight gradmatch
                print(len(subset), " samples selected")
                X_tr = np.concatenate((X_tr, X_unlabeled[subset]), axis=0)
                X_unlabeled = np.delete(X_unlabeled, subset, axis = 0)
                y_tr = np.concatenate((y_tr, y_unlabeled[subset]), axis = 0)
                y_unlabeled = np.delete(y_unlabeled, subset, axis = 0)
                strategy_sel.update_data(X_tr, y_tr, X_unlabeled)
            elif(strategy=="random"):
                subset = np.random.choice(np.array(list(range(len(lake_set)))), size=budget, replace=False)
            if(i>1 and sf.endswith("cg")):
                per_ep, overall = check_overlap(prev_idx, prev_idx_hist, list(idx_tracker[subset]))
                per_ep_overlap.append(per_ep)
                overall_overlap.append(overall)
            if(feature=="ood"): #remove ood points from the subset
                subset = remove_ood_points(lake_set, subset, sel_cls_idx)
            lake_subset_idxs = subset #indices wrt to lake that need to be removed from the lake
            print("selEpoch: %d, Selection Ended at:" % (i), str(datetime.datetime.now()))
            perClsSel = getPerClassSel(true_lake_set, subset, num_cls)
            res_dict['sel_per_cls'].append(perClsSel)
            prev_idx = list(idx_tracker[subset])
            prev_idx_hist += list(idx_tracker[subset])
            sel_hist.append(list(idx_tracker[subset]))
            idx_tracker = np.delete(idx_tracker, subset, axis=0)
            
            #augment the train_set with selected indices from the lake
            if(feature=="classimb"):
                train_set, lake_set, true_lake_set = aug_train_subset(train_set, lake_set, true_lake_set, subset, lake_subset_idxs, budget, True) #aug train with random if budget is not filled
            else:
                train_set, lake_set, true_lake_set = aug_train_subset(train_set, lake_set, true_lake_set, subset, lake_subset_idxs, budget)
            print("After augmentation, size of train_set: ", len(train_set), " lake set: ", len(lake_set))
#           Reinit train and lake loaders with new splits and reinit the model
            trainloader = torch.utils.data.DataLoader(train_set, batch_size=trn_batch_size, shuffle=True, pin_memory=True)
            lakeloader = torch.utils.data.DataLoader(lake_set, batch_size=tst_batch_size, shuffle=False, pin_memory=True)
            assert(len(idx_tracker)==len(lake_set))
#             model =  model.apply(weight_reset).cuda()
            model = create_model(model_name, num_cls, device)
            optimizer = optimizer_without_scheduler(model, learning_rate)
                
        #Start training
        start_time = time.time()
        num_ep=1
        while(full_trn_acc[i]<0.99 and num_ep<150):
            model.train()
            for batch_idx, (inputs, targets) in enumerate(trainloader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                # Variables in Pytorch are differentiable.
                inputs, target = Variable(inputs), Variable(inputs)
                # This will zero out the gradients for this batch.
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()
#             scheduler.step()
          
            full_trn_loss = 0
            full_trn_correct = 0
            full_trn_total = 0
            model.eval()
            with torch.no_grad():
                for batch_idx, (inputs, targets) in enumerate(trainloader):
                    inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                    outputs = model(inputs)
                    loss = criterion(outputs, targets)
                    full_trn_loss += loss.item()
                    _, predicted = outputs.max(1)
                    full_trn_total += targets.size(0)
                    full_trn_correct += predicted.eq(targets).sum().item()
                full_trn_acc[i] = full_trn_correct / full_trn_total
                print("Selection Epoch ", i, " Training epoch [" , num_ep, "]" , " Training Acc: ", full_trn_acc[i], end="\r")
                num_ep+=1
            timing[i] = time.time() - start_time
        with torch.no_grad():
            final_val_predictions = []
            final_val_classifications = []
            for batch_idx, (inputs, targets) in enumerate(valloader):
                # print(batch_idx)
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                val_total += targets.size(0)
                val_correct += predicted.eq(targets).sum().item()
    #                 if(i == (num_epochs-1)):
                final_val_predictions += list(predicted.cpu().numpy())
                final_val_classifications += list(predicted.eq(targets).cpu().numpy())
                # sys.exit()

#             if((val_correct/val_total) > best_val_acc):
            final_tst_predictions = []
            final_tst_classifications = []
            for batch_idx, (inputs, targets) in enumerate(tstloader):
                inputs, targets = inputs.to(device), targets.to(device, non_blocking=True)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                tst_loss += loss.item()
                _, predicted = outputs.max(1)
                tst_total += targets.size(0)
                tst_correct += predicted.eq(targets).sum().item()
#                 if((val_correct/val_total) > best_val_acc):
    #                 if(i == (num_epochs-1)):
                final_tst_predictions += list(predicted.cpu().numpy())
                final_tst_classifications += list(predicted.eq(targets).cpu().numpy())                
#             if((val_correct/val_total) > best_val_acc):
#                 best_val_acc = (val_correct/val_total)
            val_acc[i] = val_correct / val_total
            tst_acc[i] = tst_correct / tst_total
            val_losses[i] = val_loss
            fulltrn_losses[i] = full_trn_loss
            tst_losses[i] = tst_loss
            full_val_acc = list(np.array(val_acc))
            full_timing = list(np.array(timing))
            res_dict["test_acc"].append(tst_acc[i])
            print('Epoch:', i + 1, 'FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time:', full_trn_loss, full_trn_acc[i], val_loss, val_acc[i], tst_loss, tst_acc[i], timing[i])
        if(i==0): 
            print("saving initial model") 
            torch.save(model.state_dict(), initModelPath) #save initial train model if not present
    if(computeErrorLog):
        tst_err_log, val_err_log, val_class_err_idxs = find_err_per_class(test_set, val_set, final_val_classifications, final_val_predictions, final_tst_classifications, final_tst_predictions, all_logs_dir, sf+"_"+str(bud))
        csvlog.append(tst_err_log)
        val_csvlog.append(val_err_log)
        print(csvlog)
        res_dict["all_class_acc"] = csvlog
        res_dict["all_val_class_acc"] = val_csvlog
#         with open(os.path.join(all_logs_dir, exp_name+".csv"), "w") as f:
#             writer = csv.writer(f)
#             writer.writerows(csvlog)
    #save results dir with test acc and per class selections
    with open(os.path.join(all_logs_dir, exp_name+".json"), 'w') as fp:
        json.dump(res_dict, fp)
    return tst_acc, csvlog

# FL2MI

In [56]:
# FL2MI experiment. The two trailing string arguments are presumably the
# selection strategy ("SIM") and the function name ('fl2mi') -- the signature
# of train_model_al is defined in an earlier cell; verify the order there.
# train_model_al returns the pair (tst_acc, csvlog), unpacked below.
fl2mi_tst, fl2mi_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "SIM",
    'fl2mi',
)

SIM fl2mi
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/fl2mi/30/1
cifar10_classimb_SIM_2_fl2mi_budget:30_epochs:20_linear:False_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  40.0 57.2
val, test error% for class  3  :  80.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  80.0 50.4
val, test error% for class  6  :  0.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  60.0 28.0
total misclassified ex from imb classes:  10
Per Element Training Gradien

Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([24150, 10])
val minibatch gradients shape  torch.Size([9, 10])
kernel compute time:  20.74738597869873
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -queryDiversityLambda 1 -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer fl2mi -numQueries  9 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[1482, 3092, 7312, 21582, 18259]
selEpoch: 6, Selection Ended at: 2021-04-10 14:26:59.574487
After augmentation, size of train_set:  1800  lake set:  24120
Epoch: 7 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.9758431956288405 0.9905555555555555 12.600911974906921 0.52 312.97203171253204 0.5188 357.740855455

[1910, 1624, 20951, 1851, 21014]
selEpoch: 11, Selection Ended at: 2021-04-10 15:05:08.223909
After augmentation, size of train_set:  1950  lake set:  23970
Epoch: 12 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.2527811396867037 0.9912820512820513 10.54728239774704 0.62 260.0122916698456 0.5575 351.04397892951965
AL epoch:  12
23970
val, test error% for class  0  :  20.0 23.7
val, test error% for class  1  :  60.0 76.8
val, test error% for class  2  :  20.0 46.2
val, test error% for class  3  :  60.0 59.4
val, test error% for class  4  :  60.0 47.0
val, test error% for class  5  :  0.0 44.5
val, test error% for class  6  :  0.0 20.8
val, test error% for class  7  :  20.0 30.4
val, test error% for class  8  :  100.0 81.3
val, test error% for class  9  :  40.0 12.4
total misclassified ex from imb classes:  8
Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([23970, 10])
val

Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([23820, 10])
val minibatch gradients shape  torch.Size([5, 10])
kernel compute time:  19.958787202835083
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -queryDiversityLambda 1 -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer fl2mi -numQueries  5 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[23538, 20532, 21543, 2188, 2817]
selEpoch: 17, Selection Ended at: 2021-04-10 15:51:51.121154
After augmentation, size of train_set:  2130  lake set:  23790
Epoch: 18 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.330385208828375 0.9901408450704225 8.027576804161072 0.7 206.33323502540588 0.6023 377.91502285

# FL1MI

In [67]:
# FL1MI experiment. Same argument list as the other runs; only the final
# string ('fl1mi') differs. The last two arguments are presumably the
# strategy and the function name -- confirm against train_model_al's signature.
# train_model_al returns the pair (tst_acc, csvlog), unpacked below.
fl1mi_tst, fl1mi_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "SIM",
    'fl1mi',
)

SIM fl1mi
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/fl1mi/30/1
cifar10_classimb_SIM_2_fl1mi_budget:30_epochs:20_linear:True_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  80.0 81.6
val, test error% for class  2  :  60.0 57.2
val, test error% for class  3  :  60.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  60.0 50.4
val, test error% for class  6  :  40.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  80.0 90.2
val, test error% for class  9  :  80.0 28.0
total misclassified ex from imb classes:  8
Per Element Training Gradient C

Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([24150, 5130])
val minibatch gradients shape  torch.Size([8, 5130])
kernel compute time:  71.1513683795929
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer fl1mi -numQueries 8 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[7517, 2982, 23352, 18181, 6912]
selEpoch: 6, Selection Ended at: 2021-04-11 14:01:05.177010
After augmentation, size of train_set:  1800  lake set:  24120
Epoch: 7 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.187944386852905 0.99 14.610131859779358 0.64 314.9602732658386 0.5075 364.07353496551514
AL epoch:  7
24120
val, test err

Epoch: 12 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.15355181961786 0.9958974358974358 10.800266042351723 0.6 254.41640627384186 0.5613 374.1253921985626
AL epoch:  12
23970
val, test error% for class  0  :  20.0 21.1
val, test error% for class  1  :  60.0 77.0
val, test error% for class  2  :  0.0 48.3
val, test error% for class  3  :  20.0 55.7
val, test error% for class  4  :  80.0 49.9
val, test error% for class  5  :  80.0 47.0
val, test error% for class  6  :  0.0 20.9
val, test error% for class  7  :  20.0 27.1
val, test error% for class  8  :  100.0 76.8
val, test error% for class  9  :  20.0 14.9
total misclassified ex from imb classes:  8
Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([23970, 5130])
val minibatch gradients shape  torch.Size([8, 5130])
kernel compute time:  63.98136496543884
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/bu

kernel compute time:  36.12892031669617
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer fl1mi -numQueries 5 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[14964, 8468, 2929, 20867, 20921]
selEpoch: 17, Selection Ended at: 2021-04-11 15:49:08.909937
After augmentation, size of train_set:  2130  lake set:  23790
Epoch: 18 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.6469205684261397 0.9901408450704225 7.798752456903458 0.64 215.74739289283752 0.6004 440.3640010356903
AL epoch:  18
23790
val, test error% for class  0  :  20.0 18.5
val, test error% for class  1  :  60.0 63.4
val, test error% for class  2  :  20.0 44.8
val, test error% for class  3  :  20.0 54.0
val, test error% for class  4  :  60.0 37.

# BADGE

In [None]:
# Disabled alternative run, kept for reference ("SIM" with 'gccg', error log off):
# train_model_al(datkbuildPath, exePath, num_epochs, data_name, datadir, feature, model_name, budget, split_cfg, learning_rate, 1, device, False, "SIM",'gccg')

# BADGE experiment. The trailing "AL" / "badge" pair presumably selects the
# strategy branch and the selection method -- confirm against the signature of
# train_model_al. The call returns the pair (tst_acc, csvlog), unpacked below.
badge_tst, badge_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "AL",
    "badge",
)

# US

In [9]:
# US experiment. The trailing "AL" / "us" pair presumably selects the
# strategy branch and the selection method -- confirm against the signature of
# train_model_al. The call returns the pair (tst_acc, csvlog), unpacked below.
us_tst, us_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "AL",
    "us",
)

AL us
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1640 Val size:  20 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/us/50/1
cifar10_classimb_AL_2_us_budget:50_epochs:20_linear:True_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  0 57.2
val, test error% for class  3  :  0 53.1
val, test error% for class  4  :  0 59.3
val, test error% for class  5  :  0 50.4
val, test error% for class  6  :  0 30.1
val, test error% for class  7  :  0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  0 28.0
50  samples selected
selEpoch: 1, Selection Ended at: 2021-04-11 16:44:13.599677
After augmentation, size of

50  samples selected
selEpoch: 11, Selection Ended at: 2021-04-11 17:47:09.689370
After augmentation, size of train_set:  2190  lake set:  23750
Epoch: 12 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.811365076806396 0.991324200913242 0.009198523592203856 1.0 204.56159508228302 0.6071 440.17178201675415
AL epoch:  12
23750
val, test error% for class  0  :  0 31.7
val, test error% for class  1  :  0.0 71.0
val, test error% for class  2  :  0 39.8
val, test error% for class  3  :  0 60.0
val, test error% for class  4  :  0 36.6
val, test error% for class  5  :  0 38.3
val, test error% for class  6  :  0 23.2
val, test error% for class  7  :  0 38.2
val, test error% for class  8  :  0.0 44.8
val, test error% for class  9  :  0 9.3
50  samples selected
selEpoch: 12, Selection Ended at: 2021-04-11 17:55:05.134533
After augmentation, size of train_set:  2240  lake set:  23700
Epoch: 13 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.209276114357635 0.9901785714285715 0.515597

# GLISTER

In [7]:
# GLISTER ("glister-tss") experiment.
# The results are stored under glister_* names. This cell previously assigned
# to us_tst / us_csvlog, which are the names the US cell uses, so running the
# notebook top to bottom overwrote the US results with the GLISTER ones.
# NOTE(review): if a later cell read us_tst expecting GLISTER values, point it
# at glister_tst instead.
# train_model_al returns the pair (tst_acc, csvlog), unpacked below.
glister_tst, glister_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "AL",
    "glister-tss",
)

AL glister-tss
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results/cifar10/classimb/glister-tss/30/1
cifar10_classimb_AL_2_glister-tss_budget:30_epochs:10_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  40.0 57.2
val, test error% for class  3  :  60.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  60.0 50.4
val, test error% for class  6  :  0.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  60.0 28.0
total misclassified ex from imb classes:  10
reinit Glister with targeted miscls s

  Y_Val = torch.tensor(self.Y_Val,device=self.device)


30  samples selected
selEpoch: 1, Selection Ended at: 2021-04-07 00:45:54.451161
After augmentation, size of train_set:  1650  lake set:  24270
Epoch: 2 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.5631852811202407 0.9903030303030304 14.006057679653168 0.44 312.36546564102173 0.4904 267.2893898487091
AL epoch:  2
val, test error% for class  0  :  40.0 21.2
val, test error% for class  1  :  100.0 94.1
val, test error% for class  2  :  0.0 56.5
val, test error% for class  3  :  80.0 62.3
val, test error% for class  4  :  60.0 40.8
val, test error% for class  5  :  60.0 51.5
val, test error% for class  6  :  20.0 38.8
val, test error% for class  7  :  20.0 36.9
val, test error% for class  8  :  100.0 85.4
val, test error% for class  9  :  80.0 22.1
total misclassified ex from imb classes:  10
reinit Glister with targeted miscls samples
30  samples selected
selEpoch: 2, Selection Ended at: 2021-04-07 00:50:38.707412
After augmentation, size of train_set:  1680  lake set:  24240
E

# GradMatch-Active

In [25]:
# GradMatch-Active experiment. The trailing "AL" / "gradmatch-tss" pair
# presumably selects the strategy branch and the selection method -- confirm
# against the signature of train_model_al.
# The call returns the pair (tst_acc, csvlog), unpacked below.
gradmatch_tst, gradmatch_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "AL",
    "gradmatch-tss",
)

AL gradmatch-tss
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/gradmatch-tss/30/1
cifar10_classimb_AL_2_gradmatch-tss_budget:30_epochs:10_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  40.0 57.2
val, test error% for class  3  :  80.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  60.0 50.4
val, test error% for class  6  :  20.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  80.0 90.2
val, test error% for class  9  :  80.0 28.0
total misclassified ex from imb classes:  9
reinit AL with targeted mi

# GCMI+DIV

In [58]:
# GCMI+DIV experiment. The trailing "SIM" / 'div-gcmi' pair presumably selects
# the strategy branch and the function name -- confirm against the signature
# of train_model_al.
# The call returns the pair (tst_acc, csvlog), unpacked below.
gcmidiv_tst, gcmidiv_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "SIM",
    'div-gcmi',
)

SIM div-gcmi
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/div-gcmi/30/1
cifar10_classimb_SIM_2_div-gcmi_budget:30_epochs:20_linear:True_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  20.0 57.2
val, test error% for class  3  :  60.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  80.0 50.4
val, test error% for class  6  :  20.0 30.1
val, test error% for class  7  :  20.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  40.0 28.0
total misclassified ex from imb classes:  10
Per Element Trainin

Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([24150, 5130])
val minibatch gradients shape  torch.Size([7, 5130])
kernel compute time:  37.90722179412842
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer div-gcmi -numQueries 7 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[21149, 21271, 2322, 231, 896]
selEpoch: 6, Selection Ended at: 2021-04-10 17:19:47.696726
After augmentation, size of train_set:  1800  lake set:  24120
Epoch: 7 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.336410054238513 0.99 14.934132188558578 0.58 346.2347106933594 0.5091 394.96116042137146
AL epoch:  7
24120
val, test e

Epoch: 12 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.058033883920871 0.9902564102564102 13.530300498008728 0.62 253.3195549249649 0.5561 404.50895738601685
AL epoch:  12
23970
val, test error% for class  0  :  0.0 28.5
val, test error% for class  1  :  60.0 76.7
val, test error% for class  2  :  20.0 49.9
val, test error% for class  3  :  40.0 45.6
val, test error% for class  4  :  80.0 40.5
val, test error% for class  5  :  40.0 53.4
val, test error% for class  6  :  0.0 28.3
val, test error% for class  7  :  40.0 28.2
val, test error% for class  8  :  80.0 81.9
val, test error% for class  9  :  20.0 10.9
total misclassified ex from imb classes:  7
Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([23970, 5130])
val minibatch gradients shape  torch.Size([7, 5130])
kernel compute time:  37.08492183685303
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/b

kernel compute time:  35.37218976020813
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer div-gcmi -numQueries 6 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[1735, 186, 3120, 20897, 20828]
selEpoch: 17, Selection Ended at: 2021-04-10 18:55:26.581608
After augmentation, size of train_set:  2130  lake set:  23790
Epoch: 18 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.030115187866613 0.9915492957746479 7.868071585893631 0.66 195.69471347332 0.6158 318.12108159065247
AL epoch:  18
23790
val, test error% for class  0  :  0.0 17.9
val, test error% for class  1  :  40.0 60.5
val, test error% for class  2  :  0.0 42.5
val, test error% for class  3  :  40.0 46.9
val, test error% for class  4  :  40.0 38.0
va

# GCMI

In [61]:
# GCMI experiment. The trailing "SIM" / 'gcmi' pair presumably selects the
# strategy branch and the function name -- confirm against the signature of
# train_model_al.
# The call returns the pair (tst_acc, csvlog), unpacked below.
gcmi_tst, gcmi_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    1,
    device,
    computeClassErrorLog,
    "SIM",
    'gcmi',
)

SIM gcmi
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/gcmi/30/1
cifar10_classimb_SIM_2_gcmi_budget:30_epochs:20_linear:True_runs1
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  40.0 25.4
val, test error% for class  1  :  80.0 81.6
val, test error% for class  2  :  40.0 57.2
val, test error% for class  3  :  80.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  60.0 50.4
val, test error% for class  6  :  0.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  60.0 28.0
total misclassified ex from imb classes:  9
Per Element Training Gradient Comp

Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([24150, 5130])
val minibatch gradients shape  torch.Size([7, 5130])
kernel compute time:  70.97694611549377
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer gcmi -numQueries 7 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[19945, 23925, 1502, 160, 2535]
selEpoch: 6, Selection Ended at: 2021-04-10 22:02:21.706729
After augmentation, size of train_set:  1800  lake set:  24120
Epoch: 7 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.540118155768141 0.9905555555555555 12.770921468734741 0.5 274.05703568458557 0.5177 348.7776484489441
AL epoch:  7
24120
v

Epoch: 12 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.927917805733159 0.9907692307692307 8.247603237628937 0.66 229.21962893009186 0.5904 353.33984375
AL epoch:  12
23970
val, test error% for class  0  :  20.0 25.9
val, test error% for class  1  :  40.0 51.7
val, test error% for class  2  :  0.0 41.6
val, test error% for class  3  :  60.0 51.3
val, test error% for class  4  :  60.0 51.1
val, test error% for class  5  :  80.0 54.2
val, test error% for class  6  :  0.0 27.8
val, test error% for class  7  :  0.0 20.8
val, test error% for class  8  :  40.0 67.8
val, test error% for class  9  :  40.0 17.4
total misclassified ex from imb classes:  4
Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([23970, 5130])
val minibatch gradients shape  torch.Size([4, 5130])
kernel compute time:  64.10424590110779
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/ci

kernel compute time:  69.5415735244751
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode query -naiveOrRandom naive -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer gcmi -numQueries 6 -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5
[2916, 9394, 2898, 1066, 20867]
selEpoch: 17, Selection Ended at: 2021-04-10 23:32:21.401922
After augmentation, size of train_set:  2130  lake set:  23790
Epoch: 18 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.8904351675882936 0.9953051643192489 7.180693745613098 0.64 166.79677760601044 0.6585 400.87427377700806
AL epoch:  18
23790
val, test error% for class  0  :  0.0 20.6
val, test error% for class  1  :  20.0 38.8
val, test error% for class  2  :  40.0 39.1
val, test error% for class  3  :  60.0 48.3
val, test error% for class  4  :  60.0 37.2
va

# LOGDETMI

In [13]:
# Run train_model_al for the LOGDETMI section and keep both returned values.
# The last two positional arguments ("SIM", 'logdetmi') match the
# "SIM logdetmi" banner at the top of this cell's output; presumably they pick
# the selection strategy and the submodular function -- confirm against the
# train_model_al signature.
logdetmi_tst, logdetmi_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    run,
    device,
    computeClassErrorLog,
    "SIM",
    'logdetmi',
)

SIM logdetmi
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1620 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/logdetmi/30/2
cifar10_classimb_SIM_2_logdetmi_budget:30_epochs:20_linear:True_runs2
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  40.0 57.2
val, test error% for class  3  :  80.0 53.1
val, test error% for class  4  :  80.0 59.3
val, test error% for class  5  :  60.0 50.4
val, test error% for class  6  :  40.0 30.1
val, test error% for class  7  :  0.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  80.0 28.0
total misclassified ex from imb classes:  10
Per Element Training

Epoch: 6 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.355578598100692 0.9903954802259887 8.831465601921082 0.52 231.597430229187 0.5767 316.2437698841095
AL epoch:  6
24150
val, test error% for class  0  :  40.0 31.4
val, test error% for class  1  :  60.0 66.4
val, test error% for class  2  :  20.0 50.0
val, test error% for class  3  :  60.0 53.5
val, test error% for class  4  :  80.0 30.9
val, test error% for class  5  :  60.0 37.1
val, test error% for class  6  :  0.0 30.2
val, test error% for class  7  :  40.0 36.1
val, test error% for class  8  :  100.0 75.7
val, test error% for class  9  :  20.0 12.0
total misclassified ex from imb classes:  8
Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([24150, 5130])
val minibatch gradients shape  torch.Size([8, 5130])
kernel compute time:  66.08659362792969
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/buil

Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([24000, 5130])
val minibatch gradients shape  torch.Size([7, 5130])
kernel compute time:  70.03327083587646
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector_ng -mode query -naiveOrRandom naive -logDetLambda 1 -magnificationLambda 1 -numSummaries 1 -budget 30 -queryPrivacyOptimizer logdetmi -numQueries  7  -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.hdf5 -queryKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_target_kernel.hdf5 -queryqueryKernelFile /home/snk170001/bioml/dss/notebooks/smi_target_kernel.hdf5
[21006, 22825, 347, 2609, 21252]
selEpoch: 11, Selection Ended at: 2021-04-11 22:07:24.136771
After augmentation, size of train_set:  1950  lake set:  23970
Epoch: 12 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.9187086641322

[745, 11336, 2334, 1136, 21891]
selEpoch: 16, Selection Ended at: 2021-04-11 22:53:20.486995
After augmentation, size of train_set:  2100  lake set:  23820
Epoch: 17 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.629454983281903 0.99 9.015868067741394 0.7 195.2881383895874 0.6151 418.53379821777344
AL epoch:  17
23820
val, test error% for class  0  :  0.0 23.7
val, test error% for class  1  :  40.0 37.8
val, test error% for class  2  :  0.0 44.3
val, test error% for class  3  :  20.0 55.5
val, test error% for class  4  :  80.0 48.1
val, test error% for class  5  :  20.0 59.8
val, test error% for class  6  :  20.0 17.2
val, test error% for class  7  :  40.0 38.8
val, test error% for class  8  :  60.0 44.6
val, test error% for class  9  :  20.0 15.1
total misclassified ex from imb classes:  5
Per Element Training Gradient Computation is Completed
Per Element Validation Gradient Computation is Completed
train minibatch gradients shape  torch.Size([23820, 5130])
val minibatch gradi

# FL

In [10]:
# Run train_model_al for the FL section and keep both returned values.
# The last two positional arguments ("SF", 'fl') match the "SF fl" banner at
# the top of this cell's output; presumably they pick the selection strategy
# and the submodular function -- confirm against the train_model_al signature.
fl_tst, fl_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    run,
    device,
    computeClassErrorLog,
    "SF",
    'fl',
)

SF fl
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1630 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/fl/30/2
cifar10_classimb_SF_2_fl_budget:30_epochs:20_linear:True_runs2
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  40.0 57.2
val, test error% for class  3  :  60.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  40.0 50.4
val, test error% for class  6  :  20.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  60.0 28.0
Per Element Training Gradient Computation is Completed
kernel compute time:  70.4567

[17006, 6531, 22774, 13147, 14731]
selEpoch: 7, Selection Ended at: 2021-04-12 09:53:35.942153
After augmentation, size of train_set:  1840  lake set:  24090
Epoch: 8 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.8597903322661296 0.9923913043478261 9.220510989427567 0.72 249.1334228515625 0.5507 349.70278906822205
AL epoch:  8
24090
val, test error% for class  0  :  0.0 21.9
val, test error% for class  1  :  0.0 69.1
val, test error% for class  2  :  40.0 49.3
val, test error% for class  3  :  80.0 56.1
val, test error% for class  4  :  80.0 46.8
val, test error% for class  5  :  20.0 36.8
val, test error% for class  6  :  0.0 28.9
val, test error% for class  7  :  20.0 29.5
val, test error% for class  8  :  0.0 81.7
val, test error% for class  9  :  40.0 29.2
Per Element Training Gradient Computation is Completed
kernel compute time:  70.2014229297638
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector_ng -mode generic -naiveOrRandom nai

Epoch: 15 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.945526034454815 0.9926829268292683 5.081322960555553 0.74 264.87732088565826 0.5518 341.56500720977783
AL epoch:  15
23880
val, test error% for class  0  :  0.0 26.7
val, test error% for class  1  :  0.0 79.6
val, test error% for class  2  :  20.0 38.8
val, test error% for class  3  :  60.0 58.4
val, test error% for class  4  :  60.0 37.8
val, test error% for class  5  :  60.0 42.3
val, test error% for class  6  :  0.0 27.7
val, test error% for class  7  :  20.0 28.6
val, test error% for class  8  :  0.0 92.2
val, test error% for class  9  :  40.0 16.1
Per Element Training Gradient Computation is Completed
kernel compute time:  71.96464157104492
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector_ng -mode generic -naiveOrRandom naive -logDetLambda 1 -numSummaries 1 -budget 30 -genericOptimizer fl -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_ke

# GC

In [11]:
# Run train_model_al for the GC section and keep both returned values.
# The last two positional arguments ("SF", 'gc') match the "SF gc" banner at
# the top of this cell's output; presumably they pick the selection strategy
# and the submodular function -- confirm against the train_model_al signature.
gc_tst, gc_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    run,
    device,
    computeClassErrorLog,
    "SF",
    'gc',
)

SF gc
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1630 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/gc/30/2
cifar10_classimb_SF_2_gc_budget:30_epochs:20_linear:True_runs2
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  60.0 57.2
val, test error% for class  3  :  80.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  40.0 50.4
val, test error% for class  6  :  20.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  60.0 28.0
Per Element Training Gradient Computation is Completed
kernel compute time:  72.8688

[18105, 18463, 22698, 19126, 22297]
selEpoch: 7, Selection Ended at: 2021-04-12 12:39:28.201731
After augmentation, size of train_set:  1840  lake set:  24090
Epoch: 8 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 3.567132534692064 0.9907608695652174 6.095535598695278 0.72 271.7960410118103 0.5308 320.89982295036316
AL epoch:  8
24090
val, test error% for class  0  :  0.0 20.2
val, test error% for class  1  :  0.0 84.3
val, test error% for class  2  :  40.0 53.2
val, test error% for class  3  :  60.0 58.5
val, test error% for class  4  :  80.0 42.9
val, test error% for class  5  :  60.0 52.1
val, test error% for class  6  :  0.0 24.9
val, test error% for class  7  :  20.0 27.7
val, test error% for class  8  :  0.0 90.8
val, test error% for class  9  :  20.0 14.6
Per Element Training Gradient Computation is Completed
kernel compute time:  71.11980938911438
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode generic -naiveOrRandom naive

Epoch: 15 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.406150978873484 0.9941463414634146 5.359792500734329 0.72 252.31265306472778 0.5666 506.09141087532043
AL epoch:  15
23880
val, test error% for class  0  :  20.0 25.5
val, test error% for class  1  :  0.0 73.7
val, test error% for class  2  :  20.0 49.1
val, test error% for class  3  :  80.0 54.5
val, test error% for class  4  :  60.0 49.3
val, test error% for class  5  :  40.0 35.4
val, test error% for class  6  :  0.0 21.6
val, test error% for class  7  :  20.0 27.6
val, test error% for class  8  :  0.0 84.9
val, test error% for class  9  :  40.0 11.8
Per Element Training Gradient Computation is Completed
kernel compute time:  73.47035312652588
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector -mode generic -naiveOrRandom naive -gcLambda 1 -numSummaries 1 -budget 30 -genericOptimizer gc -dontComputeKernel true -imageKernelFile /home/snk170001/bioml/dss/notebooks/smi_lake_kernel.h

# LOGDET

In [None]:
# Run train_model_al for the LOGDET section and keep both returned values.
# The last two positional arguments ("SF", 'logdet') match the "SF logdet"
# banner at the top of this cell's output; presumably they pick the selection
# strategy and the submodular function -- confirm against the train_model_al
# signature.
logdet_tst, logdet_csvlog = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    run,
    device,
    computeClassErrorLog,
    "SF",
    'logdet',
)

SF logdet
Files already downloaded and verified
Files already downloaded and verified
CIFAR-10 Custom dataset stats: Train size:  1630 Val size:  50 Lake size:  24300
selected classes are:  [8 1]
Saving results to:  SMI_active_learning_results_woVal/cifar10/classimb/logdet/30/2
cifar10_classimb_SF_2_logdet_budget:30_epochs:20_linear:True_runs2
AL epoch:  0
initial training epoch
Init model loaded from disk, skipping init training:  cifar10_ResNet18_0.01_10_200_2
AL epoch:  1
24300
val, test error% for class  0  :  60.0 25.4
val, test error% for class  1  :  100.0 81.6
val, test error% for class  2  :  20.0 57.2
val, test error% for class  3  :  80.0 53.1
val, test error% for class  4  :  60.0 59.3
val, test error% for class  5  :  60.0 50.4
val, test error% for class  6  :  20.0 30.1
val, test error% for class  7  :  40.0 34.8
val, test error% for class  8  :  100.0 90.2
val, test error% for class  9  :  60.0 28.0
Per Element Training Gradient Computation is Completed
kernel compute ti

[16495, 3047, 18822, 12026, 5922]
selEpoch: 7, Selection Ended at: 2021-04-12 15:39:40.649632
After augmentation, size of train_set:  1840  lake set:  24090
Epoch: 8 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 2.95787794678472 0.9902173913043478 8.254347253590822 0.68 254.36517441272736 0.542 397.2929484844208
AL epoch:  8
24090
val, test error% for class  0  :  0.0 17.7
val, test error% for class  1  :  0.0 75.6
val, test error% for class  2  :  40.0 47.8
val, test error% for class  3  :  60.0 56.1
val, test error% for class  4  :  80.0 44.8
val, test error% for class  5  :  40.0 38.8
val, test error% for class  6  :  0.0 38.7
val, test error% for class  7  :  60.0 32.4
val, test error% for class  8  :  0.0 89.8
val, test error% for class  9  :  40.0 16.3
Per Element Training Gradient Computation is Completed
kernel compute time:  70.68901562690735
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector_ng -mode generic -naiveOrRandom naive 

[7154, 13724, 2616, 2992, 4026]
selEpoch: 14, Selection Ended at: 2021-04-12 16:54:17.941147
After augmentation, size of train_set:  2050  lake set:  23880
Epoch: 15 FullTrn,TrainAcc,ValLoss,ValAcc,TstLoss,TstAcc,Time: 4.018952207872644 0.9902439024390244 5.089254334568977 0.74 240.90168356895447 0.5674 470.9889762401581
AL epoch:  15
23880
val, test error% for class  0  :  20.0 22.1
val, test error% for class  1  :  0.0 83.2
val, test error% for class  2  :  40.0 51.3
val, test error% for class  3  :  60.0 43.2
val, test error% for class  4  :  60.0 45.5
val, test error% for class  5  :  40.0 61.5
val, test error% for class  6  :  20.0 25.2
val, test error% for class  7  :  0.0 21.2
val, test error% for class  8  :  0.0 69.0
val, test error% for class  9  :  20.0 10.4
Per Element Training Gradient Computation is Completed
kernel compute time:  73.29601073265076
Executing SIM command:  /home/snk170001/bioml/dss/notebooks/datk/build/cifarSubsetSelector_ng -mode generic -naiveOrRandom na

# Random

In [None]:
# Run train_model_al for the Random section, passing "random" for both of the
# last two positional arguments.
# NOTE(review): the other cells in this notebook unpack two values from
# train_model_al (e.g. `fl_tst, fl_csvlog = ...`); here the whole return value
# is bound to a single name -- confirm that downstream code expects this.
# for i in range(1,6):
random_test_acc = train_model_al(
    datkbuildPath,
    exePath,
    num_epochs,
    data_name,
    datadir,
    feature,
    model_name,
    budget,
    split_cfg,
    learning_rate,
    run,
    device,
    computeClassErrorLog,
    "random",
    'random',
)