In [10]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.utils import to_categorical

import cvxpy as cp
import numpy as np
import scipy.optimize
from scipy.optimize import curve_fit

from water_filling import Waterfilling
from uniform import Uniform
from slice_tuner import SliceTuner
from cnn import CNN

# Plotting setup: raise matplotlib's default font size to 17 and render
# figures inline in the notebook.
# NOTE(review): the font size is set before `%matplotlib inline` runs; if the
# inline backend in use applies its own rc defaults, this ordering may matter
# -- confirm the font size actually takes effect in the target environment.
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 17})
%matplotlib inline

# Install a blanket "ignore" filter so no Python warnings are shown for the
# rest of the session (this also hides deprecation warnings).
import warnings
warnings.filterwarnings('ignore')

# Load Fashion-MNIST dataset from keras

In [2]:
def shuffle(data, label, rng=None):
    """Shuffle ``data`` and ``label`` with one shared random permutation.

    Both inputs are reordered along their first axis with the same index
    permutation, so ``data[k]`` and ``label[k]`` still belong together
    afterwards. The inputs themselves are not modified; reordered copies are
    returned.

    Args:
        data: Array-like of samples, indexed along the first axis.
        label: Array-like of labels with the same length as ``data``.
        rng: Optional random source exposing ``shuffle`` (for example
            ``np.random.RandomState(seed)`` or ``np.random.default_rng(seed)``).
            When ``None`` (the default) NumPy's global random state is used,
            so ``np.random.seed`` controls the result.

    Returns:
        A ``(data, label)`` tuple of the reordered arrays.

    Raises:
        ValueError: If ``data`` and ``label`` differ in length.
    """
    # Accept plain sequences as well; ndarrays pass through unchanged.
    data = np.asanyarray(data)
    label = np.asanyarray(label)

    # A longer label array would otherwise be truncated silently by the
    # fancy indexing below.
    if len(data) != len(label):
        raise ValueError(
            "data and label must have the same length, got %d and %d"
            % (len(data), len(label))
        )

    # Named `order` so the function's own name is not shadowed.
    order = np.arange(len(data))
    if rng is None:
        np.random.shuffle(order)
    else:
        rng.shuffle(order)

    return data[order], label[order]

# Fashion-MNIST is loaded through Keras; every class label is treated as one slice.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
num_class = len(np.unique(train_labels))
print("Number of slices : %d" % num_class)

y_train_one_hot = to_categorical(train_labels)
fashion_data = (train_images, y_train_one_hot)

# Per-slice bookkeeping, indexed by class id.
# NOTE: despite the "_dict" suffix, val_data_dict and add_data_dict are lists of
# (images, one_hot_labels) tuples. The names are kept because the cells below
# pass them on under these names.
initial_data_array = []  # number of initial training examples per slice
val_data_dict = []       # per-slice validation split
add_data_dict = []       # per-slice remainder after the train and validation splits

val_data_num = 500  # validation examples taken from every slice

# Decode the one-hot labels once instead of once per slice.
class_ids = np.argmax(fashion_data[1], axis=1)

# Per-slice pieces are collected here and concatenated once after the loop,
# rather than growing the arrays with one np.concatenate per iteration.
train_data_parts, train_label_parts = [], []
val_data_parts, val_label_parts = [], []

for i in range(num_class):
    # The initial slice size grows with the class index
    # (252 for class 0 up to 800 for class 9 when num_class is 10).
    data_num = int(800 / (num_class - i) ** 0.5)
    initial_data_array.append(data_num)
    idx = class_ids == i

    # Boolean-mask indexing copies the selected rows, so do it once per slice
    # and carve the three splits out of that single copy.
    slice_images = fashion_data[0][idx]
    slice_labels = fashion_data[1][idx]
    val_end = data_num + val_data_num

    # Layout within a slice: [0, data_num) train, [data_num, val_end) validation,
    # [val_end, ...) remainder.
    val_data_dict.append((slice_images[data_num:val_end], slice_labels[data_num:val_end]))
    add_data_dict.append((slice_images[val_end:], slice_labels[val_end:]))

    train_data_parts.append(slice_images[:data_num])
    train_label_parts.append(slice_labels[:data_num])
    val_data_parts.append(slice_images[data_num:val_end])
    val_label_parts.append(slice_labels[data_num:val_end])

train_data = np.concatenate(train_data_parts, axis=0)
train_label = np.concatenate(train_label_parts, axis=0)
val_data = np.concatenate(val_data_parts, axis=0)
val_label = np.concatenate(val_label_parts, axis=0)

# Only the training set is shuffled; shuffle() draws from NumPy's global RNG,
# which is not seeded in this notebook, so the order differs between runs.
train_data, train_label = shuffle(train_data, train_label)

Number of slices : 10


# Define slices

In [3]:
# Human-readable class names, in Fashion-MNIST label order.
a = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# One description per slice: its name plus its initial training-set size.
slice_desc = [
    'Slice: %s, Number of data: %d' % (a[i], initial_data_array[i])
    for i in range(num_class)
]
for description in slice_desc:
    print(description)

Slice: T-shirt/top, Number of data: 252
Slice: Trouser, Number of data: 266
Slice: Pullover, Number of data: 282
Slice: Dress, Number of data: 302
Slice: Coat, Number of data: 326
Slice: Sandal, Number of data: 357
Slice: Shirt, Number of data: 400
Slice: Sneaker, Number of data: 461
Slice: Bag, Number of data: 565
Slice: Ankle boot, Number of data: 800


# SliceTuner Demo on Fashion-MNIST
## Use 4000 budget, lambda=0.1, "aggressive" strategy

In [7]:
# Build the tuner from the shuffled training set, the pooled validation set and
# the per-slice validation / remainder lists prepared above.
train_set = (train_data, train_label)
val_set = (val_data, val_label)
st = SliceTuner(
    train_set,
    val_set,
    val_data_dict,
    initial_data_array,
    num_class,
    add_data_dict,
)

# Uniform cost of 1 for every slice.
cost_func = [1] * num_class

st.selective_collect(
    budget=4000,
    k=10,
    cost_func=cost_func,
    Lambda=0.1,
    num_iter=10,
    slice_desc=slice_desc,
    strategy="aggressive",
    show_figure=False,
)

[200, 623, 1046, 1470, 1893, 2317, 2740, 3164, 3587, 4011]
[633 183 700 430 670 269 908 137  68   0]
Total Cost: 3998, Remaining Budget: 2
[2 0 0 0 0 0 0 0 0 0]
Total Cost: 2, Remaining Budget: 0
Strategy: aggressive, C: 0.1, Budget: 4000
Loss: 0.51788, Average EER: 0.33658, Max EER: 0.69904



# Baseline: Uniform

In [8]:
# Uniform baseline, constructed from the same inputs as the SliceTuner run.
train_set = (train_data, train_label)
val_set = (val_data, val_label)
uni = Uniform(
    train_set,
    val_set,
    val_data_dict,
    initial_data_array,
    num_class,
    add_data_dict,
)

# Uniform cost of 1 for every slice.
cost_func = [1] * num_class

uni.performance(
    budget=4000,
    cost_func=cost_func,
    num_iter=10,
)

Method: Uniform, Budget: 4000
[400 400 400 400 400 400 400 400 400 400]
[0.6525058686733246, 0.26227460205554964, 0.8486474454402924, 0.48090615570545203, 0.8235934257507326, 0.28539880514144894, 1.419605380296707, 0.29103708863258365, 0.26104407459497453, 0.14815770834684372]
Loss: 0.54732, Average EER: 0.43197, Max EER: 0.96921



# Baseline: Water filling

In [9]:
# Water-filling baseline, constructed from the same inputs as the SliceTuner run.
train_set = (train_data, train_label)
val_set = (val_data, val_label)
wf = Waterfilling(
    train_set,
    val_set,
    val_data_dict,
    initial_data_array,
    num_class,
    add_data_dict,
)

# Uniform cost of 1 for every slice.
cost_func = [1] * num_class

wf.performance(
    budget=4000,
    cost_func=cost_func,
    num_iter=10,
)

Method: Water filling, Budget: 4000
[550 535 519 499 475 444 401 340 236   1]
[0.5863466978073121, 0.26531174778938293, 0.8590988457202913, 0.4644279211759567, 0.8195414245128632, 0.25585255622863773, 1.3708500146865843, 0.30778094604611395, 0.26048298478126525, 0.17388643324375153]
Loss: 0.53636, Average EER: 0.41400, Max EER: 0.92721

