In [None]:
import os
import warnings
# Silence all Python-level warnings for the rest of the notebook.
warnings.filterwarnings('ignore')
# The environment variables below are set before `import tensorflow` (further
# down in this cell) so they are already in place when TensorFlow is loaded.
# '3' is the quietest setting for TensorFlow's native (C++) logging.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Enumerate GPUs by PCI bus id and expose only the device with index 1 to this
# process. NOTE(review): the index is machine specific - adjust for your host.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import numpy as np
import matplotlib.pyplot as plt
import pickle
import tensorflow as tf
from tensorflow.python.keras.backend import set_session
# Build a TF1-compatible session configuration and register the resulting
# session with the Keras backend.
config = tf.compat.v1.ConfigProto() 
# Let the GPU allocation grow on demand rather than reserving it all up front.
config.gpu_options.allow_growth = True  
# Log the device each operation is placed on (verbose; useful to confirm GPU use).
config.log_device_placement = True  
sess2 = tf.compat.v1.Session(config=config)
# Make `sess2` the session used by the Keras backend.
set_session(sess2) 
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Activation, Flatten, BatchNormalization
from tensorflow.keras.layers import Conv2D, AveragePooling2D, MaxPooling2D
from tensorflow.keras.regularizers import l2
import tensorflow.compat.v1.keras.backend as K
from mango.tuner import Tuner
from scipy.stats import uniform
from keras_flops import get_flops
import pickle
import csv
import time
import itertools
import random
from sklearn.metrics import roc_auc_score
import eval_functions_eembc
from train_utils import *
from hardware_utils import *

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
EPOCHS = 500  # training epochs for every model that gets trained
NAS_EPOCHS = 50  # number of tuner iterations (architecture candidates)
BS = 32  # mini-batch size
cifar_10_dir = 'cifar-10-batches-py/'  # CIFAR-10 dataset directory
device = "NUCLEO_L4R5ZI_P"  # target hardware name
dirpath = 'Cifar10_Mbed_Prog/'  # Mbed program directory (hardware-in-the-loop only)
# NOTE: the "_" before ".h5" is kept on purpose so the file name stays
# identical to the one produced by earlier runs.
model_name = "trainedResnet_" + device + "_" + ".h5"
platform_connected = False  # True: hardware-in-the-loop, False: proxy estimates
quantization = True  # use quantization

# Create the output directory from Python instead of shelling out to
# `mkdir -p`: this works on every OS and raises on failure rather than
# silently returning a non-zero exit status.
os.makedirs("trained_models", exist_ok=True)

# ---------------------------------------------------------------------------
# NAS log initialisation: start every run from a fresh CSV with a header row
# and drop the pickled tuner results of any previous run.
# ---------------------------------------------------------------------------
log_file_name = 'log_NAS_Cifar10_' + str(platform_connected) + "_" + device + '.csv'
row_write = ['score', 'accuracy', 'SRAM', 'Flash', 'Latency',
             'nstacks', 'num_filters', 'kernel_size', 'batch_norm', 'act']
# Mode 'w' truncates an existing log, which is equivalent to the previous
# "remove, then append" sequence.
with open(log_file_name, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(row_write)

results_pickle_name = log_file_name[0:-4] + '.p'
if os.path.exists(results_pickle_name):
    os.remove(results_pickle_name)

## Import Dataset

In [None]:
# Run the helper script that prepares the performance-sample data.
# NOTE(review): presumably this is what writes 'perf_samples_idxs.npy', which a
# later cell loads - confirm against perf_samples_loader.py.
# os.system only *returns* the exit status, so check it explicitly instead of
# letting a failed run go unnoticed until a later cell breaks.
loader_status = os.system("python3 perf_samples_loader.py")
if loader_status != 0:
    raise RuntimeError(
        "perf_samples_loader.py failed (os.system returned %r)" % (loader_status,)
    )

In [None]:
# Load the CIFAR-10 arrays through the project helper.
(train_data, train_filenames, train_labels,
 test_data, test_filenames, test_labels,
 label_names) = load_cifar_10_data(cifar_10_dir)

# Augmentation generator used for all training runs in this notebook.
# (Brightness / contrast augmentation was left disabled by the author.)
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2,
)
datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# Report the shape of every loaded array.
for caption, loaded_array in (
    ("Train data: ", train_data),
    ("Train filenames: ", train_filenames),
    ("Train labels: ", train_labels),
    ("Test data: ", test_data),
    ("Test filenames: ", test_filenames),
    ("Test labels: ", test_labels),
    ("Label names: ", label_names),
):
    print(caption, loaded_array.shape)

# Show a num_plot x num_plot grid of randomly drawn training images.
num_plot = 5
f, ax = plt.subplots(num_plot, num_plot)
for grid_cell in ax.flat:  # row-major, same order as a nested row/column loop
    idx = np.random.randint(0, train_data.shape[0])
    grid_cell.imshow(train_data[idx])
    grid_cell.get_xaxis().set_visible(False)
    grid_cell.get_yaxis().set_visible(False)
f.subplots_adjust(hspace=0.1, wspace=0)
plt.show()

## Training and NAS

In [None]:
def objective_NN(nstacks=3,init_num_filter=16,kernel_size=3,
                 use_batch_norm=True, use_act = True, platform_connected = False, quantization=True):
    """Build, resource-check, optionally train and score one candidate network.

    The candidate is created with ``generate_resnet`` and compiled with Adam and
    categorical cross-entropy. Its SRAM / Flash / latency figures are obtained
    either from proxy helpers (``platform_connected == False``) or from
    ``platform_in_the_loop_controller``. Only candidates that pass the resource
    check are trained; the others keep the sentinel results score ``-5.0`` and
    accuracy ``-1.0``. One row describing the candidate is always appended to
    the CSV file named by the global ``log_file_name``.

    Relies on notebook globals: ``device``, ``dirpath``, ``datagen``,
    ``train_data``, ``train_labels``, ``BS``, ``EPOCHS``, ``lr_schedule`` and
    ``log_file_name``.

    Args:
        nstacks, init_num_filter, kernel_size, use_batch_norm, use_act:
            architecture choices forwarded unchanged to ``generate_resnet``.
        platform_connected: ``False`` selects the proxy estimates, anything
            else selects the hardware-in-the-loop measurement.
        quantization: forwarded to the Flash estimate / hardware controller;
            when ``True`` the proxy SRAM figure is divided by 8.

    Returns:
        The score of the candidate (``-5.0`` if it was not trained).
    """
    print(nstacks,init_num_filter,kernel_size,use_batch_norm,use_act)

    candidate = generate_resnet(nstacks=nstacks, init_num_filter=init_num_filter,
                                kernel_size=kernel_size, use_batch_norm=use_batch_norm,
                                use_act=use_act, num_classes=10)
    schedule_callback = LearningRateScheduler(lr_schedule)
    candidate.compile(optimizer=tf.keras.optimizers.Adam(),
                      loss='categorical_crossentropy', metrics='accuracy',
                      loss_weights=None, weighted_metrics=None, run_eagerly=None)
    maxRAM, maxFlash = return_hardware_specs(device)

    # Same comparison as the original `platform_connected == False` test.
    use_proxy = (platform_connected == False)
    if use_proxy:
        Latency = get_flops(candidate, batch_size=1)  # latency proxy
        RAM = get_model_memory_usage(batch_size=1, model=candidate)  # SRAM proxy
        if quantization == True:
            # NOTE(review): the divisor 8 comes from the original code; its
            # rationale is not visible here - confirm.
            RAM = RAM/8.0
        Flash = get_model_flash_usage(candidate, 'pretrainedResnet',
                                      quantization=quantization)  # flash proxy
        # Train only if the estimates fit the device limits.
        worth_training = RAM < maxRAM and Flash < maxFlash
        latency_scale = 1e6
    else:
        RAM, Flash, Latency, _err_flag = platform_in_the_loop_controller(
            candidate, 'pretrainedResnet', device,
            dir_path=dirpath, quantization=quantization)
        # The code treats -1 in any measurement as "do not train this candidate".
        worth_training = RAM!=-1 and Flash!=-1 and Latency!=-1
        latency_scale = 0.8

    if worth_training:
        history = candidate.fit(datagen.flow(train_data, train_labels, batch_size=BS),
                                steps_per_epoch=len(train_data) / BS, epochs=EPOCHS,
                                callbacks=[schedule_callback])
        # NOTE(review): this is the last-epoch *training* accuracy; no
        # validation data is passed to fit().
        accuracy = history.history['accuracy'][-1]
        # NOTE(review): the resource terms are *added* to a score that the
        # tuner maximizes - confirm that rewarding resource usage is intended.
        score = accuracy + 0.01*((RAM/maxRAM) + (Flash/maxFlash)) +  0.01*(Latency/latency_scale)
    else:
        score = -5.0
        accuracy = -1.0

    row_write = [score, accuracy, RAM, Flash, Latency,
                 nstacks, init_num_filter, kernel_size, use_batch_norm, use_act]
    print('Design choice:',row_write)
    with open(log_file_name, 'a', newline='') as log_handle:
        csv.writer(log_handle).writerow(row_write)

    return score

In [None]:
def save_res(data, file_name):
    """Pickle ``data`` into ``file_name``, overwriting any existing file.

    Args:
        data: any picklable object (here: the list of tuner result dicts).
        file_name: path of the output file.

    The file is opened in a ``with`` block so the handle is flushed and closed
    deterministically, even if pickling raises.
    """
    with open(file_name, "wb") as pickle_file:
        pickle.dump(data, pickle_file)
    
# Search space handed to the tuner: every key matches a keyword argument of
# objective_NN, every value lists the candidates for that argument.
param_dict = dict(
    nstacks=range(1, 5),                    # 1, 2, 3, 4
    init_num_filter=list(range(2, 25, 2)),  # even filter counts 2 .. 24
    kernel_size=[1, 3, 5, 7],
    use_batch_norm=[True, False],
    use_act=[True, False],
)

def objfunc(args_list):
    """Score a batch of candidate configurations for the tuner.

    Args:
        args_list: iterable of dicts, each providing at least the keys
            'nstacks', 'init_num_filter', 'kernel_size', 'use_batch_norm'
            and 'use_act'.

    Returns:
        A list with one ``objective_NN`` score per entry, in input order.

    The notebook-level ``platform_connected`` and ``quantization`` settings are
    forwarded to every evaluation. After each candidate the score and the time
    elapsed since the start of the batch are printed.
    """
    architecture_keys = ('nstacks', 'init_num_filter', 'kernel_size',
                         'use_batch_norm', 'use_act')
    scores = []
    batch_started = time.time()

    for candidate in args_list:
        # Pick out exactly the architecture choices; other keys are ignored.
        choices = {key: candidate[key] for key in architecture_keys}
        score = objective_NN(platform_connected=platform_connected,
                             quantization=quantization, **choices)
        scores.append(score)
        print('objective:', score, ' time:', time.time() - batch_started)

    return scores

In [None]:
# Tuner settings: one candidate per batch, NAS_EPOCHS iterations, and five
# initial random samples.
conf_Dict = {
    'batch_size': 1,
    'num_iteration': NAS_EPOCHS,
    'initial_random': 5,
}
tuner = Tuner(param_dict, objfunc, conf_Dict)

# Run the search (maximizing the objfunc scores) and pickle the results next to
# the CSV log, using the same base name with a '.p' extension.
results = tuner.maximize()
all_runs = [results]
save_res(all_runs, log_file_name[0:-4]+'.p')

## Train Best Model and Evaluate

In [None]:
# Pull the winning architecture choices out of the tuner results.
best_params = results['best_params']
nstacks, init_num_filter, kernel_size, use_batch_norm, use_act = (
    best_params[key]
    for key in ('nstacks', 'init_num_filter', 'kernel_size', 'use_batch_norm', 'use_act')
)

# Rebuild that architecture from scratch and train it with the same settings
# used for the candidates during the search.
model = generate_resnet(nstacks=nstacks, init_num_filter=init_num_filter,
                        kernel_size=kernel_size, use_batch_norm=use_batch_norm,
                        use_act=use_act, num_classes=10)
lr_scheduler = LearningRateScheduler(lr_schedule)
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics='accuracy',
              loss_weights=None, weighted_metrics=None, run_eagerly=None)
augmented_batches = datagen.flow(train_data, train_labels, batch_size=BS)
history = model.fit(augmented_batches,
                    steps_per_epoch=len(train_data) / BS,
                    epochs=EPOCHS,
                    callbacks=[lr_scheduler])


In [None]:
# Persist the trained model under trained_models/ using the configured name.
trained_model_path = "trained_models/" + model_name
model.save(trained_model_path)

In [None]:
# Reload the full dataset so that re-running this cell always starts from the
# complete test split before it is subset below.
(train_data, train_filenames, train_labels,
 test_data, test_filenames, test_labels,
 label_names) = load_cifar_10_data(cifar_10_dir)

# Keep only the test samples selected by the stored index array.
_idxs = np.load('perf_samples_idxs.npy')
test_data, test_labels, test_filenames = (
    full_array[_idxs] for full_array in (test_data, test_labels, test_filenames)
)

print("Test data: ", test_data.shape)
print("Test filenames: ", test_filenames.shape)
print("Test labels: ", test_labels.shape)
print("Label names: ", label_names.shape)

# Index of the largest entry in each label row (axis 1).
label_classes = np.argmax(test_labels, axis=1)
print("Label classes: ", label_classes.shape)

In [None]:
# Evaluate the model exactly as it was written to disk.
separator = "---------------------"
model = tf.keras.models.load_model('trained_models/' + model_name)

# 1) Keras' own evaluation.
test_metrics = model.evaluate(x=test_data, y=test_labels, batch_size=32,
                              verbose=1, return_dict=True)
print("Performances on cifar10 test set", "Keras evaluate method", sep="\n")
print("Accuracy keras: ", test_metrics['accuracy'])
print(separator)

# Model outputs shared by the remaining metrics.
predictions = model.predict(test_data)

# 2) Accuracy via the EEMBC helper.
print("EEMBC calculate_accuracy method")
accuracy_eembc = eval_functions_eembc.calculate_accuracy(predictions, label_classes)
print(separator)

# 3) AUC via scikit-learn.
auc_scikit = roc_auc_score(test_labels, predictions)
print("sklearn.metrics.roc_auc_score method")
print("AUC sklearn: ", auc_scikit)
print(separator)

# 4) AUC via the EEMBC helper.
print("EEMBC calculate_auc method")
auc_eembc = eval_functions_eembc.calculate_auc(predictions, label_classes,
                                               label_names, model_name)
print(separator)