In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import pandas as pd
import numpy as np
from gtda.time_series import SlidingWindow
import matplotlib.pyplot as plt
import csv
import random
import itertools
import sys
import shutil
from tqdm import tqdm
import helpermethods
from edgeml_tf.trainer.bonsaiTrainer import BonsaiTrainer
from edgeml_tf.graph.bonsai import Bonsai
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from scipy.stats import uniform
from data_utils import *
import re
from mango.tuner import Tuner
import time
import pickle
import glob
from edgeml_tf.tflite.bonsaiLayer import BonsaiLayer
from tensorflow import keras
import tensorflow.compat.v1.keras.backend as K

## Import Dataset

In [None]:
# Input/output locations: where the activity dataset is read from and where
# trained models are written.
f = '/home/nesl/earable_light/Activity_Dataset/' #Dataset directory
model_dir = 'trained_models/'

# Windowing settings forwarded to the dataset loader.
window_size = 550
stride = 50
channels = 2

# Load the windowed dataset together with a held-out test split.
X_tr, Y_tr, X_test, Y_test = import_auritus_activity_dataset(
    dataset_folder=f,
    use_timestamp=False,
    shuffle=True,
    window_size=window_size,
    stride=stride,
    return_test_set=True,
    test_set_size=300,
    channels=channels,  # reuse the setting above rather than repeating the literal 2
)

# Quick sanity check on the array shapes returned by the loader.
print(X_tr.shape)
print(Y_tr.shape)
print(X_test.shape)
print(Y_test.shape)

# Feature Extraction

In [None]:
def extract_window_features(windows, feat_size=10):
    """Compute per-window summary statistics for channels 0 and 1.

    Each window is indexed as ``window[:, channel]``. The resulting feature
    columns are laid out as:

        0, 1 : minimum of channel 0 / channel 1
        2, 3 : maximum of channel 0 / channel 1
        4, 5 : range (max - min) of channel 0 / channel 1
        6, 7 : variance of channel 0 / channel 1
        8, 9 : square root of the variance of channel 0 / channel 1

    Args:
        windows: array indexable as ``windows[i][:, channel]`` whose first
            dimension is the number of windows.
        feat_size: number of feature columns to allocate; must be at least 10
            because columns 0-9 are written.

    Returns:
        ``np.ndarray`` of shape ``(windows.shape[0], feat_size)``.
    """
    features = np.zeros((windows.shape[0], feat_size))
    for i in range(windows.shape[0]):
        cur_win = windows[i]
        for ch in (0, 1):
            signal = cur_win[:, ch]
            features[i, ch] = np.min(signal)
            features[i, 2 + ch] = np.max(signal)
            features[i, 4 + ch] = features[i, 2 + ch] - features[i, ch]
            features[i, 6 + ch] = np.var(signal)
            # Derived from the stored variance, exactly as the variance column holds it.
            features[i, 8 + ch] = np.sqrt(features[i, 6 + ch])
    return features


feat_size = 10
# One shared routine for both splits guarantees train and test features match.
featX_tr = extract_window_features(X_tr, feat_size)
featX_test = extract_window_features(X_test, feat_size)

# Names consumed by the training cells further down.
dataDimension = featX_tr.shape[1]
numClasses = Y_tr.shape[1]
Xtrain = featX_tr
Ytrain = Y_tr
Xtest = featX_test
Ytest = Y_test

# Training and Hyperparameter Tuning


In [None]:
# Training budget and the CSV file that records every tuning trial.
totalEpochs = 1000 #epochs to train each model for
bayesEpochs = 50 #epochs for hyperparameter tuning
log_file_name = 'Hyperparams_Bonsai.csv'

# Start from a clean slate: drop the CSV log and its pickle companion if present.
for stale_path in (log_file_name, log_file_name[0:-4]+'.p'):
    if os.path.exists(stale_path):
        os.remove(stale_path)

# Write the header row; one data row is appended per evaluated configuration.
row_write = ['score', 'accuracy','Flash','Epoch','Sigma','Depth','ProjectionDimension']
with open(log_file_name, 'a', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows([row_write])

In [None]:
def parse_bonsai_results(res_text):
    """Extract the reported accuracy and epoch from the Bonsai results text.

    Args:
        res_text: full contents of ``TFBonsaiResults.txt``.
            NOTE(review): assumes the accuracy is the first decimal number in
            the first 20 characters and that the epoch is the first integer
            between the ``totalEpochs`` and ``ModelSize`` markers -- confirm
            against the trainer's output format.

    Returns:
        Tuple ``(accuracy, epoch)`` as ``(float, int)``.

    Raises:
        ValueError: if the ``totalEpochs`` or ``ModelSize`` marker is missing.
        IndexError: if no number is found where one is expected.
    """
    accu = float(re.findall(r"\d+\.\d+", res_text[0:20])[0])
    epoch_start = res_text.index('totalEpochs')
    epoch_end = res_text.index('ModelSize')
    ep = int(re.findall(r"\d+", res_text[epoch_start:epoch_end])[0])
    return accu, ep


def objective_NN(sigma = 1.0, depth = 3, projectionDimension = 22,numClasses=9):
    """Train one Bonsai model and return the score used by the tuner.

    Reads the module-level ``Xtrain``/``Ytrain``/``Xtest``/``Ytest``,
    ``dataDimension``, ``model_dir``, ``totalEpochs`` and ``log_file_name``.
    ``model_dir`` is wiped and recreated on every call, and one row per call
    is appended to the CSV log.

    Args:
        sigma: Bonsai sigma hyperparameter.
        depth: Bonsai tree depth.
        projectionDimension: Bonsai projection dimension.
        numClasses: number of classes; 2 is mapped to a single classifier.

    Returns:
        The score, currently equal to the accuracy parsed from the results file.
    """
    #Fixed hyperparameters:

    #Regularizers for Bonsai Parameters
    regZ = 0.0001
    regW = 0.001
    regV = 0.001
    regT = 0.001

    learningRate = 0.01
    outFile = None
    #Sparsity for Bonsai Parameters. x => 100*x % are non-zeros
    sparZ = 0.2
    sparW = 0.3
    sparV = 0.3
    sparT = 0.62
    batchSize = np.maximum(100, int(np.ceil(np.sqrt(Ytrain.shape[0]))))
    useMCHLoss = True #(False = Cross Entropy)
    #Bonsai uses one classifier for Binary, thus this condition
    if numClasses == 2:
        numClasses = 1

    dataDir = model_dir
    shutil.rmtree(dataDir, ignore_errors=True)
    os.mkdir(dataDir)
    currDir = helpermethods.createTimeStampDir(dataDir)
    helpermethods.dumpCommand(sys.argv, currDir)

    # Build every trial in its own graph and close its session afterwards.
    # Otherwise each tuning iteration would add nodes to the global default
    # graph and leave one more open session behind.
    with tf.Graph().as_default():
        X = tf.placeholder("float32", [None, dataDimension])
        Y = tf.placeholder("float32", [None, numClasses])

        #Instantiating the Bonsai Graph which will be used for training and inference.
        bonsaiObj = Bonsai(numClasses, dataDimension, projectionDimension, depth, sigma)

        #Instantiating the Bonsai Trainer which will be used for 3 phase training.
        bonsaiTrainer = BonsaiTrainer(bonsaiObj, regW, regT, regV, regZ, sparW, sparT, sparV, sparZ,
                                  learningRate, X, Y, useMCHLoss, outFile)

        #Session declaration and variable initialization. Interactive Session doesn't clog the entire GPU.
        sess = tf.InteractiveSession()
        try:
            sess.run(tf.global_variables_initializer())

            #Runs the 3 phase training and saves the parameters of the best early stopping model.
            bonsaiTrainer.train(batchSize, totalEpochs, sess,
                            Xtrain, Xtest, Ytrain, Ytest, dataDir, currDir)

            model_size = bonsaiTrainer.getModelSize()[1] #flash usage
        finally:
            # Close inside the graph context so the session is released even
            # when training raises.
            sess.close()

    with open(model_dir+"TFBonsaiResults.txt","r") as res_handle:
        res_file = res_handle.read()
    # accu: accuracy; ep: epoch at which max test accuracy happened
    accu, ep = parse_bonsai_results(res_file)

    score = 1.0*accu + 0.0*model_size #you can weigh the score to take into account model size too
    row_write = [score, accu,model_size,ep,sigma,depth,projectionDimension]
    with open(log_file_name, 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(row_write)

    return score

In [None]:
import pickle 

def save_res(data, file_name):
    """Pickle ``data`` to ``file_name``, overwriting any existing file.

    Args:
        data: any picklable object.
        file_name: path of the file to write.
    """
    # The context manager guarantees the handle is flushed and closed.
    with open(file_name, "wb") as out_file:
        pickle.dump(data, out_file)
    
# Hyperparameter search space handed to the tuner.
# scipy's uniform(loc, scale) spans [loc, loc + scale], so sigma covers [1, 5].
param_dict = dict(
    sigma=uniform(1, 4),
    depth=list(range(1, 7)),
    projectionDimension=np.arange(10, 70),
)

def objfunc(args_list):
    """Score a batch of hyperparameter configurations for the tuner.

    Args:
        args_list: iterable of dicts holding the keys ``'sigma'``, ``'depth'``
            and ``'projectionDimension'``.

    Returns:
        List with one ``objective_NN`` score per configuration, in input order.
    """
    scores = []
    batch_start = time.time()

    for candidate in args_list:
        score = objective_NN(
            sigma=candidate['sigma'],
            depth=candidate['depth'],
            projectionDimension=candidate['projectionDimension'],
            numClasses=numClasses,
        )
        scores.append(score)

        # Elapsed time is measured from the start of the batch, not per candidate.
        elapsed = time.time() - batch_start
        print('objective:', score, ' time:', elapsed)

    return scores

# Tuner settings; the iteration count comes from bayesEpochs.
conf_Dict = {
    'batch_size': 1,
    'num_iteration': bayesEpochs,
    'initial_random': 5,
}
tuner = Tuner(param_dict, objfunc, conf_Dict)

# Run the search (maximising the objective) and pickle the outcome under the
# CSV log's name with a '.p' suffix.
results = tuner.maximize()
all_runs = [results]
save_res(all_runs, log_file_name[0:-4]+'.p')

# Train the best model

In [None]:
# Pull the winning hyperparameters out of the tuner results.
depth, projectionDimension, sigma = (
    results['best_params'][key]
    for key in ('depth', 'projectionDimension', 'sigma')
)

#Regularizers for Bonsai Parameters
regZ, regW, regV, regT = 0.0001, 0.001, 0.001, 0.001

learningRate, outFile = 0.01, None

#Sparsity for Bonsai Parameters. x => 100*x % are non-zeros
sparZ, sparW, sparV, sparT = 0.2, 0.3, 0.3, 0.62

batchSize = np.maximum(100, int(np.ceil(np.sqrt(Ytrain.shape[0]))))
useMCHLoss = True #(False = Cross Entropy)

#Bonsai uses one classifier for Binary, thus this condition
numClasses = 1 if numClasses == 2 else numClasses

# Graph inputs: feature vectors and their labels.
X = tf.placeholder("float32", [None, dataDimension])
Y = tf.placeholder("float32", [None, numClasses])

# Recreate the model directory from scratch and record the invocation there.
dataDir = model_dir
shutil.rmtree(dataDir, ignore_errors=True)
os.mkdir(dataDir)
currDir = helpermethods.createTimeStampDir(dataDir)
helpermethods.dumpCommand(sys.argv, currDir)

#Bonsai graph used for both training and inference.
bonsaiObj = Bonsai(numClasses, dataDimension, projectionDimension, depth, sigma)

#Trainer that drives the 3 phase training.
bonsaiTrainer = BonsaiTrainer(
    bonsaiObj,
    regW, regT, regV, regZ,
    sparW, sparT, sparV, sparZ,
    learningRate, X, Y, useMCHLoss, outFile,
)

#Session declaration and variable initialization. Interactive Session doesn't clog the entire GPU.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

#Run the 3 phase training; the trainer reports the best early stopping model and saves its parameters.
bonsaiTrainer.train(
    batchSize, totalEpochs, sess,
    Xtrain, Xtest, Ytrain, Ytest,
    dataDir, currDir,
)

# Deployment

### Conversion to TFLite

In [None]:
# Convert the trained Bonsai model to TFLite (tflite model saved in current directory).
tflite_status = os.system("python3 bonsai_to_tflite.py --model_dir "+model_dir)
# Fail fast: the following cells read the converted model, so a silent
# conversion failure would leave them working on a missing or stale file.
if tflite_status != 0:
    raise RuntimeError("bonsai_to_tflite.py failed (os.system status %d)" % tflite_status)

### Conversion to C++

In [None]:
# Dump the TFLite model as a C array with xxd, then patch the generated source
# so it includes model.h and exposes an aligned `const` array named bonsai_model.
xxd_status = os.system("xxd -i bonsai_model.tflite > model.cc")
if xxd_status != 0:
    raise RuntimeError("xxd failed (os.system status %d); model.cc was not generated" % xxd_status)

# A dedicated handle name keeps the dataset-directory variable `f` intact.
with open('model.cc') as model_cc_in:
    model_cc_lines = model_cc_in.readlines()

# Preamble: include the header and define DATA_ALIGN_ATTRIBUTE for the compiler in use.
model_cc_lines.insert(0,
    '#include "model.h"\n'
    '#ifdef __has_attribute\n'
    '#define HAVE_ATTRIBUTE(x) __has_attribute(x)\n'
    '#else\n'
    '#define HAVE_ATTRIBUTE(x) 0\n'
    '#endif\n'
    '#if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__))\n'
    '#define DATA_ALIGN_ATTRIBUTE __attribute__((aligned(4)))\n'
    '#else\n'
    '#define DATA_ALIGN_ATTRIBUTE\n'
    '#endif\n')

# Rename the symbol first, then make the renamed array const and aligned.
model_cc_lines = [
    line.replace('bonsai_model_tflite','bonsai_model')
        .replace('unsigned char bonsai_model[]','const unsigned char bonsai_model[] DATA_ALIGN_ATTRIBUTE')
    for line in model_cc_lines
]
# Drop the final line of the dump.
# NOTE(review): for `xxd -i` output this should be the `unsigned int ..._len`
# declaration, which model.h does not declare -- confirm.
model_cc_lines[-1] = ""

with open("model.cc","w") as model_cc_out:
    model_cc_out.writelines(model_cc_lines)


# Matching header that declares the model array for the firmware sources.
h_file_cont = ['#ifndef BONSAI_MODEL_H_\n',
           '#define BONSAI_MODEL_H_\n',
          'extern const unsigned char bonsai_model[];\n',
          '#endif\n']
with open("model.h","w") as model_h_out:
    model_h_out.writelines(h_file_cont)

### Quantization Example

In [None]:
# Collect every leaf directory (one with no sub-directories) below model_dir.
lowest_dirs = [root for root, dirs, files in os.walk(model_dir) if not dirs]

# Run the quantization script on the first leaf directory found.
# NOTE(review): the [0:-6] slice strips the last six characters of that path
# before it is passed as --model-dir; confirm this matches the directory layout
# the trainer produces.
os.system("python3 quantizeBonsaiModels.py --model-dir "+lowest_dirs[0][0:-6])