In [1]:
import os
import numpy as np
import json
import pandas as pd
import ast
import scipy

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold

import tensorflow as tf

# Turn on memory growth for every GPU TensorFlow can see, so GPU memory is
# allocated as needed rather than reserved entirely at start-up.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Current working directory of the kernel.
cwd = os.getcwd()

In [2]:
# Report how many GPUs TensorFlow detected.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


## Dataframe loading

In [3]:
#LOAD 516 DATASET FOR FIRST TRAINING AND VALIDATION
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
dataframe_name = "dataframe516"
pkl_path = f"./pickle/{dataframe_name}.pkl"  # path is relative to the kernel's working directory
df = pd.read_pickle(pkl_path)

In [4]:
#DEFINE FUNCTIONS FOR SELECTING PARTS OF THE DATASET REGARDING DIFFERENT CONFIGURATIONS OF OCCUPANTS

#select only rows where seat1 holds a child (toddler or baby) or is empty
def select_only_children_on_seat1(df):
    """Return a copy of the rows whose ``seat1`` holds a child or is empty.

    A row is kept when ``seat1`` equals ``"toddler"``, ``"baby"`` or
    ``"none"``. The input frame is left untouched.
    """
    allowed_on_seat1 = ["toddler", "baby", "none"]
    keep = df["seat1"].isin(allowed_on_seat1)
    return df.copy()[keep]

#select only data that have one or more pets alone in the back seats (OR NONE)
def select_only_pet(df):
    """Return a copy of the rows where every back seat holds a pet or is empty.

    Each of ``seat1``, ``seat2`` and ``seat3`` must equal ``"pet"`` or
    ``"none"``; the all-empty row is kept as well. The input frame is left
    untouched.
    """
    back_seats = df[["seat1", "seat2", "seat3"]]
    # All eight pet/none combinations over three seats are accepted, which is
    # the same as requiring every seat to be one of the two values.
    keep = back_seats.isin(["pet", "none"]).all(axis=1)
    return df.copy()[keep]

#select only data that have only one target, adult or toddler, in the back seats (OR NONE)
def select_only_single(df):
    """Return a copy of the rows with at most one adult/toddler in the back seats.

    A row is kept when all three seats are ``"none"``, or when exactly one
    seat is ``"adult"`` or ``"toddler"`` and the other two are ``"none"``.
    The input frame is left untouched.
    """
    back_seats = df[["seat1", "seat2", "seat3"]]
    empty_seats = back_seats.eq("none").sum(axis=1)
    target_seats = back_seats.isin(["adult", "toddler"]).sum(axis=1)

    all_empty = empty_seats == 3
    single_target = (empty_seats == 2) & (target_seats == 1)
    return df.copy()[all_empty | single_target]

#select only data that have only one target, adult or toddler, in the back seats. (NO NONE)
def select_only_single_true(df):
    """Return a copy of the rows with exactly one adult/toddler in the back seats.

    Exactly one seat must be ``"adult"`` or ``"toddler"`` and the other two
    must be ``"none"``; the all-empty row is NOT kept. The input frame is left
    untouched.
    """
    back_seats = df[["seat1", "seat2", "seat3"]]
    empty_seats = back_seats.eq("none").sum(axis=1)
    target_seats = back_seats.isin(["adult", "toddler"]).sum(axis=1)

    single_target = (empty_seats == 2) & (target_seats == 1)
    return df.copy()[single_target]

#select only data that have one or more adults alone in the back seats (OR NONE)
def select_only_adult(df):
    """Return a copy of the rows where every back seat holds an adult or is empty.

    Each of ``seat1``, ``seat2`` and ``seat3`` must equal ``"adult"`` or
    ``"none"``; the all-empty row is kept as well. The input frame is left
    untouched.
    """
    back_seats = df[["seat1", "seat2", "seat3"]]
    # All eight adult/none combinations over three seats are accepted, which
    # is the same as requiring every seat to be one of the two values.
    keep = back_seats.isin(["adult", "none"]).all(axis=1)
    return df.copy()[keep]

In [5]:
#DEFINE FUNCTIONS FOR ASSIGNING OCCUPANTS

#assign occupations status of seats
def assign_occupations(df):
    """Add the binary occupancy columns ``class1``..``class3`` to ``df`` in place.

    ``classN`` is 1 when ``seatN`` is anything other than ``"none"`` and 0
    otherwise. Nothing is returned; the frame passed in is modified.
    """
    for seat_number in (1, 2, 3):
        seat_column = f"seat{seat_number}"
        df[f"class{seat_number}"] = [
            1 if occupant != 'none' else 0 for occupant in df[seat_column]
        ]

#DEFINE PRESENCE AS AT LEAST 1 SEAT OCCUPIED
def assign_presence(df):
    """Add a binary ``presence`` column to ``df`` in place.

    ``presence`` is 1 when at least one of ``class1``, ``class2`` or
    ``class3`` is non-zero and 0 otherwise. The three class columns must
    already exist (they are created by ``assign_occupations``). Nothing is
    returned; the frame passed in is modified.
    """
    seat_flags = df[["class1", "class2", "class3"]]
    # Computed column-wise: row-wise iteration is slow and turns the integer
    # flags into floats whenever the frame is all-numeric with a float column.
    df["presence"] = seat_flags.ne(0).any(axis=1).astype("int64")

#ASSIGN NUMBER OF OCCUPANTS (3 OCCUPANTS ARE COUNTED AS 2)
def assign_occupants(df):
    """Add an ``occupants`` column (number of occupied seats, capped at 2) in place.

    The count is ``class1 + class2 + class3``; three occupied seats are
    merged into class 2, so the column only takes the values 0, 1 and 2 when
    the class columns are 0/1 flags. The class columns must already exist
    (they are created by ``assign_occupations``). Nothing is returned; the
    frame passed in is modified.
    """
    seat_flags = df[["class1", "class2", "class3"]]
    # Computed column-wise: row-wise iteration is slow and turns the integer
    # flags into floats whenever the frame is all-numeric with a float column.
    df["occupants"] = seat_flags.sum(axis=1).clip(upper=2)

In [6]:
#ADD THE PER-SEAT OCCUPANCY FLAGS (class1..class3) AND THE OCCUPANT COUNT TO OUR DATASET
# Both helpers add their columns to df in place; assign_occupants needs the
# class columns, so assign_occupations has to run first.
assign_occupations(df)
assign_occupants(df)

In [7]:
# Alias, not a copy: complete_df and df refer to the same DataFrame object.
complete_df = df

In [8]:
# Preview of the label columns only.
df_info = complete_df[['occupants', 'class1', 'class2', 'class3']]
df_info

Unnamed: 0,occupants,class1,class2,class3
0,1,0,0,1
1,2,1,1,1
2,2,1,0,1
3,1,0,1,0
4,2,1,1,0
...,...,...,...,...
473,1,0,0,1
474,1,1,0,0
475,1,1,0,0
476,0,0,0,0


In [8]:
#LENGTH OF THE COMPLETE DATASET
len(complete_df)

478

In [9]:
#SUMMARY STATISTICS OF THE COMPLETE DATASET
complete_df.describe()

Unnamed: 0,class1,class2,class3,occupants
count,478.0,478.0,478.0,478.0
mean,0.539749,0.26569,0.382845,1.075314
std,0.49894,0.442163,0.48659,0.767922
min,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0
50%,1.0,0.0,0.0,1.0
75%,1.0,1.0,1.0,2.0
max,1.0,1.0,1.0,2.0


In [10]:
#SPLIT THE DATASET IN TRAIN AND TEST
# 15% of the rows go to the test set. The split is stratified on the occupant
# count and uses a fixed random_state so that it is reproducible.
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(complete_df, test_size=0.15, random_state = 42, stratify=complete_df.occupants)

In [11]:
#LENGTH OF THE TRAIN AND TEST DATASETS
print(len(train_df))
print(len(test_df))

406
72


## Data preprocessing

In [12]:
# The network is trained on the FFT spectra stored in the `fftData` column.
working_df = train_df
train_list = np.array(working_df.fftData)

# Frequency selection: the FFT data is made of two spectra of 128 bins each,
# laid out one after the other along axis 1, so the first 1/fraction of the
# bins is taken from both the first and the second spectrum.
fraction = 3
fraction_data = int(round(128/fraction))  # fraction_data = 43 in this case

train_x = [
    np.concatenate(
        (
            np.array(sample)[:, 0 : fraction_data],
            np.array(sample)[:, 128 : 128 + fraction_data],
        ),
        axis=1,
    )
    for sample in train_list
]
train_list = [np.array(cropped) for cropped in train_x]

# Z-score normalisation over the whole training set (one global mean/std).
print(np.mean(train_list))
print(np.std(train_list))
train_list = scipy.stats.zscore(train_list, axis=None)

# Add a trailing channel axis of size 1, as expected by the Conv2D layers.
train_tensor = tf.expand_dims(tf.convert_to_tensor(train_list), -1)
print(train_tensor.shape)

# Labels: number of occupants (0, 1 or 2).
train_label = working_df["occupants"]

# Proportion of every class in the training set.
passengers0 = sum(1 for occupants in working_df["occupants"] if occupants == 0)
passengers1 = sum(1 for occupants in working_df["occupants"] if occupants == 1)
passengers2 = sum(1 for occupants in working_df["occupants"] if occupants == 2)

balancing0 = passengers0/len(train_df)
balancing1 = passengers1/len(train_df)
balancing2 = passengers2/len(train_df)
for proportion in (balancing0, balancing1, balancing2):
    print(proportion)

# One-hot encode the labels for the 3-class classifier.
train_label = tf.keras.utils.to_categorical(train_label, 3)

# Dimensions of the inputs: 53 x 86 single-channel images.
img_h, img_w = 53, fraction_data*2
num_classes=3

-105.20099314637616
14.199503543044287
(406, 53, 86, 1)
0.25862068965517243
0.4064039408866995
0.33497536945812806


## Defining network architecture

In [13]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, inputs, targets, test_image_indices):
    """Run a TFLite model on selected samples and return the predicted classes.

    Args:
        tflite_file: Path of the ``.tflite`` model file.
        inputs: Indexable collection of float input samples (without the
            batch axis); ``inputs[k]`` is fed to the model for every ``k`` in
            ``test_image_indices``.
        targets: Unused; kept so existing callers do not have to change.
        test_image_indices: Indices into ``inputs`` of the samples to run.

    Returns:
        1-D integer NumPy array with the argmax class of each selected
        sample, in the order of ``test_image_indices``.
    """
    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]
    input_dtype = input_details["dtype"]

    predictions = np.zeros((len(test_image_indices),), dtype=int)
    for i, test_image_index in enumerate(test_image_indices):
        test_image = inputs[test_image_index]

        # Quantized input: map the float sample onto the integer grid. The
        # result is rounded and clipped first, because a bare astype()
        # truncates towards zero and wraps around on out-of-range values.
        if input_dtype in (np.int8, np.uint8):
            input_scale, input_zero_point = input_details["quantization"]
            limits = np.iinfo(input_dtype)
            test_image = np.clip(
                np.round(test_image / input_scale + input_zero_point),
                limits.min,
                limits.max,
            )

        test_image = np.expand_dims(test_image, axis=0).astype(input_dtype)
        interpreter.set_tensor(input_details["index"], test_image)
        interpreter.invoke()
        output = interpreter.get_tensor(output_details["index"])[0]
        # argmax is taken directly on the raw model output.
        predictions[i] = output.argmax()

    return predictions

In [14]:
def train_network(MP_dim2, conv_kernel_w2, conv_kernel_h2, conv_block2, drop_out_rate2, start_f2, regularizer, inputs, targets, dilation_rate, fraction):
    """Cross-validate one CNN configuration, quantize every fold and store the results.

    For each of 10 folds a fresh model is built, trained, saved, converted to
    an int8 TFLite model and evaluated both in float and in quantized form.
    The averaged accuracy curves, the pooled validation predictions and the
    quantized accuracies are written to a JSON file, and the accuracy curves
    are saved as a PNG next to it.

    Besides its arguments, the function reads the module-level names
    ``img_h``, ``img_w``, ``num_classes``, ``balancing0``, ``balancing1`` and
    ``balancing2`` and calls ``run_tflite_model``.

    Args:
        MP_dim2: Pool size of the initial MaxPool2D layer.
        conv_kernel_w2, conv_kernel_h2: Kernel size; the first convolution of
            a block uses (w, h), the second one (h, w).
        conv_block2: Number of Conv2D -> Conv2D(relu) -> MaxPool2D blocks.
        drop_out_rate2: Dropout rate applied before the output layer.
        start_f2: Number of filters of every convolution.
        regularizer: Kernel regularizer of the output layer (or None).
        inputs: NumPy array of input images, indexed by sample.
        targets: One-hot encoded labels, indexed by sample.
        dilation_rate: Dilation rate of every convolution.
        fraction: Frequency-selection fraction; only used in output paths.

    Returns:
        Always 1.
    """
    # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training model with MP {MP_dim2}, conv kernel{[conv_kernel_w2, conv_kernel_h2]}, {conv_block2} blocks, {start_f2} filters, {drop_out_rate2} do...')
    experiment = "3_classes_test_4"  # output folder tag: change it for every new experiment
    config_tag = f'{MP_dim2}-{conv_kernel_w2}-{conv_kernel_h2}-{conv_block2}-{start_f2}-{drop_out_rate2}-{dilation_rate}'

    acc_per_fold = []
    train_acc_per_fold = []

    acc_per_fold_quant = []
    train_acc_per_fold_quant = []

    Y_pred_list = []
    Y_true_list = []

    #--------------------------------STATIC PARAMETERS------------------------------------------
    # K-fold cross validator. The seed is fixed so that every configuration of
    # the grid search is evaluated on the same folds and runs are repeatable.
    kfold = KFold(n_splits=10, shuffle=True, random_state=42)

    # Loss
    # NOTE(review): binary cross-entropy is paired with a softmax output over
    # mutually exclusive classes; categorical cross-entropy is the usual
    # choice. Left unchanged so results stay comparable - confirm the intent.
    loss = tf.keras.losses.BinaryCrossentropy()

    # learning rate
    lr = 0.3e-4

    # Validation metrics
    metrics = ['accuracy']

    batch_size = 32

    n_epoch = 400

    #------------------------------------CALLBACKS----------------------------------------
    callbacks = []

    # Early Stopping (disabled by default)
    early_stop = False
    if early_stop:
        es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=False,)
        callbacks.append(es_callback)

    # NOTE(review): these weights are the class proportions, so the most
    # frequent class gets the largest weight; balancing normally uses the
    # inverse. Left unchanged - confirm the intent.
    class_weights = {0: balancing0, 1: balancing1, 2: balancing2}

    input_shape = [img_h, img_w, 1]

    MODELS_DIR = f'models/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}/{config_tag}/'
    # exist_ok avoids the check-then-create race without any error handling.
    os.makedirs(MODELS_DIR, exist_ok=True)

    # ----------------------------------CROSSVALIDATION-----------------------------------
    for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):
        # Drop the graph state of the previous fold so memory does not grow
        # over the many models built during the grid search.
        tf.keras.backend.clear_session()

        print(f'{MP_dim2}, {[conv_kernel_w2, conv_kernel_h2]}, {conv_block2}, {start_f2}, {drop_out_rate2}, fold {fold_no}...')

        # Define the model architecture
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.MaxPool2D(pool_size=(MP_dim2, MP_dim2), input_shape=input_shape))

        for _ in range(conv_block2):
            # Conv block: Conv2D -> Conv2D -> Activation -> Pooling
            model.add(tf.keras.layers.Conv2D(filters=start_f2,
                                             kernel_size=(conv_kernel_w2, conv_kernel_h2),
                                             strides=(1, 1),
                                             dilation_rate=dilation_rate,
                                             padding='same'))
            model.add(tf.keras.layers.Conv2D(filters=start_f2,
                                             kernel_size=(conv_kernel_h2, conv_kernel_w2),
                                             strides=(1, 1),
                                             dilation_rate=dilation_rate,
                                             padding='same',
                                             activation='relu'))
            model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))

        # Classifier
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dropout(drop_out_rate2))
        model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax', kernel_regularizer=regularizer))

        # Compile the model. Each fold gets its own optimizer: an optimizer
        # keeps state tied to the variables of the model it trained, so one
        # instance must not be shared between independently built models.
        model.compile(loss=loss,
                      optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                      metrics=metrics)

        # Fit data to model
        history = model.fit(inputs[train], targets[train],
                            class_weight=class_weights,
                            batch_size=batch_size,
                            epochs=n_epoch,
                            validation_data=(inputs[test], targets[test]),
                            callbacks=callbacks,
                            verbose=False)

        acc_per_fold.append(history.history['val_accuracy'])
        train_acc_per_fold.append(history.history['accuracy'])

        Y_prediction = model.predict(inputs[test])
        Y_pred_list.append(np.argmax(Y_prediction, axis=1))
        Y_true_list.append(np.argmax(targets[test], axis=1))

        #-------------------------------SAVE MODEL-----------------------------------------
        MODEL_TF = MODELS_DIR + f'fold_{fold_no}'
        model.save(MODEL_TF)

        #--------------------------QUANTIZE THE MODEL ----------------------------------
        MODEL_TFLITE = MODELS_DIR + f'fold_{fold_no}.tflite'

        def representative_dataset():
            # Calibration samples for the converter: the first 100 inputs.
            for data in tf.data.Dataset.from_tensor_slices((inputs)).batch(1).take(100):
                yield [tf.dtypes.cast(data, tf.float32)]

        converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_TF)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # Enforce integer only quantization
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        # Provide a representative dataset to ensure we quantize correctly.
        converter.representative_dataset = representative_dataset
        model_tflite = converter.convert()

        # The context manager closes the file before the interpreter reads it.
        with open(MODEL_TFLITE, "wb") as tflite_out:
            tflite_out.write(model_tflite)

        predictions = run_tflite_model(MODEL_TFLITE, inputs, targets, test)
        accuracy = (np.sum(np.argmax(targets[test], axis=1) == predictions) * 100) / len(test)

        predictions_train = run_tflite_model(MODEL_TFLITE, inputs, targets, train)
        accuracy_train = (np.sum(np.argmax(targets[train], axis=1) == predictions_train) * 100) / len(train)

        acc_per_fold_quant.append(accuracy)
        train_acc_per_fold_quant.append(accuracy_train)
        print(f"accuracy: {np.array(acc_per_fold)[:, -1]}")
        print(f"quantized accuracy: {acc_per_fold_quant}")

    #---------------------------SAVE RESULTS TO JSON---------------------------------------
    # Pool the validation predictions of all folds. They are stored as floats,
    # which keeps the format of the JSON files already produced.
    Y_true = np.concatenate(Y_true_list).astype(float)
    Y_pred = np.concatenate(Y_pred_list).astype(float)
    row = {'MaxPoolDim' : MP_dim2,
           'conv_kernel_dim' : [conv_kernel_w2, conv_kernel_h2],
           'n_conv' : conv_block2,
           'n_filters' : start_f2,
           'dropout' : drop_out_rate2,
           'n_epochs' : n_epoch,
           'dilation_rate' : dilation_rate,
           'train_accuracy' : np.mean(train_acc_per_fold, axis=0).tolist(),
           'valid_accuracy' : np.mean(acc_per_fold, axis=0).tolist(),
           'Y_true' : Y_true.tolist(),
           'Y_pred' : Y_pred.tolist(),
           'train_accuracy_quant':  np.mean(train_acc_per_fold_quant),
           'valid_accuracy_quant':  np.mean(acc_per_fold_quant)
          }
    JSON_DIR = f'json_child/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}'
    os.makedirs(JSON_DIR, exist_ok=True)
    # Best effort: a failed dump is reported but does not stop the grid search.
    try:
        with open(f'{JSON_DIR}/{config_tag}.json', 'w') as f:
            json.dump(row, f)
    except Exception as e:
        print(e)

    #--------------------------PLOT ACCURACIES CURVES ------------------------------
    fig, ax = plt.subplots()
    ax.plot(np.mean(train_acc_per_fold, axis=0), label='train')
    ax.plot(np.mean(acc_per_fold, axis=0), label='validation')
    ax.set(xlabel='epoch', ylabel='accuracy', title=config_tag)
    ax.legend()
    fig.savefig(f'{JSON_DIR}/{config_tag}.png')
    plt.close(fig)

    return 1

## Perform grid search

In [15]:
# Inputs (NumPy array, so it can be indexed with the fold indices) and targets
inputs = np.array(train_tensor)
targets = train_label

#-----------------------------------GRID SEARCH PARAMETERS ---------------------------

#first MaxPool dimension, to reduce the input size.
MP_dims = [2]

#kernel dimension of filters.
conv_kernels = [[3,3],[5,5],[7,7]]

#number of convolution blocks
conv_blocks = [2, 3, 4]

#Dropout rate
drop_out_rates = [0.3]

#number of filters in each convolutional layer
start_fs =  [6, 8, 10, 12, 14, 16, 18, 20, 22]

#regularizers to use in the network
regularizers = [None]#, regularizers.l2(1e-4), regularizers.l2(1e-5), regularizers.l2(1e-3)]

#dilation rates of the convolutional layers
dilation_rates = [1, 2]

# Every list the grid iterates over takes part in the product, including the
# regularizers, so the count stays right when more regularizers are enabled.
n_models = (len(MP_dims) * len(conv_kernels) * len(conv_blocks) * len(start_fs)
            * len(drop_out_rates) * len(regularizers) * len(dilation_rates))

print(f"{n_models} will be trained")

162 will be trained


In [None]:
def train_one(arg):
    """Train one configuration.

    ``arg`` is an indexable sequence whose first 11 items are passed, in
    order, as the positional arguments of ``train_network``. Nothing is
    returned.
    """
    train_network(*(arg[position] for position in range(11)))

def train_all(args):
    """Train every configuration in ``args`` one after the other; return 0."""
    for configuration in args:
        train_one(configuration)
    return 0


if __name__ == "__main__":
    # One argument list per point of the hyper-parameter grid. The `for`
    # clauses run outermost first, so dilation_rate varies fastest.
    args = [
        [MP_dim, conv_kernel[0], conv_kernel[1], conv_block, drop_out_rate, start_f, regularizer, inputs, targets, dilation_rate, fraction]
        for MP_dim in MP_dims
        for conv_kernel in conv_kernels
        for conv_block in conv_blocks
        for drop_out_rate in drop_out_rates
        for start_f in start_fs
        for regularizer in regularizers
        for dilation_rate in dilation_rates
    ]

    results = train_all(args)

## Retrain the best network on several random train/test splits

In [18]:
# Retrain the best performing network on the full training split (train +
# validation) and evaluate it on the held-out test split, for several splits.

# Test accuracy of every random state of the latest train_random_states() run.
accuracies = []


def _select_frequencies(fft_samples, fraction_data):
    """Keep the first ``fraction_data`` bins of both 128-bin spectra of every sample.

    Every sample is sliced along axis 1 at ``[0, fraction_data)`` and at
    ``[128, 128 + fraction_data)`` and the two slices are concatenated.
    Returns one NumPy array with the samples stacked along axis 0.
    """
    return np.array([
        np.concatenate(
            (
                np.array(sample)[:, 0 : fraction_data],
                np.array(sample)[:, 128 : 128 + fraction_data],
            ),
            axis=1,
        )
        for sample in fft_samples
    ])


def _build_best_network(input_shape, num_classes, MP_dim, conv_kernel, conv_block,
                        n_filter, drop_out_rate, dilation_rate, regularizer):
    """Build (without compiling) the CNN: MaxPool, conv blocks, dropout, softmax."""
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.MaxPool2D(pool_size=(MP_dim, MP_dim), input_shape=input_shape))

    for _ in range(conv_block):
        # Conv block: Conv2D -> Conv2D -> Activation -> Pooling
        model.add(tf.keras.layers.Conv2D(filters=n_filter,
                                         kernel_size=(conv_kernel, conv_kernel),
                                         strides=(1, 1),
                                         dilation_rate=dilation_rate,
                                         padding='same'))
        model.add(tf.keras.layers.Conv2D(filters=n_filter,
                                         kernel_size=(conv_kernel, conv_kernel),
                                         strides=(1, 1),
                                         dilation_rate=dilation_rate,
                                         padding='same',
                                         activation='relu'))
        model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))

    # Classifier. Softmax is used so that the retrained network has the same
    # output layer as the one evaluated by train_network during the search.
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(drop_out_rate))
    model.add(tf.keras.layers.Dense(units=num_classes, activation='softmax', kernel_regularizer=regularizer))
    return model


def train_random_states(amount):
    """Train and test the best network on ``amount`` different random splits.

    For every ``random_state`` in ``range(amount)`` the module-level ``df`` is
    split 85/15, the network is trained on the training part, saved, converted
    to an int8 TFLite model and evaluated (in float) on the test part. The
    test accuracies are collected in the module-level list ``accuracies``
    (emptied at the start of each call) and written to ``accuracies.json``.

    Args:
        amount: Number of random states (0 .. amount - 1) to evaluate.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split

    # ---------------------------- preprocessing -----------------------------
    # Only the first third of both spectra is kept.
    fraction = 3
    fraction_data = int(round(128/fraction))  # fraction_data = 43 in this case

    # Dimensions of the inputs: 53 x 86 single-channel images.
    img_h, img_w = 53, fraction_data*2
    num_classes = 3
    input_shape = [img_h, img_w, 1]

    # ------------------------ best network / training ------------------------
    MP_dim = 2
    conv_kernel = 7
    conv_block = 2
    drop_out_rate = 0.3
    n_filter = 22
    dilation_rate = 1
    regularizer = None

    lr = 0.3e-4
    batch_size = 32
    n_epoch = 300
    metrics = ['accuracy']
    callbacks = []

    # Start from an empty list so re-running the cell does not pile the
    # results of a previous run on top of the new ones.
    accuracies.clear()

    for random_state in range(amount):
        print('----------------------------------------------------------------------------')
        print(f'RND STATE NUMBER = {random_state}')
        print('----------------------------------------------------------------------------')

        # SPLIT THE DATASET (stratified on the label, like the main split)
        train_df, test_df = train_test_split(df, test_size=0.15, random_state=random_state,
                                             stratify=df.occupants)

        # DATA PREPROCESSING - training set (trained on the fftData column)
        train_x = _select_frequencies(train_df["fftData"], fraction_data)

        # Z-score normalisation with the statistics of the training set.
        train_mean = np.mean(train_x)
        train_std = np.std(train_x)
        print(train_mean)
        print(train_std)
        train_inputs = np.expand_dims((train_x - train_mean) / train_std, -1)
        print(train_inputs.shape)

        # Proportion of every class in the training set.
        # NOTE(review): these proportions are used as class weights, so the
        # most frequent class gets the largest weight; balancing normally
        # uses the inverse. Left unchanged - confirm the intent.
        train_occupants = train_df["occupants"]
        class_weights = {}
        for label in range(num_classes):
            class_weights[label] = int((train_occupants == label).sum()) / len(train_df)
            print(class_weights[label])

        train_label = tf.keras.utils.to_categorical(train_occupants, num_classes)

        # DATA PREPROCESSING - test set. It is normalised with the TRAINING
        # mean/std: the test data must not contribute its own statistics.
        test_labels = np.array(test_df["occupants"])
        test_x = _select_frequencies(test_df["fftData"], fraction_data)
        print(np.mean(test_x))
        print(np.std(test_x))
        test_images = np.expand_dims((test_x - train_mean) / train_std, -1)
        print(test_images.shape)

        # NETWORK DESIGN
        # Drop the graph state of the previous random state.
        tf.keras.backend.clear_session()

        experiment = f"3_classes_BEST_{random_state}"

        model = _build_best_network(input_shape, num_classes, MP_dim, conv_kernel, conv_block,
                                    n_filter, drop_out_rate, dilation_rate, regularizer)

        # NOTE(review): binary cross-entropy is kept to match the loss used
        # during the grid search; categorical cross-entropy is the usual
        # choice for a softmax output - confirm the intent.
        model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                      optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                      metrics=metrics)

        # Generate a print
        print('------------------------------------------------------------------------')

        # Fit on THIS split's training data (not on the module-level
        # inputs/targets, which belong to a different split and would leak
        # test samples into training).
        history = model.fit(train_inputs, train_label,
                            class_weight=class_weights,
                            batch_size=batch_size,
                            epochs=n_epoch,
                            callbacks=callbacks,
                            verbose=False)

        MODELS_DIR = f'models_full_train/experiment_{experiment}/experiment_{experiment}_{random_state}/{MP_dim}-{conv_kernel}-{conv_block}-{n_filter}-{drop_out_rate}-{dilation_rate}/'
        os.makedirs(MODELS_DIR, exist_ok=True)
        MODEL_TF = MODELS_DIR + 'model'
        MODEL_TFLITE = MODELS_DIR + 'model.tflite'
        model.save(MODEL_TF)

        def representative_dataset():
            # Calibration samples for the converter: the first 100 training inputs.
            for data in tf.data.Dataset.from_tensor_slices((train_inputs)).batch(1).take(100):
                yield [tf.dtypes.cast(data, tf.float32)]

        converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_TF)
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        # Enforce integer only quantization
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        # Provide a representative dataset to ensure we quantize correctly.
        converter.representative_dataset = representative_dataset
        model_tflite = converter.convert()

        with open(MODEL_TFLITE, "wb") as tflite_out:
            tflite_out.write(model_tflite)

        # TEST THE (FLOAT) NETWORK ON THE TEST SET
        outs = model.predict(test_images, verbose=0)
        predicted = np.argmax(outs, axis=1)
        print(predicted)
        print(test_labels)

        # Accuracy = correctly classified samples (diagonal) / all samples.
        conf = confusion_matrix(test_labels, predicted)
        accuracy = float(np.trace(conf) / conf.sum())
        print(f"accuracy of experiment_{experiment}_{random_state} = {accuracy}")
        accuracies.append(accuracy)

    row = {'accuracy' : accuracies}
    JSON_DIR = f'json_child/experiment_3_classes_BEST/fraction_{fraction}/n_epoch_300'
    os.makedirs(JSON_DIR, exist_ok=True)
    # Best effort: a failed dump is reported instead of raising.
    try:
        with open(f'{JSON_DIR}/accuracies.json', 'w') as f:
            json.dump(row, f)
    except Exception as e:
        print(e)

In [None]:
# Retrain and evaluate the network on 40 random splits (random_state 0..39).
train_random_states(40)

## Results are saved on a .json file 