In [27]:
import os
import numpy as np
import json
import pandas as pd
import ast
import scipy
import math

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow.keras.models import load_model, Model

# Enable on-demand GPU memory growth on every visible GPU so TensorFlow
# does not reserve the whole device memory up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
for physical_gpu in gpus:
    tf.config.experimental.set_memory_growth(physical_gpu, True)
    
from os import walk
import json

cwd = os.getcwd()

In [2]:
# Sanity check: report how many physical GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


## Dataframe loading

In [3]:
# Load the "516" dataset used for the first training and validation run.
# NOTE(review): unpickling can execute arbitrary code - only load pickle
# files that come from a trusted source.
dataframe_name = "dataframe516"
pkl_path = f"./pickle/{dataframe_name}.pkl"
df = pd.read_pickle(pkl_path)

In [4]:
#DEFINE FUNCTIONS FOR SELECTING PARTS OF THE DATASET REGARDING DIFFERENT CONFIGURATIONS OF OCCUPANTS

#select only data where seat1 holds a child (toddler or baby) or is empty
def select_only_children_on_seat1(df):
    """Return the rows whose ``seat1`` is "toddler", "baby" or "none".

    The input frame is left untouched; a filtered copy is returned.
    """
    snapshot = df.copy()
    child_or_empty = snapshot["seat1"].isin(["toddler", "baby", "none"])
    return snapshot[child_or_empty]

#select only data that have one or more pets alone in the back seats (OR NONE)
def select_only_pet(df):
    """Return the rows where every back seat is either "pet" or "none".

    This covers all eight pet/none combinations over seat1..seat3, including
    the fully empty configuration. The input frame is not modified.
    """
    snapshot = df.copy()
    seats = snapshot[["seat1", "seat2", "seat3"]]
    pet_or_empty = seats.isin(["pet", "none"]).all(axis=1)
    return snapshot[pet_or_empty]

#select only data that have only one target, adult or toddler, in the back seats (OR NONE)
def select_only_single(df):
    """Return rows with at most one occupant, who must be an adult or toddler.

    A row is kept when every seat is "adult", "toddler" or "none" and no more
    than one seat is occupied (the fully empty configuration is included).
    The input frame is not modified.
    """
    snapshot = df.copy()
    seats = snapshot[["seat1", "seat2", "seat3"]]
    only_allowed = seats.isin(["adult", "toddler", "none"]).all(axis=1)
    occupied_count = seats.ne("none").sum(axis=1)
    return snapshot[only_allowed & (occupied_count <= 1)]

#select only data that have only one target, adult or toddler, in the back seats. (NO NONE) (LUIS)
def select_only_single_true(df):
    """Return rows with exactly one occupant, who must be an adult or toddler.

    Same rule as ``select_only_single`` except that the fully empty
    configuration is excluded. The input frame is not modified.
    """
    snapshot = df.copy()
    seats = snapshot[["seat1", "seat2", "seat3"]]
    only_allowed = seats.isin(["adult", "toddler", "none"]).all(axis=1)
    occupied_count = seats.ne("none").sum(axis=1)
    return snapshot[only_allowed & (occupied_count == 1)]

#select only data that have one or more adults alone in the back seats (OR NONE) (LUIS)
def select_only_adult(df):
    """Return the rows where every back seat is either "adult" or "none".

    This covers all eight adult/none combinations over seat1..seat3,
    including the fully empty configuration. The input frame is not modified.
    """
    snapshot = df.copy()
    seats = snapshot[["seat1", "seat2", "seat3"]]
    adult_or_empty = seats.isin(["adult", "none"]).all(axis=1)
    return snapshot[adult_or_empty]

In [5]:
#DEFINE FUNCTIONS FOR ASSIGNING OCCUPANTS

#assign occupations status of seats
def assign_occupations(df):
    """Add binary occupancy columns ``class1``..``class3`` to ``df`` in place.

    ``classN`` is 0 where ``seatN`` equals 'none' and 1 otherwise.
    Returns None.
    """
    for seat_number in (1, 2, 3):
        seat_column = f'seat{seat_number}'
        df[f'class{seat_number}'] = [
            0 if occupant == 'none' else 1 for occupant in df[seat_column]
        ]

#DEFINE PRESENCE AS AT LEAST 1 SEAT OCCUPIED
def assign_presence(df):
    """Add a 0/1 ``presence`` column to ``df`` in place.

    ``presence`` is 1 when at least one of ``class1``, ``class2``, ``class3``
    is non-zero and 0 otherwise. Returns None.

    The computation is vectorized: the previous ``iterrows`` loop was slow and
    produced float values when the frame contained only numeric columns,
    because ``iterrows`` coerces each row to one common dtype.
    """
    flags = df[['class1', 'class2', 'class3']]
    # .values assigns by position, like the list the original loop built.
    df['presence'] = (flags != 0).any(axis=1).astype(int).values

#ASSIGN NUMBER OF OCCUPANTS (LUIS)
def assign_occupants(df):
    """Add an ``occupants`` column to ``df`` in place.

    ``occupants`` is the sum ``class1 + class2 + class3``, i.e. the number of
    occupied seats when the class columns hold 0/1 flags. Returns None.

    The sum is vectorized: the previous ``iterrows`` loop carried a dead
    ``count = 0`` store, was slow, and produced float counts when the frame
    contained only numeric columns, because ``iterrows`` coerces each row to
    one common dtype.
    """
    total = df['class1'] + df['class2'] + df['class3']
    # .values assigns by position, like the list the original loop built.
    df['occupants'] = total.values

In [6]:
# Derive the per-seat occupancy flags (class1..class3) and the occupant
# count; both helpers add their columns to df in place.
assign_occupations(df)
assign_occupants(df)
df

Unnamed: 0,Id,deviceSerial,fWversion,batteryLevel,hWversion,rawData,fftData,SW Version,seat1,seat2,...,vehicle,temperature,accX,accY,accZ,createdAt,class1,class2,class3,occupants
0,090cu2xvt8o7Gmx9sMFx,2,1.0,100,1.2,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-105.87874280268608, -102.26725079789395, -1...",,none,none,...,FYMCwQblEjJrCt5S9KDV,,,,,"{'_seconds': 1603708000, '_nanoseconds': 54000...",0,0,1,1
1,0bv8HBSz1AHA8Mhp5qLm,2,1.0,100,1.2,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-103.23020764266907, -100.18002761432678, -1...",,toddler,adult,...,4GImmlvjC676xXRyFECn,,,,,"{'_seconds': 1607352646, '_nanoseconds': 12600...",1,1,1,3
2,19xTU1eBTiK4mBieg8T2,2,1.0,100,1.2,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-107.26817809590841, -103.572805055402, -110...",,pet,none,...,4GImmlvjC676xXRyFECn,,,,,"{'_seconds': 1604935813, '_nanoseconds': 52500...",1,0,1,2
3,1eEVvANdi96NtzXSDT1C,2,1.0,100,1.2,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-108.10489947686006, -105.49311958297311, -1...",,none,pet,...,4GImmlvjC676xXRyFECn,,,,,"{'_seconds': 1604936210, '_nanoseconds': 91100...",0,1,0,1
4,2EttR31aXRBWqx8dRzgq,2,1.0,100,1.2,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-117.84473965177918, -105.55119981094282, -1...",,toddler,baby,...,4GImmlvjC676xXRyFECn,,,,,"{'_seconds': 1607771892, '_nanoseconds': 46000...",1,1,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473,z6yKyTEjlNMoFUpBlCxg,0.8.0.0,,100,1.0,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-74.87174084241651, -70.40210124639611, -77....",,none,none,...,emS4JG4Hp2H8d2Ab4ofV,,-0.936,-0.072,-0.156,"{'_seconds': 1636904824, '_nanoseconds': 43000...",0,0,1,1
474,zBzfzVgNdiH5EHoPWPjz,0.8.0.0,,100,1.0,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-68.41237424396255, -65.80510348327765, -79....",,baby,none,...,6a9YEoNDIb7J4j5yNXVc,,-0.676,-0.172,-0.58,"{'_seconds': 1636898959, '_nanoseconds': 98800...",1,0,0,1
475,zHvZrXdGjZB73NoeNGOT,0.8.0.0,,100,1.0,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-86.55813594069244, -77.12291636247366, -78....",,adult,none,...,X1ILHgwtdueiSTYr8XdK,,-0.896,-0.156,-0.292,"{'_seconds': 1636483781, '_nanoseconds': 19300...",1,0,0,1
476,zII9nav2JaUQCyxzLKy5,0.8.0.0,,100,1.0,"{""real"":[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...","[[-89.49105064688271, -86.80552730271502, -95....",,none,none,...,HAETRoo2NZj9oKG8JBAB,,1.06,0.132,0.504,"{'_seconds': 1620400379, '_nanoseconds': 62000...",0,0,0,0


In [7]:
# complete_df is a second name for df (no copy is made), so changes made
# through one name are visible through the other.
complete_df = df

# Show only the label columns.
df_info = complete_df[['occupants', 'class1', 'class2', 'class3']]
df_info

Unnamed: 0,occupants,class1,class2,class3
0,1,0,0,1
1,3,1,1,1
2,2,1,0,1
3,1,0,1,0
4,2,1,1,0
...,...,...,...,...
473,1,0,0,1
474,1,1,0,0
475,1,1,0,0
476,0,0,0,0


In [8]:
# Split the dataset into train and test sets (15% test), stratified by the
# number of occupants and with a fixed random_state.
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(complete_df, test_size=0.15, random_state = 42, stratify=complete_df.occupants)
# Sizes of the train and test sets
print(len(train_df))
print(len(test_df))

406
72


## Data preprocessing

In [9]:
working_df = train_df

# Frequency selection. Per the original notes, each fftData row holds two
# spectra of 128 bins side by side, so the first 1/fraction of the bins is
# taken from each half: columns [0, fraction_data) and [128, 128 + fraction_data).
fraction = 3
fraction_data = int(round(128 / fraction))  # 43 when fraction == 3

# Training uses the fftData column.
train_list = np.array(working_df.fftData)
train_x = []
for sample in train_list:
    spectrum = np.array(sample)
    first_half = spectrum[:, 0:fraction_data]
    second_half = spectrum[:, 128:128 + fraction_data]
    train_x.append(np.concatenate((first_half, second_half), axis=1))
train_arr = [np.array(cropped) for cropped in train_x]
train_list = train_arr

# z-score normalization over the whole training set (one global mean/std).
print(np.mean(train_list))
print(np.std(train_list))
train_list = scipy.stats.zscore(train_list, axis=None)

# Convert to a tensor and append a trailing channel axis of size 1.
train_tensor = tf.expand_dims(tf.convert_to_tensor(train_list), -1)
print(train_tensor.shape)

# Labels: number of occupants per sample.
train_label = working_df["occupants"]

# Class proportions of the training set (share of samples with 0..3 occupants).
passengers0, passengers1, passengers2, passengers3 = [
    int((working_df["occupants"] == count).sum()) for count in range(4)
]
balancing0 = passengers0 / len(train_df)
balancing1 = passengers1 / len(train_df)
balancing2 = passengers2 / len(train_df)
balancing3 = passengers3 / len(train_df)

for proportion in (balancing0, balancing1, balancing2, balancing3):
    print(proportion)

# Input dimensions: 53 x (2 * fraction_data) images.
img_h, img_w = 53, fraction_data * 2
# NOTE(review): num_classes is 3 although labels 0..3 occur - confirm if used.
num_classes = 3

print(train_label)

-105.31283117083753
14.104729991228874
(406, 53, 86, 1)
0.25862068965517243
0.4064039408866995
0.22167487684729065
0.11330049261083744
376    2
407    1
417    0
271    1
211    1
      ..
224    0
309    1
332    1
63     3
329    1
Name: occupants, Length: 406, dtype: int64


In [11]:
# Apply the same preprocessing steps to the test set.
# NOTE(review): the test set is z-scored with its OWN mean/std rather than the
# training statistics - confirm this is intended.
test_labels = np.array(test_df["occupants"])

fraction = 3
fraction_data = int(round(128 / fraction))

test_list = np.array(test_df["fftData"])
test_x = []
for sample in test_list:
    spectrum = np.array(sample)
    first_half = spectrum[:, 0:fraction_data]
    second_half = spectrum[:, 128:128 + fraction_data]
    test_x.append(np.concatenate((first_half, second_half), axis=1))
test_arr = [np.array(cropped) for cropped in test_x]
test_list = test_arr

print(np.mean(test_list))
print(np.std(test_list))
test_list = scipy.stats.zscore(test_list, axis=None)

# Convert to a tensor and append a trailing channel axis of size 1.
test_tensor = tf.expand_dims(tf.convert_to_tensor(test_list), -1)
print(test_tensor.shape)
test_images = test_tensor
print(test_labels)

-105.05868403226422
15.283135488519568
(72, 53, 86, 1)
[0 2 0 2 0 1 1 1 3 0 2 0 0 3 1 3 2 1 2 1 2 3 3 2 1 0 1 2 1 1 0 2 1 1 1 0 0
 1 1 2 0 1 0 2 2 1 1 1 2 2 1 3 1 2 0 1 1 1 0 3 2 1 0 1 1 0 0 3 0 0 1 1]


## Loading pre-trained network

In [19]:
# Location of the pre-trained classifier that serves as feature extractor.
folder = '4_classes_test_2'
model_name = '2-7-3-12-0.3-1'
fraction = 3
n_epoch = 400

mypath = f'models_full_train/experiment_{folder}/{model_name}/model'
# Files directly inside the model folder (empty list if the folder is missing).
filenames = next(walk(mypath), (None, None, []))[2]

imported_model = load_model(mypath)

# Rebuild the network without its last layer and attach a single linear
# output unit, turning the classifier into a regressor.
model = tf.keras.Sequential()
for pretrained_layer in imported_model.layers[:-1]:
    model.add(pretrained_layer)
model.add(tf.keras.layers.Dense(units=1, activation='linear'))

# Freeze everything except the new output layer.
for frozen_layer in model.layers[:-1]:
    frozen_layer.trainable = False

# NOTE(review): no optimizer is passed, so the Keras default optimizer is used.
model.compile(loss='mse', metrics=['mae'])

## Defining network architecture

In [10]:
# Helper function to run inference on a TFLite model
def run_tflite_model(tflite_file, inputs, targets):
    """Run one sample through a TFLite model and return the predicted class.

    Args:
        tflite_file: Path of the ``.tflite`` model (converted with ``str``).
        inputs: One input sample WITHOUT the batch axis; a leading batch axis
            of size 1 is added before inference.
            NOTE(review): assumed from the ``expand_dims(axis=0)`` call -
            confirm callers do not pass a whole batch.
        targets: Label belonging to ``inputs``. Not used by the inference;
            kept so existing call sites keep working.

    Returns:
        list: A one-element list holding the argmax index of the model output.
    """
    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(tflite_file))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    test_image = inputs

    # If the model expects int8 input, map the float data into the quantized
    # domain using the scale / zero point stored in the model.
    if input_details['dtype'] == np.int8:
        input_scale, input_zero_point = input_details["quantization"]
        test_image = test_image / input_scale + input_zero_point

    test_image = np.expand_dims(test_image, axis=0).astype(input_details["dtype"])
    interpreter.set_tensor(input_details["index"], test_image)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details["index"])[0]

    # The previous code did ``predictions[i] = ...`` on an empty list with an
    # index that is not defined in this function, which always raised.
    predictions = [output.argmax()]
    return predictions

In [None]:
print('------------------------------------------------------------------------')
print(f'Training regression model {model_name} with transfer learning')
experiment = "regression_test_12"  # NOTE: change this folder name for every new experiment

inputs = np.array(train_tensor)
targets = train_label

mae_per_fold = []
loss_per_fold = []
train_mae_per_fold = []

acc_per_fold_quant = []
train_acc_per_fold_quant = []

fold_no = 1
Y_pred_list = []
Y_true_list = []

#--------------------------------STATIC PARAMETERS------------------------------------------

# Optimization params
# -------------------
# NOTE(review): `loss`, `optimizer` and `metrics` below are defined but never
# passed to model.compile(); the model keeps the settings it was compiled with
# when it was built. Confirm whether Adam with this learning rate was intended.
loss = 'mean_squared_error'
lr = 0.3e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Validation metrics
# ------------------
metrics = ['mae','accuracy']

batch_size = 32

n_epoch = 100


#------------------------------------CALLBACKS----------------------------------------
callbacks = []

# Early Stopping
# --------------
early_stop = False
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=False,)
    callbacks.append(es_callback)


# NOTE(review): each weight is the class FREQUENCY, so frequent classes get
# the larger weight - confirm this is intended (inverse frequency is the
# usual choice for re-balancing).
class_weights={0: balancing0, 1: balancing1, 2: balancing2, 3: balancing3}
# NOTE(review): the test set is also used as validation data during training.
history = model.fit(inputs, targets,
            class_weight=class_weights,
            batch_size=batch_size,
            epochs=n_epoch,
            validation_data=(test_images, test_labels),
            callbacks = callbacks,
            verbose=1)
print(history.history.keys())
mae_per_fold.append(history.history['val_mae'])
loss_per_fold.append(history.history['val_loss'])
train_mae_per_fold.append(history.history['mae'])

# Round every regression output to the nearest integer occupant count.
# Plain Python ints keep the list JSON-serializable.
Y_prediction = model.predict(test_images)
Y_prediction_int = [int(round(float(item[0]))) for item in Y_prediction]

# Share of test samples whose rounded prediction equals the label.
accuracy = float(np.mean(np.array(Y_prediction_int) == np.array(test_labels)))
#-------------------------------SAVE MODEL-----------------------------------------

MODELS_DIR = f'models/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}/{model_name}/'
# exist_ok=True replaces the previous `except e:` handler, which referenced
# the undefined names `e` and `errno` and would have raised NameError.
os.makedirs(MODELS_DIR, exist_ok=True)

MODEL_TF = MODELS_DIR + f'fold_{fold_no}'
model.save(MODEL_TF)

# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

#---------------------------SAVE RESULTS TO JSON---------------------------------------
# Y_true / Y_pred were previously swapped (labels stored under 'Y_pred').
# The *_quant lists are never filled in this cell; store None instead of the
# NaN (plus RuntimeWarning) that np.mean of an empty list yields.
row = {'model': model_name,
       'train_mae' : np.mean(train_mae_per_fold, axis=0).tolist(),
       'valid_mae' : np.mean(mae_per_fold, axis=0).tolist(),
       'Y_true' : test_labels.tolist(),
       'Y_pred' : Y_prediction_int,
       'valid_accuracy' : accuracy,
       'train_acc_quant':  float(np.mean(train_acc_per_fold_quant)) if train_acc_per_fold_quant else None,
       'valid_acc_quant':  float(np.mean(acc_per_fold_quant)) if acc_per_fold_quant else None
}
JSON_DIR = f'json_child/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}'
os.makedirs(JSON_DIR, exist_ok=True)
# Best effort: a failed write is reported but does not abort the cell.
try:
    with open(f'{JSON_DIR}/{model_name}.json', 'w') as f:
        json.dump(row, f)
except Exception as e:
    print(e)


#--------------------------PLOT MAE CURVES ------------------------------
plt.clf()
plt.plot(np.mean(train_mae_per_fold, axis=0))
plt.plot(np.mean(mae_per_fold, axis=0))
plt.savefig(f'{JSON_DIR}/{model_name}.png')
plt.close()

In [22]:
from sklearn.metrics import confusion_matrix

# Round every regression output to the nearest integer occupant count.
Y_prediction_int = [round(output[0]) for output in Y_prediction]
print(Y_prediction_int)
print(test_labels.tolist())

conf = confusion_matrix(test_labels, Y_prediction_int)

# Accuracy: share of rounded predictions that equal the label.
hits = sum(
    1 for predicted, actual in zip(Y_prediction_int, test_labels)
    if predicted == actual
)
accuracy = float(hits) / len(Y_prediction)
print(f"accuracy = {accuracy}")
print(conf)

[0, 1, 0, 2, 0, 1, 1, 1, 2, 0, 2, 1, 0, 2, 1, 1, 2, 0, 1, 1, 1, 2, 2, 2, 1, 0, 1, 2, 0, 1, 1, 2, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1]
[0, 2, 0, 2, 0, 1, 1, 1, 3, 0, 2, 0, 0, 3, 1, 3, 2, 1, 2, 1, 2, 3, 3, 2, 1, 0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 0, 0, 1, 1, 2, 0, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 3, 1, 2, 0, 1, 1, 1, 0, 3, 2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 1, 1]
accuracy = 0.7361111111111112
[[16  3  0  0]
 [ 2 26  1  0]
 [ 0  5 11  0]
 [ 0  2  6  0]]


In [26]:
from sklearn.metrics import confusion_matrix

# Merge class 3 into class 2 for both labels and predictions.
# NOTE(review): test_labels is modified IN PLACE, so the 4-class labels are
# gone after this cell and earlier evaluation cells change if re-run.
test_labels[test_labels == 3] = 2
Y_prediction_int = [2 if predicted == 3 else predicted for predicted in Y_prediction_int]

print(Y_prediction_int)
print(test_labels.tolist())

conf = confusion_matrix(test_labels, Y_prediction_int)

# Accuracy: share of merged predictions that equal the merged label.
hits = sum(
    1 for predicted, actual in zip(Y_prediction_int, test_labels)
    if predicted == actual
)
accuracy = float(hits) / len(Y_prediction)
print(f"accuracy = {accuracy}")
print(conf)

[0, 1, 0, 2, 0, 1, 1, 1, 2, 0, 2, 1, 0, 2, 1, 1, 2, 0, 1, 1, 1, 2, 2, 2, 1, 0, 1, 2, 0, 1, 1, 2, 1, 1, 1, 0, 0, 1, 2, 1, 0, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1]
[0, 2, 0, 2, 0, 1, 1, 1, 2, 0, 2, 0, 0, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 0, 1, 2, 1, 1, 0, 2, 1, 1, 1, 0, 0, 1, 1, 2, 0, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 2, 1, 2, 0, 1, 1, 1, 0, 2, 2, 1, 0, 1, 1, 0, 0, 2, 0, 0, 1, 1]
accuracy = 0.8194444444444444
[[16  3  0]
 [ 2 26  1]
 [ 0  7 17]]


## Training with the full dataset (n_epoch 100)

In [30]:
accuracies4=[]
accuracies3=[]


def _fft_to_normalized_tensor(frame, fraction_data):
    """Crop, z-score and tensorize the ``fftData`` column of ``frame``.

    From every sample the columns [0, fraction_data) and
    [128, 128 + fraction_data) are kept and concatenated, the whole set is
    z-scored with its own global mean/std, and a trailing channel axis of
    size 1 is appended. Mean, std and the final shape are printed.
    """
    cropped = []
    for sample in np.array(frame["fftData"]):
        spectrum = np.array(sample)
        first_half = spectrum[:, 0:fraction_data]
        second_half = spectrum[:, 128:128 + fraction_data]
        cropped.append(np.concatenate((first_half, second_half), axis=1))

    print(np.mean(cropped))
    print(np.std(cropped))
    normalized = scipy.stats.zscore(cropped, axis=None)

    tensor = tf.expand_dims(tf.convert_to_tensor(normalized), -1)
    print(tensor.shape)
    return tensor


def _build_frozen_regressor(model_path):
    """Load the model at ``model_path`` and swap its last layer for a linear unit.

    All layers except the new single-unit output layer are frozen. The model
    is compiled with MSE loss and the MAE metric.
    """
    imported_model = load_model(model_path)
    model = tf.keras.Sequential()
    for pretrained_layer in imported_model.layers[:-1]:
        model.add(pretrained_layer)
    model.add(tf.keras.layers.Dense(units=1, activation='linear'))
    for frozen_layer in model.layers[:-1]:
        frozen_layer.trainable = False
    # NOTE(review): no optimizer is passed, so the Keras default is used. The
    # original code built an Adam(learning_rate=0.3e-4) optimizer but never
    # handed it to compile(); confirm which one was intended.
    model.compile(loss='mse', metrics=['mae'])
    return model


def _match_rate(predictions, labels):
    """Return the share of positions where ``predictions`` equals ``labels``."""
    return float(np.mean(np.asarray(predictions) == np.asarray(labels)))


def train_random_states(amount):
    """Train and evaluate the regression model for ``amount`` random splits.

    For every ``random_state`` in ``range(amount)`` the global ``df`` is split
    85/15, both parts are preprocessed, the pre-trained network is rebuilt
    with a linear output unit, trained for 100 epochs and evaluated on the
    test part. The 4-class accuracy (labels 0..3) and the 3-class accuracy
    (label 3 merged into 2) are appended to the module-level lists
    ``accuracies4`` / ``accuracies3``, and both lists are written to
    ``accuracies.json`` at the end.

    Fixes compared with the previous version:
      * The outer loop variable ``i`` was overwritten by inner loops, so every
        run was saved to the same ``regression_test_12_<n>`` folder and
        overwrote the previous model. The folder now carries the random state.
      * ``except e:`` referenced the undefined names ``e`` and ``errno``; the
        directory is now created with ``exist_ok=True``.
      * Constants needed after the loop are set before it, so ``amount == 0``
        no longer raises NameError.

    Args:
        amount: Number of random states (0 .. amount - 1) to run.

    Returns:
        None. Results are appended to ``accuracies4`` / ``accuracies3`` and
        written to disk.
    """
    from sklearn.model_selection import train_test_split

    fraction = 3
    fraction_data = int(round(128 / fraction))  # 43 when fraction == 3
    batch_size = 32
    n_epoch = 100
    fold_no = 1

    folder = '4_classes_test_2'
    model_name = '2-7-3-12-0.3-1'
    mypath = f'models_full_train/experiment_{folder}/{model_name}/model'

    for random_state in range(amount):
        print('----------------------------------------------------------------------------')
        print(f'RND STATE NUMBER = {random_state}')
        print('----------------------------------------------------------------------------')

        # NOTE(review): this split is not stratified, unlike the single-run
        # split earlier in the notebook - confirm this is intended.
        train_df, test_df = train_test_split(df, test_size=0.15, random_state=random_state)

        # ---- training data -------------------------------------------------
        train_tensor = _fft_to_normalized_tensor(train_df, fraction_data)
        train_label = train_df["occupants"]

        # Class weights are the class proportions of the training split.
        # NOTE(review): frequent classes therefore get the larger weight.
        class_weights = {
            count: float((train_label == count).sum()) / len(train_df)
            for count in range(4)
        }
        for count in range(4):
            print(class_weights[count])
        print(train_label)

        # ---- test data -----------------------------------------------------
        # NOTE(review): the test split is z-scored with its own statistics.
        test_labels = np.array(test_df["occupants"])
        test_images = _fft_to_normalized_tensor(test_df, fraction_data)
        print(test_labels)

        # ---- model ---------------------------------------------------------
        model = _build_frozen_regressor(mypath)

        print('------------------------------------------------------------------------')
        print(f'Training regression model {model_name} with transfer learning')
        experiment = f"regression_test_12_{random_state}"

        history = model.fit(np.array(train_tensor), train_label,
                    class_weight=class_weights,
                    batch_size=batch_size,
                    epochs=n_epoch,
                    validation_data=(test_images, test_labels),
                    callbacks=[],
                    verbose=False)
        print(history.history.keys())

        # ---- evaluation ----------------------------------------------------
        # Round every regression output to the nearest integer occupant count.
        Y_prediction = model.predict(test_images)
        predicted = np.array([int(round(float(item[0]))) for item in Y_prediction])

        accuracy4 = _match_rate(predicted, test_labels)
        # 3-class variant: label/prediction 3 is merged into 2.
        accuracy3 = _match_rate(np.where(predicted == 3, 2, predicted),
                                np.where(test_labels == 3, 2, test_labels))
        print(f"accuracy4 = {accuracy4}")
        print(f"accuracy3 = {accuracy3}")

        # ---- save model ----------------------------------------------------
        MODELS_DIR = f'models/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}/{model_name}/'
        os.makedirs(MODELS_DIR, exist_ok=True)
        model.save(MODELS_DIR + f'fold_{fold_no}')

        accuracies4.append(accuracy4)
        accuracies3.append(accuracy3)

    row = {'accuracies4' : accuracies4,
          'accuracies3' : accuracies3}
    JSON_DIR = f'json_child/experiment_regression_BEST/fraction_{fraction}/n_epoch_{n_epoch}'
    os.makedirs(JSON_DIR, exist_ok=True)
    # Best effort: a failed write is reported but does not raise.
    try:
        with open(f'{JSON_DIR}/accuracies.json', 'w') as f:
            json.dump(row, f)
    except Exception as e:
        print(e)

In [None]:
# Run the experiment for random states 0..39.
train_random_states(40)

## Training with the full dataset (n_epoch 200)

In [32]:
accuracies4=[]
accuracies3=[]
def train_random_states(amount, n_epoch=200):
    """Train the regression head on ``amount`` random train/test splits.

    For every ``random_state`` in ``range(amount)`` the global ``df`` is split
    85/15, the ``fftData`` spectra are cropped and z-scored, the saved model
    under ``models_full_train/`` is reloaded with its last layer replaced by a
    single linear unit (all other layers frozen), and the result is trained
    with an MSE loss. Rounded predictions on the test split are scored twice:
    against the raw ``occupants`` labels (``accuracy4``) and with label 3
    merged into label 2 on both sides (``accuracy3``).

    Side effects:
        * appends one value per split to the module-level lists
          ``accuracies4`` and ``accuracies3``; they are not reset here, so
          repeated calls keep accumulating;
        * saves each trained model under
          ``models/experiment_regression_test_12_<random_state>/...``;
        * writes both accuracy lists to
          ``json_child/experiment_regression_BEST/.../accuracies.json``
          (a failure to write is printed, not raised).

    Args:
        amount: number of splits to run; split ``k`` uses ``random_state=k``.
        n_epoch: training epochs per split. Defaults to 200, the value this
            cell originally hard-coded.
    """
    # Imported here, as in the original cell, so the function does not depend
    # on another cell for this name; hoisted out of the loop.
    from sklearn.model_selection import train_test_split

    # Settings shared by every split. They are bound before the loop so the
    # JSON path at the end is valid even when ``amount`` is 0.
    fraction = 3
    fraction_data = int(round(128 / fraction))  # 43 columns kept per half
    folder = '4_classes_test_2'
    model_name = '2-7-3-12-0.3-1'
    batch_size = 32
    fold_no = 1
    mypath = f'models_full_train/experiment_{folder}/{model_name}/model'

    def _prepare_images(fft_column):
        """Crop, z-score and tensorise one ``fftData`` column."""
        cropped = []
        for sample in np.array(fft_column):
            spectrum = np.array(sample)
            # Per the original notebook comment, each sample holds two spectra
            # of 128 columns; keep the first ``fraction_data`` columns of each.
            first_part = spectrum[:, 0:fraction_data]
            second_part = spectrum[:, 128:128 + fraction_data]
            cropped.append(np.concatenate((first_part, second_part), axis=1))

        print(np.mean(cropped))
        print(np.std(cropped))
        # axis=None: one global mean/std over the whole set passed in.
        normalized = scipy.stats.zscore(cropped, axis=None)

        # Append a trailing axis of size 1 for the network input.
        images = tf.expand_dims(tf.convert_to_tensor(normalized), -1)
        print(images.shape)
        return images

    for random_state in range(amount):
        print('----------------------------------------------------------------------------')
        print(f'RND STATE NUMBER = {random_state}')
        print('----------------------------------------------------------------------------')

        # ------------------------------ split ------------------------------
        train_df, test_df = train_test_split(df, test_size=0.15,
                                             random_state=random_state)

        # ------------------------- training inputs -------------------------
        train_tensor = _prepare_images(train_df.fftData)
        train_label = train_df["occupants"]

        # Share of each occupant count (0..3) in the training split, passed
        # to Keras as class weights.
        # NOTE(review): these are the class frequencies themselves, not their
        # inverse, so frequent classes get MORE weight - confirm intended.
        class_weights = {}
        for occupants in range(4):
            share = float((train_label == occupants).sum()) / len(train_df)
            print(share)
            class_weights[occupants] = share

        print(train_label)

        # --------------------------- test inputs ---------------------------
        # NOTE(review): the test split is z-scored with its own mean/std
        # rather than the training statistics; kept as in the original.
        test_labels = np.array(test_df["occupants"])
        test_images = _prepare_images(test_df["fftData"])
        print(test_labels)

        # ------------------------------ model ------------------------------
        # Reuse every layer but the last of the saved model, add a single
        # linear output unit and freeze everything except that new unit.
        imported_model = load_model(mypath)
        model = tf.keras.Sequential()
        for layer in imported_model.layers[:-1]:
            model.add(layer)
        model.add(tf.keras.layers.Dense(units=1, activation='linear'))
        for layer in model.layers[:-1]:
            layer.trainable = False

        # NOTE(review): the original cell also built
        # Adam(learning_rate=0.3e-4) but never passed it to compile(), so the
        # Keras default optimizer has been used all along. The call is left
        # unchanged to keep results comparable - confirm which was intended.
        model.compile(loss='mse', metrics=['mae'])

        print('------------------------------------------------------------------------')
        print(f'Training regression model {model_name} with transfer learning')
        # Bug fix: inner loops used to overwrite the outer loop index, so
        # every split was saved into the same experiment folder. The folder
        # is now keyed by the actual random state.
        experiment = f"regression_test_12_{random_state}"

        # The test split doubles as validation data (no early stopping).
        history = model.fit(np.array(train_tensor), train_label,
                            class_weight=class_weights,
                            batch_size=batch_size,
                            epochs=n_epoch,
                            validation_data=(test_images, test_labels),
                            callbacks=[],
                            verbose=False)
        print(history.history.keys())

        # ---------------------------- evaluation ---------------------------
        Y_prediction = model.predict(test_images)
        predicted = np.array([round(row[0]) for row in Y_prediction])

        # Exact match on the raw labels.
        accuracy4 = np.count_nonzero(predicted == test_labels) / len(predicted)

        # Same comparison with label 3 merged into label 2 on both sides.
        # np.where returns new arrays, so test_labels is no longer mutated.
        true_merged = np.where(test_labels == 3, 2, test_labels)
        predicted_merged = np.where(predicted == 3, 2, predicted)
        accuracy3 = np.count_nonzero(predicted_merged == true_merged) / len(predicted)

        print(f"accuracy4 = {accuracy4}")
        print(f"accuracy3 = {accuracy3}")

        # ---------------------------- save model ---------------------------
        MODELS_DIR = f'models/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}/{model_name}/'
        # exist_ok replaces the original check-then-create guarded by a
        # broken `except e:` clause (undefined name, `errno` never imported).
        os.makedirs(MODELS_DIR, exist_ok=True)
        model.save(MODELS_DIR + f'fold_{fold_no}')

        accuracies4.append(accuracy4)
        accuracies3.append(accuracy3)

    # ------------------------------ save results ----------------------------
    row = {'accuracies4': accuracies4,
           'accuracies3': accuracies3}
    JSON_DIR = f'json_child/experiment_regression_BEST/fraction_{fraction}/n_epoch_{n_epoch}'
    os.makedirs(JSON_DIR, exist_ok=True)
    try:
        with open(f'{JSON_DIR}/accuracies.json', 'w') as f:
            json.dump(row, f)
    except Exception as e:
        # Best effort, as in the original: report the problem, keep going.
        print(e)

In [None]:
# Run the sweep on 40 train/test splits (random_state 0..39). This calls
# whichever `train_random_states` definition was executed last, so run it
# right after the cell above.
train_random_states(40)

## Training with the full dataset (n_epoch 50)

In [34]:
# Module-level accumulators that train_random_states appends to (one entry per
# split); re-running this cell resets them to two fresh, independent lists.
accuracies4, accuracies3 = [], []
def train_random_states(amount, n_epoch=50):
    """Train the regression head on ``amount`` random train/test splits.

    For every ``random_state`` in ``range(amount)`` the global ``df`` is split
    85/15, the ``fftData`` spectra are cropped and z-scored, the saved model
    under ``models_full_train/`` is reloaded with its last layer replaced by a
    single linear unit (all other layers frozen), and the result is trained
    with an MSE loss. Rounded predictions on the test split are scored twice:
    against the raw ``occupants`` labels (``accuracy4``) and with label 3
    merged into label 2 on both sides (``accuracy3``).

    Side effects:
        * appends one value per split to the module-level lists
          ``accuracies4`` and ``accuracies3``; they are not reset here, so
          repeated calls keep accumulating;
        * saves each trained model under
          ``models/experiment_regression_test_12_<random_state>/...``;
        * writes both accuracy lists to
          ``json_child/experiment_regression_BEST/.../accuracies.json``
          (a failure to write is printed, not raised).

    Args:
        amount: number of splits to run; split ``k`` uses ``random_state=k``.
        n_epoch: training epochs per split. Defaults to 50, the value this
            cell originally hard-coded.
    """
    # Imported here, as in the original cell, so the function does not depend
    # on another cell for this name; hoisted out of the loop.
    from sklearn.model_selection import train_test_split

    # Settings shared by every split. They are bound before the loop so the
    # JSON path at the end is valid even when ``amount`` is 0.
    fraction = 3
    fraction_data = int(round(128 / fraction))  # 43 columns kept per half
    folder = '4_classes_test_2'
    model_name = '2-7-3-12-0.3-1'
    batch_size = 32
    fold_no = 1
    mypath = f'models_full_train/experiment_{folder}/{model_name}/model'

    def _prepare_images(fft_column):
        """Crop, z-score and tensorise one ``fftData`` column."""
        cropped = []
        for sample in np.array(fft_column):
            spectrum = np.array(sample)
            # Per the original notebook comment, each sample holds two spectra
            # of 128 columns; keep the first ``fraction_data`` columns of each.
            first_part = spectrum[:, 0:fraction_data]
            second_part = spectrum[:, 128:128 + fraction_data]
            cropped.append(np.concatenate((first_part, second_part), axis=1))

        print(np.mean(cropped))
        print(np.std(cropped))
        # axis=None: one global mean/std over the whole set passed in.
        normalized = scipy.stats.zscore(cropped, axis=None)

        # Append a trailing axis of size 1 for the network input.
        images = tf.expand_dims(tf.convert_to_tensor(normalized), -1)
        print(images.shape)
        return images

    for random_state in range(amount):
        print('----------------------------------------------------------------------------')
        print(f'RND STATE NUMBER = {random_state}')
        print('----------------------------------------------------------------------------')

        # ------------------------------ split ------------------------------
        train_df, test_df = train_test_split(df, test_size=0.15,
                                             random_state=random_state)

        # ------------------------- training inputs -------------------------
        train_tensor = _prepare_images(train_df.fftData)
        train_label = train_df["occupants"]

        # Share of each occupant count (0..3) in the training split, passed
        # to Keras as class weights.
        # NOTE(review): these are the class frequencies themselves, not their
        # inverse, so frequent classes get MORE weight - confirm intended.
        class_weights = {}
        for occupants in range(4):
            share = float((train_label == occupants).sum()) / len(train_df)
            print(share)
            class_weights[occupants] = share

        print(train_label)

        # --------------------------- test inputs ---------------------------
        # NOTE(review): the test split is z-scored with its own mean/std
        # rather than the training statistics; kept as in the original.
        test_labels = np.array(test_df["occupants"])
        test_images = _prepare_images(test_df["fftData"])
        print(test_labels)

        # ------------------------------ model ------------------------------
        # Reuse every layer but the last of the saved model, add a single
        # linear output unit and freeze everything except that new unit.
        imported_model = load_model(mypath)
        model = tf.keras.Sequential()
        for layer in imported_model.layers[:-1]:
            model.add(layer)
        model.add(tf.keras.layers.Dense(units=1, activation='linear'))
        for layer in model.layers[:-1]:
            layer.trainable = False

        # NOTE(review): the original cell also built
        # Adam(learning_rate=0.3e-4) but never passed it to compile(), so the
        # Keras default optimizer has been used all along. The call is left
        # unchanged to keep results comparable - confirm which was intended.
        model.compile(loss='mse', metrics=['mae'])

        print('------------------------------------------------------------------------')
        print(f'Training regression model {model_name} with transfer learning')
        # Bug fix: inner loops used to overwrite the outer loop index, so
        # every split was saved into the same experiment folder. The folder
        # is now keyed by the actual random state.
        experiment = f"regression_test_12_{random_state}"

        # The test split doubles as validation data (no early stopping).
        history = model.fit(np.array(train_tensor), train_label,
                            class_weight=class_weights,
                            batch_size=batch_size,
                            epochs=n_epoch,
                            validation_data=(test_images, test_labels),
                            callbacks=[],
                            verbose=False)
        print(history.history.keys())

        # ---------------------------- evaluation ---------------------------
        Y_prediction = model.predict(test_images)
        predicted = np.array([round(row[0]) for row in Y_prediction])

        # Exact match on the raw labels.
        accuracy4 = np.count_nonzero(predicted == test_labels) / len(predicted)

        # Same comparison with label 3 merged into label 2 on both sides.
        # np.where returns new arrays, so test_labels is no longer mutated.
        true_merged = np.where(test_labels == 3, 2, test_labels)
        predicted_merged = np.where(predicted == 3, 2, predicted)
        accuracy3 = np.count_nonzero(predicted_merged == true_merged) / len(predicted)

        print(f"accuracy4 = {accuracy4}")
        print(f"accuracy3 = {accuracy3}")

        # ---------------------------- save model ---------------------------
        MODELS_DIR = f'models/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}/{model_name}/'
        # exist_ok replaces the original check-then-create guarded by a
        # broken `except e:` clause (undefined name, `errno` never imported).
        os.makedirs(MODELS_DIR, exist_ok=True)
        model.save(MODELS_DIR + f'fold_{fold_no}')

        accuracies4.append(accuracy4)
        accuracies3.append(accuracy3)

    # ------------------------------ save results ----------------------------
    row = {'accuracies4': accuracies4,
           'accuracies3': accuracies3}
    JSON_DIR = f'json_child/experiment_regression_BEST/fraction_{fraction}/n_epoch_{n_epoch}'
    os.makedirs(JSON_DIR, exist_ok=True)
    try:
        with open(f'{JSON_DIR}/accuracies.json', 'w') as f:
            json.dump(row, f)
    except Exception as e:
        # Best effort, as in the original: report the problem, keep going.
        print(e)

In [None]:
# Run the sweep on 40 train/test splits (random_state 0..39). This calls
# whichever `train_random_states` definition was executed last, so run it
# right after the cell above.
train_random_states(40)

## Training with the full dataset (n_epoch 300)

In [36]:
# Module-level accumulators that train_random_states appends to (one entry per
# split); re-running this cell resets them to two fresh, independent lists.
accuracies4, accuracies3 = [], []
def train_random_states(amount, n_epoch=300):
    """Train the regression head on ``amount`` random train/test splits.

    For every ``random_state`` in ``range(amount)`` the global ``df`` is split
    85/15, the ``fftData`` spectra are cropped and z-scored, the saved model
    under ``models_full_train/`` is reloaded with its last layer replaced by a
    single linear unit (all other layers frozen), and the result is trained
    with an MSE loss. Rounded predictions on the test split are scored twice:
    against the raw ``occupants`` labels (``accuracy4``) and with label 3
    merged into label 2 on both sides (``accuracy3``).

    Side effects:
        * appends one value per split to the module-level lists
          ``accuracies4`` and ``accuracies3``; they are not reset here, so
          repeated calls keep accumulating;
        * saves each trained model under
          ``models/experiment_regression_test_12_<random_state>/...``;
        * writes both accuracy lists to
          ``json_child/experiment_regression_BEST/.../accuracies.json``
          (a failure to write is printed, not raised).

    Args:
        amount: number of splits to run; split ``k`` uses ``random_state=k``.
        n_epoch: training epochs per split. Defaults to 300, the value this
            cell originally hard-coded.
    """
    # Imported here, as in the original cell, so the function does not depend
    # on another cell for this name; hoisted out of the loop.
    from sklearn.model_selection import train_test_split

    # Settings shared by every split. They are bound before the loop so the
    # JSON path at the end is valid even when ``amount`` is 0.
    fraction = 3
    fraction_data = int(round(128 / fraction))  # 43 columns kept per half
    folder = '4_classes_test_2'
    model_name = '2-7-3-12-0.3-1'
    batch_size = 32
    fold_no = 1
    mypath = f'models_full_train/experiment_{folder}/{model_name}/model'

    def _prepare_images(fft_column):
        """Crop, z-score and tensorise one ``fftData`` column."""
        cropped = []
        for sample in np.array(fft_column):
            spectrum = np.array(sample)
            # Per the original notebook comment, each sample holds two spectra
            # of 128 columns; keep the first ``fraction_data`` columns of each.
            first_part = spectrum[:, 0:fraction_data]
            second_part = spectrum[:, 128:128 + fraction_data]
            cropped.append(np.concatenate((first_part, second_part), axis=1))

        print(np.mean(cropped))
        print(np.std(cropped))
        # axis=None: one global mean/std over the whole set passed in.
        normalized = scipy.stats.zscore(cropped, axis=None)

        # Append a trailing axis of size 1 for the network input.
        images = tf.expand_dims(tf.convert_to_tensor(normalized), -1)
        print(images.shape)
        return images

    for random_state in range(amount):
        print('----------------------------------------------------------------------------')
        print(f'RND STATE NUMBER = {random_state}')
        print('----------------------------------------------------------------------------')

        # ------------------------------ split ------------------------------
        train_df, test_df = train_test_split(df, test_size=0.15,
                                             random_state=random_state)

        # ------------------------- training inputs -------------------------
        train_tensor = _prepare_images(train_df.fftData)
        train_label = train_df["occupants"]

        # Share of each occupant count (0..3) in the training split, passed
        # to Keras as class weights.
        # NOTE(review): these are the class frequencies themselves, not their
        # inverse, so frequent classes get MORE weight - confirm intended.
        class_weights = {}
        for occupants in range(4):
            share = float((train_label == occupants).sum()) / len(train_df)
            print(share)
            class_weights[occupants] = share

        print(train_label)

        # --------------------------- test inputs ---------------------------
        # NOTE(review): the test split is z-scored with its own mean/std
        # rather than the training statistics; kept as in the original.
        test_labels = np.array(test_df["occupants"])
        test_images = _prepare_images(test_df["fftData"])
        print(test_labels)

        # ------------------------------ model ------------------------------
        # Reuse every layer but the last of the saved model, add a single
        # linear output unit and freeze everything except that new unit.
        imported_model = load_model(mypath)
        model = tf.keras.Sequential()
        for layer in imported_model.layers[:-1]:
            model.add(layer)
        model.add(tf.keras.layers.Dense(units=1, activation='linear'))
        for layer in model.layers[:-1]:
            layer.trainable = False

        # NOTE(review): the original cell also built
        # Adam(learning_rate=0.3e-4) but never passed it to compile(), so the
        # Keras default optimizer has been used all along. The call is left
        # unchanged to keep results comparable - confirm which was intended.
        model.compile(loss='mse', metrics=['mae'])

        print('------------------------------------------------------------------------')
        print(f'Training regression model {model_name} with transfer learning')
        # Bug fix: inner loops used to overwrite the outer loop index, so
        # every split was saved into the same experiment folder. The folder
        # is now keyed by the actual random state.
        experiment = f"regression_test_12_{random_state}"

        # The test split doubles as validation data (no early stopping).
        history = model.fit(np.array(train_tensor), train_label,
                            class_weight=class_weights,
                            batch_size=batch_size,
                            epochs=n_epoch,
                            validation_data=(test_images, test_labels),
                            callbacks=[],
                            verbose=False)
        print(history.history.keys())

        # ---------------------------- evaluation ---------------------------
        Y_prediction = model.predict(test_images)
        predicted = np.array([round(row[0]) for row in Y_prediction])

        # Exact match on the raw labels.
        accuracy4 = np.count_nonzero(predicted == test_labels) / len(predicted)

        # Same comparison with label 3 merged into label 2 on both sides.
        # np.where returns new arrays, so test_labels is no longer mutated.
        true_merged = np.where(test_labels == 3, 2, test_labels)
        predicted_merged = np.where(predicted == 3, 2, predicted)
        accuracy3 = np.count_nonzero(predicted_merged == true_merged) / len(predicted)

        print(f"accuracy4 = {accuracy4}")
        print(f"accuracy3 = {accuracy3}")

        # ---------------------------- save model ---------------------------
        MODELS_DIR = f'models/experiment_{experiment}/fraction_{fraction}/n_epoch_{n_epoch}/{model_name}/'
        # exist_ok replaces the original check-then-create guarded by a
        # broken `except e:` clause (undefined name, `errno` never imported).
        os.makedirs(MODELS_DIR, exist_ok=True)
        model.save(MODELS_DIR + f'fold_{fold_no}')

        accuracies4.append(accuracy4)
        accuracies3.append(accuracy3)

    # ------------------------------ save results ----------------------------
    row = {'accuracies4': accuracies4,
           'accuracies3': accuracies3}
    JSON_DIR = f'json_child/experiment_regression_BEST/fraction_{fraction}/n_epoch_{n_epoch}'
    os.makedirs(JSON_DIR, exist_ok=True)
    try:
        with open(f'{JSON_DIR}/accuracies.json', 'w') as f:
            json.dump(row, f)
    except Exception as e:
        # Best effort, as in the original: report the problem, keep going.
        print(e)

In [None]:
# Run the sweep on 40 train/test splits (random_state 0..39). This calls
# whichever `train_random_states` definition was executed last, so run it
# right after the cell above.
train_random_states(40)

## Results are saved to a .json file