#### CIE-Project B || Group - 7

#### Imports

In [1]:
import os
import pandas as pd
import numpy as np
import scipy as sp
from scipy.signal import resample, find_peaks
from scipy.io import loadmat
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import pickle
import time

# from sklearn import preprocessing ##importing for normalization
# from sklearn.decomposition import PCA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import MinMaxScaler, minmax_scale, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error
from sklearn.multioutput import MultiOutputRegressor
from sklearn import svm
from sklearn.neighbors import KNeighborsRegressor

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D, AveragePooling1D
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
from keras.utils import np_utils
from keras_tuner.tuners import RandomSearch
from keras_tuner.engine.hyperparameters import HyperParameters

# Global matplotlib defaults applied to every figure created below.
plt.rcParams['figure.dpi'] = 175
plt.rcParams['axes.grid'] = True
plt.rcParams['legend.frameon'] = True
plt.rcParams['figure.figsize'] = (8, 8)
# IPython magic selecting the interactive widget backend; only valid when run inside a notebook.
%matplotlib widget
# %matplotlib inline

# Seed the TensorFlow and NumPy global random number generators.
tf.random.set_seed(1)
np.random.seed(2)

##### Get the root directory and change to it

In [2]:
# The current working directory is the root that the os.walk calls below search for data folders.
rootdir = os.getcwd()
# Changing into the directory just returned by getcwd() leaves the working directory unchanged.
os.chdir(rootdir)

#### Folder Variables Assignment

In [3]:
# Names of the folders
# (the parsing cells match them case-insensitively as substrings of each directory path)
Training_Folder = 'EPOT_DATA'
Validation_Aug_Folder = 'Validation_augmented_data'
Experiment = 'Experiment_Data_Group7'
Ex_Validation = 'Experimental_validation'

# Only printed further down next to the array shapes; never modified in the visible code.
counter = 0

#### Parsing

##### Parse training and validation-augmented data

In [4]:
# Containers for the training quadrant and for the augmented validation samples
training_tr, training_tr_labels = [], []
validation_aug, validation_aug_labels = [], []  # validation augmented data

#------------------------------------------------------------------------------

for subdir, dirs, files in os.walk(rootdir):
    folder = subdir.lower()

    # Choose the destination lists from the folder name; the training folder
    # takes precedence when both names occur in the path.
    if Training_Folder.lower() in folder:
        samples, labels = training_tr, training_tr_labels
    elif Validation_Aug_Folder.lower() in folder:
        samples, labels = validation_aug, validation_aug_labels
    else:
        continue

    for file in files:
        if not file.endswith('.mat'):
            continue
        # Label = 2nd underscore-separated field and the first three
        # characters of the 3rd field, converted to float
        parts = file.split('_')
        lb = np.asarray([parts[1], parts[2][:3]], dtype=float)
        # load the mat-file and keep only its 'num_data' array
        mat = loadmat(os.path.join(subdir, file))['num_data']
        samples.append(mat)
        labels.append(lb)

In [5]:
plt.close()
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)

l = 6  # index of the training sample to display

fig.suptitle(f'EPOT Training Data - {training_tr_labels[l]}')

sample = training_tr[l]
# One panel per channel column (1-4), each plotted against column 0
for ax, channel, colour in zip(axs.flat, range(1, 5), 'rgby'):
    ax.plot(sample[:, 0], sample[:, channel], label=f'P - {channel}', c=colour)

fig.legend()
plt.show()

##### Parse, Cut, Normalize, and Resample the experimental data

In [6]:
# Raw experimental recordings. No labels are parsed for this set, so
# ex_labels stays empty.
ex_data = []; ex_labels = []

#------------------------------------------------------------------------------

for subdir, dirs, files in os.walk(rootdir):

    if Experiment.lower() not in subdir.lower():
        continue

    for file in files:
        if not file.endswith('.txt'):
            continue
        # read_csv opens and closes the file itself; the previous extra
        # open() only created a handle that was never read.
        # Format: tab-separated, decimal commas, first 8 rows skipped.
        x = pd.read_csv(os.path.join(subdir, file), delimiter='\t', decimal=',', skiprows=8, header=None)
        x = np.asarray(x, dtype=float)
        ex_data.append(x)

In [7]:
plt.close()
l = 5  # index of the raw experimental recording to show
recording = ex_data[l]
# Column 1 against column 0 of the selected recording
plt.plot(recording[:, 0], recording[:, 1], label='P - 1', c='r')
plt.show()

In [8]:
ex_cut_data = []
threshold = 0.1  # absolute amplitude that marks the start of the signal

for recording in ex_data:
    # First sample index at which each of the four channels exceeds the threshold
    onsets = [np.where(abs(recording[:, ch]) > threshold)[0][0] for ch in range(1, 5)]

    # The window starts at the earliest onset and spans 200 samples
    start = min(onsets)
    end = start + 200

    window = recording[start:end, :]
    d = np.empty_like(window)
    d[:, 0] = window[:, 0]
    # Channels are sign-flipped; source columns 3 and 4 trade places
    d[:, 1:] = (-1) * window[:, [1, 2, 4, 3]]

    ex_cut_data.append(d)

In [9]:
plt.close()
l = 5
cut = ex_cut_data[l]
# Overlay the four channel columns of the cut recording against column 0
for channel in range(1, 5):
    plt.plot(cut[:, 0], cut[:, channel], label=f'P - {channel}')
plt.show()

Normalize and Resample experimental data

In [10]:
# Zero array shaped like the stacked cut recordings; column 0 is not written
# below and therefore stays zero.
ex_norm = np.zeros_like(np.asarray(ex_cut_data))

for idx, cut in enumerate(ex_cut_data):
    # minmax_scale works column-wise, so each channel of this recording is
    # rescaled to [0, 1] independently of the others
    ex_norm[idx][:, 1:5] = minmax_scale(cut[:, 1:5])

In [11]:
size = 100  # number of samples per channel after resampling

# Discard column 0 (the 'time' column) and resample the four channel columns
# of every normalised recording along the sample axis.
ex_resample = [resample(normed[:, 1:], size) for normed in ex_norm]

np.asarray(ex_resample).shape

(18, 100, 4)

##### Parse, Cut, Normalize, and Resample the experimental validation data

In [12]:
# Raw experimental-validation recordings and the numbers parsed from their
# file names.
ex_val_data = []; ex_val_labels = []

#------------------------------------------------------------------------------

for subdir, dirs, files in os.walk(rootdir):

    if Ex_Validation.lower() not in subdir.lower():
        continue

    for file in files:
        if not file.endswith('.txt'):
            continue

        # Underscore-separated fields 2 and 3 are the two label coordinates;
        # the first three characters of field 5 are split off as the trial below.
        parts = file.split('_')
        lbl = [parts[2], parts[3], parts[5][:3]]
        lb = np.asarray(lbl, dtype=float)

        # read_csv opens and closes the file itself; the previous extra
        # open() only created a handle that was never read.
        x = pd.read_csv(os.path.join(subdir, file), delimiter='\t', decimal=',', skiprows=8, header=None)
        x = np.asarray(x, dtype=float)
        ex_val_data.append(x)
        ex_val_labels.append(lb)


# Split the third parsed value (shown as the trial in later plot titles) from the coordinates
ex_val_trails = np.asarray(ex_val_labels)[:,2]
ex_val_labels = np.delete(ex_val_labels, 2, 1)

In [13]:
# ex_val_cut_data=[]
# for i in range(0,len(ex_val_data)):
#     data=ex_data[i][:,0]
    
#     start_time = 0.04996
#     end_time = 0.05016
#     start=np.where(data==(start_time))
#     end = np.where(data==(end_time))
    
#     d = np.zeros(((start[0][0]+200-start[0][0]),5))

#     d[:,0]=ex_val_data[i][start[0][0]:start[0][0]+200,0]
#     d[:,1]=ex_val_data[i][start[0][0]:start[0][0]+200,1]
#     d[:,2]=ex_val_data[i][start[0][0]:start[0][0]+200,2]
#     d[:,3]=ex_val_data[i][start[0][0]:start[0][0]+200,4]
#     d[:,4]=ex_val_data[i][start[0][0]:start[0][0]+200,3]
    
#     ex_val_cut_data.append(d)
#----------------------------------------------------------------------------------------


ex_val_cut_data = []
threshold = 0.1  # absolute amplitude that marks the start of the signal

for recording in ex_val_data:
    # First sample index at which each of the four channels exceeds the threshold
    onsets = [np.where(abs(recording[:, ch]) > threshold)[0][0] for ch in range(1, 5)]

    # The window starts at the earliest onset and spans 180 samples
    start = min(onsets)
    end = start + 180

    window = recording[start:end, :]
    d = np.empty_like(window)
    d[:, 0] = window[:, 0]
    # Channels are sign-flipped; source columns 3 and 4 trade places
    d[:, 1:] = (-1) * window[:, [1, 2, 4, 3]]

    ex_val_cut_data.append(d)

Normalize and Resample experimental validation data

In [14]:
# Zero array shaped like the stacked cut recordings; column 0 is not written
# below and therefore stays zero.
# (The former zero pre-allocation of ex_val_norm_labels was dropped: it had the
# shape of the signal data and was overwritten by fit_transform anyway.)
ex_val_norm = np.zeros_like(np.asarray(ex_val_cut_data))

# Rescale every channel of every recording independently to [0, 1]
for i in range(len(ex_val_cut_data)):
    for j in range(1, 5):
        ex_val_norm[i][:, j] = minmax_scale(ex_val_cut_data[i][:, j])

# NOTE(review): this scaler is fitted on the validation labels themselves, while
# the training labels are scaled later by a separately fitted MinMaxScaler. The
# two normalised label sets only share a scale if both label sets span the same
# min/max - confirm, or transform these labels with the training scaler instead.
scalar = MinMaxScaler()
ex_val_norm_labels = scalar.fit_transform(ex_val_labels)

In [15]:
size = 100  # number of samples per channel after resampling

# Discard column 0 (the 'time' column) and resample the four channel columns
# of every normalised recording along the sample axis.
ex_val_resample = [resample(normed[:, 1:], size) for normed in ex_val_norm]

np.asarray(ex_val_resample).shape

(24, 100, 4)

#### Augmentation

##### Variable initialization

In [16]:
## -----------------------------------------------------------------------------
# Assuming P1 on top and counter clockwise order
## -----------------------------------------------------------------------------

# Zero-filled placeholders for the three mirrored quadrants (top-left,
# bottom-left, bottom-right), shaped like the parsed training data and labels.
_data_template = np.asarray(training_tr)
_label_template = np.asarray(training_tr_labels)

training_tl, training_bl, training_br = (np.zeros_like(_data_template) for _ in range(3))
training_tl_labels, training_bl_labels, training_br_labels = (np.zeros_like(_label_template) for _ in range(3))

# The templates were only needed for their shape and dtype
del _data_template, _label_template

##### Top-Left

In [17]:
# Signals: copy every top-right sample with columns 2 and 4 exchanged
for i, sample in enumerate(training_tr):
    training_tl[i] = sample[:, [0, 1, 4, 3, 2]]

# Labels: first coordinate becomes 500 - value, second coordinate is kept
tr_labels = np.asarray(training_tr_labels)
training_tl_labels[:, 0] = 500 - tr_labels[:, 0]
training_tl_labels[:, 1] = tr_labels[:, 1]

##### Bottom-Left

In [18]:
# Signals: derived from the top-left set with columns 1 and 3 exchanged
for i in range(len(training_bl)):
    training_bl[i] = training_tl[i][:, [0, 3, 2, 1, 4]]

# Labels: both coordinates become 500 - value of the original top-right label
tr_labels = np.asarray(training_tr_labels)
training_bl_labels[:, 0] = 500. - tr_labels[:, 0]
training_bl_labels[:, 1] = 500. - tr_labels[:, 1]

##### Bottom-Right

In [19]:
# Signals: derived from the bottom-left set with columns 2 and 4 exchanged
for i in range(len(training_br)):
    training_br[i] = training_bl[i][:, [0, 1, 4, 3, 2]]

# Labels: first coordinate is kept, second coordinate becomes 500 - value
tr_labels = np.asarray(training_tr_labels)
training_br_labels[:, 0] = tr_labels[:, 0]
training_br_labels[:, 1] = 500 - tr_labels[:, 1]

##### Delete overlapping data points

In [20]:
# Top-left: drop every sample that has a label coordinate equal to 250.
# The row indices are computed once, before the labels themselves are trimmed.
tl_dup = np.where(np.asarray(training_tl_labels) == 250.)[0]
training_tl = np.delete(np.asarray(training_tl), tl_dup, axis = 0)
training_tl_labels = np.delete(np.asarray(training_tl_labels), tl_dup, axis = 0)

# Bottom-left: drop only the sample whose two coordinates are both 250.
# It is located by its label rather than assumed to sit at index 0, because the
# sample order comes from os.walk and is not guaranteed.
bl_dup = np.where(np.all(np.asarray(training_bl_labels) == 250., axis = 1))[0]
training_bl = np.delete(np.asarray(training_bl), bl_dup, axis = 0)
training_bl_labels = np.delete(np.asarray(training_bl_labels), bl_dup, axis = 0)

# Bottom-right: same rule as top-left.
br_dup = np.where(np.asarray(training_br_labels) == 250.)[0]
training_br = np.delete(np.asarray(training_br), br_dup, axis = 0)
training_br_labels = np.delete(np.asarray(training_br_labels), br_dup, axis = 0)

##### Make sure the augmentation is done on all quadrants

In [21]:
# Shapes of the four quadrant arrays, laid out as on the plate (TL TR / BL BR), then the counter
tl_shape, tr_shape, bl_shape, br_shape = (
    np.asarray(quadrant).shape
    for quadrant in (training_tl, training_tr, training_bl, training_br)
)
print(f'{tl_shape} \t{tr_shape} \n{bl_shape} \t{br_shape} \n{counter}')

(219, 20000, 5) 	(244, 20000, 5) 
(243, 20000, 5) 	(219, 20000, 5) 
0


In [22]:
plt.close()

# One marker-only plot call per quadrant. Each label set is drawn from its own
# array, so the top-left and bottom-right sets no longer share a loop index
# (which silently required them to have the same length).
for labels, marker, colour in (
    (training_tr_labels, 'o', 'r'),
    (training_tl_labels, 's', 'g'),
    (training_br_labels, 'P', 'y'),
    (training_bl_labels, '*', 'b'),
):
    points = np.asarray(labels)
    plt.plot(points[:, 0], points[:, 1], marker, c = colour)

plt.xticks(np.arange(175, 330, 5), rotation = 'vertical')
plt.yticks(np.arange(175, 330, 5))
plt.show()

#### Concatenate, Cut, Normalize, Resample Training Data

##### Concatenate

In [23]:
# Stack the four quadrants (signals and labels in the same order) along the sample axis
quadrant_data = (training_tr, training_tl, training_bl, training_br)
quadrant_labels = (training_tr_labels, training_tl_labels, training_bl_labels, training_br_labels)

training = np.concatenate(quadrant_data, axis = 0)
training_labels = np.concatenate(quadrant_labels, axis = 0)

print(f'Shape of all training set: {training.shape} \nShape of all training labels: {training_labels.shape}')

Shape of all training set: (925, 20000, 5) 
Shape of all training labels: (925, 2)


##### Check against the validation augmented data

In [24]:
plt.close()
fig, axs = plt.subplots(2, 4, sharex=True)

val_c = 1 # Given validation folder has 4 data points. so use - (0, 1, 2, 3)

# find out the index of the validation set in the training data
x = []
for i in range (0, len(training_labels)):
    if (training_labels[i][0] == validation_aug_labels[val_c][0]) and (training_labels[i][1] == validation_aug_labels[val_c][1]):
        x.append(i)

# Fail with a clear message instead of an opaque "list index out of range"
if not x:
    raise IndexError(f'No training sample has the label {validation_aug_labels[val_c]}')

train_match = training[x[0]]
val_sample = validation_aug[val_c]

# plot to compare the validation set with the training set
fig.suptitle(f'EPOT Training Data Coordinates - {training_labels[x[0]]} \nValidation Augmented Data Coordinates - {validation_aug_labels[val_c]}')

# Top row: matched training sample, bottom row: validation sample
for col, colour in zip(range(1, 5), 'rgby'):
    axs[0, col - 1].plot(train_match[:, 0], train_match[:, col], label=f'P - {col}', c = colour)
for col, colour in zip(range(1, 5), 'rgby'):
    axs[1, col - 1].plot(val_sample[:, 0], val_sample[:, col], label=f'P - {col}', c = colour)

# check if the data is exactly the same; report the deviation when it is not,
# so that a mismatch is no longer silent
for i in range (1, 5):
    a, b = train_match[:, i], val_sample[:, i]
    if a.shape != b.shape:
        print(f'Channel {i}: length mismatch ({a.shape[0]} vs {b.shape[0]})')
    elif np.all(a == b):
        print('Data is augmented correctly')
    else:
        print(f'Channel {i}: max abs difference to validation data = {np.max(np.abs(a - b)):.3g}')

fig.legend()
plt.show()

In [25]:
# Element-wise difference of column 1 between the matched training sample and
# the selected validation sample
diff = np.subtract(training[x[0]][:, 1], validation_aug[val_c][:, 1])
diff

array([0.     , 0.     , 0.     , ..., 0.00011, 0.00012, 0.00012])

##### -- Check -- for outliers

In [26]:
# Disabled cell: the plotting code below is wrapped in a string literal, so it
# is only evaluated as a string and never executed. It would overlay every
# training sample, one panel per channel column.
'''
plt.close()
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)

for i in range(len(training)):
    axs[0,0].plot(training[i][:,0], training[i][:,1], label='Training P-1')
    axs[0,1].plot(training[i][:,0], training[i][:,2], label='Training P-2')
    axs[1,0].plot(training[i][:,0], training[i][:,3], label='Training P-3')
    axs[1,1].plot(training[i][:,0], training[i][:,4], label='Training P-4')

plt.show()
'''

"\nplt.close()\nfig, axs = plt.subplots(2, 2, sharex=True, sharey=True)\n\nfor i in range(len(training)):\n    axs[0,0].plot(training[i][:,0], training[i][:,1], label='Training P-1')\n    axs[0,1].plot(training[i][:,0], training[i][:,2], label='Training P-2')\n    axs[1,0].plot(training[i][:,0], training[i][:,3], label='Training P-3')\n    axs[1,1].plot(training[i][:,0], training[i][:,4], label='Training P-4')\n\nplt.show()\n"

In [27]:
# Disabled cell: the code below is wrapped in a string literal, so it is only
# evaluated as a string and never executed. It would draw a heatmap of the
# maximum of one channel column over the training label grid.
'''
%matplotlib inline
sns.set(rc={'figure.figsize':(30, 30)})

x = []; y = []; z = []

for i in range(len(training)):
    xi = training_labels[i][0]
    x.append(xi)
    yi = training_labels[i][1]
    y.append(yi)
    zi = pd.DataFrame(training[i][:,1]).max()[0]
    # zi = pd.DataFrame(training[i][:,2]).max()[0]
    # zi = pd.DataFrame(training[i][:,3]).max()[0]
    # zi = pd.DataFrame(training[i][:,4]).max()[0]

    z.append(zi)

df = pd.DataFrame({'x': x, 'y': y, 'z': z})

heatmap1_data = pd.pivot_table(df, values='z', index=['y'], columns='x')

ax = sns.heatmap(heatmap1_data, 
                square=True, 
                annot = True, 
                annot_kws={"fontsize": 10}, 
                fmt='.1f', 
                linewidths=0.1, 
                linecolor='gray',
                cbar_kws = dict(use_gridspec=False, location='right'))

ax.invert_yaxis()
'''

'\n%matplotlib inline\nsns.set(rc={\'figure.figsize\':(30, 30)})\n\nx = []; y = []; z = []\n\nfor i in range(len(training)):\n    xi = training_labels[i][0]\n    x.append(xi)\n    yi = training_labels[i][1]\n    y.append(yi)\n    zi = pd.DataFrame(training[i][:,1]).max()[0]\n    # zi = pd.DataFrame(training[i][:,2]).max()[0]\n    # zi = pd.DataFrame(training[i][:,3]).max()[0]\n    # zi = pd.DataFrame(training[i][:,4]).max()[0]\n\n    z.append(zi)\n\ndf = pd.DataFrame({\'x\': x, \'y\': y, \'z\': z})\n\nheatmap1_data = pd.pivot_table(df, values=\'z\', index=[\'y\'], columns=\'x\')\n\nax = sns.heatmap(heatmap1_data, \n                square=True, \n                annot = True, \n                annot_kws={"fontsize": 10}, \n                fmt=\'.1f\', \n                linewidths=0.1, \n                linecolor=\'gray\',\n                cbar_kws = dict(use_gridspec=False, location=\'right\'))\n\nax.invert_yaxis()\n'

Experimental Validation

In [28]:
# Disabled cell: the code below is wrapped in a string literal, so it is only
# evaluated as a string and never executed. It would draw a heatmap of the
# maximum of column 1 of the raw experimental-validation data over its labels.
'''
%matplotlib inline
sns.set(rc={'figure.figsize':(30, 30)})

x = []; y = []; z = []

for i in range(len(ex_val_data)):
    xi = ex_val_labels[i][0]
    x.append(xi)
    yi = ex_val_labels[i][1]
    y.append(yi)
    zi = pd.DataFrame(ex_val_data[i][:,1]).max()[0]
    # zi = pd.DataFrame(training[i][:,2]).max()[0]
    # zi = pd.DataFrame(training[i][:,3]).max()[0]
    # zi = pd.DataFrame(training[i][:,4]).max()[0]

    z.append(zi)

df = pd.DataFrame({'x': x, 'y': y, 'z': z})

heatmap1_data = pd.pivot_table(df, values='z', index=['y'], columns='x')

ax = sns.heatmap(heatmap1_data, 
                square=True, 
                annot = True, 
                annot_kws={"fontsize": 10}, 
                fmt='.1f', 
                linewidths=0.1, 
                linecolor='gray',
                cbar_kws = dict(use_gridspec=False, location='right'))

ax.invert_yaxis()
'''

'\n%matplotlib inline\nsns.set(rc={\'figure.figsize\':(30, 30)})\n\nx = []; y = []; z = []\n\nfor i in range(len(ex_val_data)):\n    xi = ex_val_labels[i][0]\n    x.append(xi)\n    yi = ex_val_labels[i][1]\n    y.append(yi)\n    zi = pd.DataFrame(ex_val_data[i][:,1]).max()[0]\n    # zi = pd.DataFrame(training[i][:,2]).max()[0]\n    # zi = pd.DataFrame(training[i][:,3]).max()[0]\n    # zi = pd.DataFrame(training[i][:,4]).max()[0]\n\n    z.append(zi)\n\ndf = pd.DataFrame({\'x\': x, \'y\': y, \'z\': z})\n\nheatmap1_data = pd.pivot_table(df, values=\'z\', index=[\'y\'], columns=\'x\')\n\nax = sns.heatmap(heatmap1_data, \n                square=True, \n                annot = True, \n                annot_kws={"fontsize": 10}, \n                fmt=\'.1f\', \n                linewidths=0.1, \n                linecolor=\'gray\',\n                cbar_kws = dict(use_gridspec=False, location=\'right\'))\n\nax.invert_yaxis()\n'

##### Cutting Training Data

In [29]:
train_cut = []
threshold = 0.1  # absolute amplitude that marks the start of the signal

for sample in training:
    # First sample index at which each of the four channels exceeds the threshold
    onsets = [np.where(abs(sample[:, ch]) > threshold)[0][0] for ch in range(1, 5)]
    start = min(onsets)

    # Keep everything from the earliest onset to the end of the record; unlike
    # the experimental data, no fixed window length is applied here.
    d = sample[start:, :].copy()

    train_cut.append(d)

##### Normalize Training Data

In [30]:
# Per-sample, per-channel min-max scaling of the cut training data.
# A list is used because the cut samples are sliced from their individual onsets;
# column 0 of every normalised sample is left at zero.
# (The former zero pre-allocation of train_norm_labels was dropped: it was
# overwritten by fit_transform below. The stale commented-out min/max probe was
# removed as well; the same check runs in the cell that follows.)
train_norm = []

for cut in train_cut:
    n = np.zeros_like(cut)
    for j in range(1, 5):
        n[:, j] = minmax_scale(cut[:, j])
    train_norm.append(n)

# Scale both label coordinates to [0, 1]. `scalar` is rebound here, replacing
# the scaler that was fitted earlier on the experimental-validation labels.
scalar = MinMaxScaler()
train_norm_labels = scalar.fit_transform(training_labels)

In [31]:
# Collect the extreme values of every normalised channel column and print the
# overall maximum and minimum as a range check
maxim = [sample[:, j].max() for sample in train_norm for j in range(1, 5)]
minim = [sample[:, j].min() for sample in train_norm for j in range(1, 5)]
print(np.asarray(maxim).max(),np.asarray(minim).min())

1.0000000000000002 0.0


In [32]:
plt.close()
# Scatter of the normalised label coordinates (first against second column)
label_x, label_y = train_norm_labels[:, 0], train_norm_labels[:, 1]
plt.plot(label_x, label_y, 'o', c = 'r', label = 'Normalized Training Data Labels')
plt.legend(loc='lower center', bbox_to_anchor=(1, -0.5))
plt.show()

##### Resample Training Data

In [33]:
size = 100  # number of samples per channel after resampling

# Discard column 0 (the 'time' column) and resample the four channel columns
# of every normalised training sample along the sample axis.
train_resample = [resample(train_norm[i][:, 1:], size) for i in range(len(training))]

np.asarray(train_resample).shape

(925, 100, 4)

In [34]:
plt.close()
fig, axs = plt.subplots(2, 1)
raw_ax, processed_ax = axs

# Column 2 of the raw sample corresponds to column 1 after the time column was dropped
raw_ax.plot(training[10][:,2], c = 'r', label = 'Original Training Data')
processed_ax.plot(train_resample[10][:,1], c = 'b', label = 'cut/ norm/ resampled Data')

for ax in (raw_ax, processed_ax):
    ax.legend(loc='lower left')
plt.show()

#### Compare Experimental, Experimental-Validation and Training Data

In [35]:
plt.close()
fig, axs = plt.subplots(2, 4, sharey=True)

ex_val_c = 20 # Given validation folder has 24 data points. so use - (0, 1, .... , 23)

# Indices of the training samples whose label equals the selected validation label
target = ex_val_labels[ex_val_c]
x = [
    idx for idx in range(len(training_labels))
    if training_labels[idx][0] == target[0] and training_labels[idx][1] == target[1]
]

# plot to compare the validation set with the training set
fig.suptitle(f'Training Data Coordinates - {training_labels[x[0]]} \nExperimental Validation Data Coordinates - {ex_val_labels[ex_val_c]} - Trial [{ex_val_trails[ex_val_c]}]')

train_match = training[x[0]]
val_cut = ex_val_cut_data[ex_val_c]

# Top row: first matching training sample, one panel per channel
for col, colour in zip(range(1, 5), 'rgby'):
    axs[0, col - 1].plot(train_match[:, 0], train_match[:, col], label=f'P - {col}', c = colour)

# Bottom row: cut experimental validation recording
for col, colour in zip(range(1, 5), 'rgby'):
    axs[1, col - 1].plot(val_cut[:, 0], val_cut[:, col], label=f'P - {col}', c = colour)

# fig.legend()
plt.show()

In [36]:
plt.close()
fig, axs = plt.subplots(1, 4, sharex=True, sharey=True)

# Overlay the first four cut recordings, one panel per channel column
for i in range(4):
    recording = ex_val_cut_data[i]
    for channel, colour in zip(range(1, 5), 'rgby'):
        axs[channel - 1].plot(recording[:, 0], recording[:, channel], c = colour, label = f'Exp Val Data - P-{channel}')

plt.show()

#### Export data to pickle

In [37]:
# with open('training_data.pickle', 'wb') as f:
#     pickle.dump(training, f)

# with open('train_resample.pickle', 'wb') as f:
#     pickle.dump(train_resample, f)
# with open('train_norm_labels.pickle', 'wb') as f:
#     pickle.dump(train_norm_labels, f)

# with open('training_labels.pickle', 'wb') as f:
#     pickle.dump(training_labels, f)

# with open('exp_validation_data.pickle', 'wb') as f:
#     pickle.dump(ex_val_data, f)
# with open('exp_validation_labels.pickle', 'wb') as f:
#     pickle.dump(ex_val_labels, f)
# with open('exp_validation_trails.pickle', 'wb') as f:
#     pickle.dump(ex_val_trails, f)


# with open('exp_val_resample.pickle', 'wb') as f:
#     pickle.dump(ex_val_resample, f)
# with open('exp_val_norm_labels.pickle', 'wb') as f:
#     pickle.dump(ex_val_norm_labels, f)

# with open('ex_resample.pickle', 'wb') as f:
#     pickle.dump(ex_resample, f)

#### AI Model

##### Split the data into training and testing sets

In [38]:
# Extract the data from the pickle files
'''
with open('train_resample.pickle', 'rb') as f:
    train_resample = pickle.load(f)
with open('train_norm_labels.pickle', 'rb') as f:
    train_norm_labels = pickle.load(f)
'''

# Inputs: resampled signals; targets: normalised label coordinates
X = np.asarray(train_resample)
y = np.asarray(train_norm_labels)

# Shuffled hold-out split; `size` is the fraction reserved for testing
size = 0.2
split = train_test_split(X, y, test_size=size, shuffle=True)
x_train, x_test, y_train, y_test = split
print(f'Length of training data - {len(x_train)} length of labels - {len(y_train)} \nLength of test data - {len(x_test)} length of labels - {len(y_test)}')

Length of training data - 740 length of labels - 740 
Length of test data - 185 length of labels - 185


##### TensorBoard

In [None]:
# %tensorboard --logdir=logs/

##### Model Training and Tuning

DNN Tuning

In [None]:
# Disabled cell: the DNN hyper-parameter search below is wrapped in a string
# literal, so it is only evaluated as a string and never executed.
# NOTE(review) before re-enabling:
#  - The hidden-layer loop names its hyperparameters f'layer_{i+1}_nodes' and
#    f"activation_{i+1}", which on the first iteration repeat 'layer_1_nodes' and
#    "activation_1" of the first Dense layer; presumably Keras Tuner treats equal
#    names as one hyperparameter - confirm and renumber if independent layers
#    are intended.
#  - `opt` and `sgd` are created but never passed to compile(); the optimizer is
#    chosen by name, so the learning rate of 0.0001 is not applied.
#  - The search objective is 'val_Accuracy' while the model has two output units
#    trained with an 'mae' loss - confirm accuracy is the intended metric.
#  - validation_data is the (x_test, y_test) split, so the held-out set also
#    drives the hyper-parameter selection.
'''
log_files = f'DNN_log.{int(time.time())}'
tuner_logs= f'tunelog/DNN_tuner_log.{int(time.time())}'

tensorboard = TensorBoard(log_dir=f'logs/{log_files}', 
                          histogram_freq=1, 
                          write_graph=True,
                          write_grads=True)


def dnnModel(hp):
    model = Sequential()
    model.add(Flatten(input_shape=(100,4)))
    model.add(Dense(units=hp.Int('layer_1_nodes', 
                    min_value=32, max_value=512, step=32), 
                    activation=hp.Choice("activation_1", ["relu", "sigmoid", "tanh"])))
    
    for i in range(hp.Int('n_layers', min_value=0, max_value=4, step=1)):
        model.add(Dense(units=hp.Int(f'layer_{i+1}_nodes', min_value=32, max_value=512, step=32), activation=hp.Choice(f"activation_{i+1}", ["relu", "sigmoid", "tanh"])))
    
    model.add(Dense(units=2, activation=hp.Choice("activation_out", ['sigmoid', 'linear']))) # output layer

    model.summary()

    # Compile model
    # lr = hp.Float("lr", min_value=1e-4, max_value=1e-2)
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    sgd = tf.keras.optimizers.SGD(learning_rate=0.0001)

    model.compile(loss = hp.Choice('loss', ['mae']),
                optimizer = hp.Choice('optimizer', ['Adam', 'RMSprop']), 
                metrics=['Accuracy'])
    
    return model

# Hyperparameter tuning
tuner = RandomSearch(dnnModel,
                    objective='val_Accuracy',
                    max_trials=50,
                    executions_per_trial=2,
                    directory=tuner_logs,
                    project_name='DNN_tuning')

tuner.search(x=x_train,
            y=y_train,
            epochs=60,
            batch_size=32,
            validation_data=(x_test, y_test),
            callbacks=[tensorboard])
            '''

In [None]:
# bestModels = tuner.get_best_models(num_models=2)

In [None]:
# Save the keras model
# model.save(f'model/complete_tunedModel_97perAcc.{int(time.time())}.h5')

CNN Tuning

In [None]:
'''
log_files = f'CNN_log.{int(time.time())}'
tuner_logs= f'tunelog/CNN_tuner_log.{int(time.time())}'

tensorboard = TensorBoard(log_dir=f'logs/{log_files}', 
                          histogram_freq=1, 
                          write_graph=True,
                          write_grads=True)

def cnnModel(hp):
    # Build and compile a 1-D CNN regressor whose architecture is drawn from
    # the KerasTuner search space `hp`; returns the compiled model.
    model = Sequential()

    # First (mandatory) convolution / pooling stage
    model.add(Conv1D(filters=hp.Int('conv_1_filter', min_value=32, max_value=512, step=32), 
                     kernel_size=hp.Int('Kernal_1', min_value=2, max_value=5, step=1), 
                     strides=hp.Int('stride_1', min_value=1, max_value=5, step=1),
                     activation=hp.Choice("activation_1", ["relu", "sigmoid"]), 
                     input_shape=(100,4)))
    
    model.add(MaxPooling1D(pool_size=hp.Int('pool_1', min_value=2, max_value=5, step=1)))

    # Optional extra stages, numbered from 2. KerasTuner identifies a
    # hyperparameter by its name, so reusing the "*_1" names here would tie
    # the first extra stage to the values already drawn for stage 1.
    for i in range(hp.Int('n_layers', min_value=0, max_value=4, step=1)):
        model.add(Conv1D(filters=hp.Int(f'conv_{i+2}_filter', min_value=32, max_value=512, step=32), 
                         kernel_size=hp.Int(f'Kernal_{i+2}', min_value=2, max_value=5, step=1), 
                         strides=hp.Int(f'stride_{i+2}', min_value=1, max_value=5, step=1),
                         activation=hp.Choice(f"activation_{i+2}", ["relu", "sigmoid"])))
        model.add(MaxPooling1D(pool_size=hp.Int(f'pool_{i+2}', min_value=2, max_value=5, step=1)))

    model.add(Flatten())


    model.add(Dense(units=hp.Int('FC_layer_1_nodes',
                    min_value=32, max_value=512, step=32),
                    activation=hp.Choice("FC_activation_1", ["relu", "sigmoid"])))
    
    model.add(Dense(units=hp.Int('FC_layer_2_nodes',
                    min_value=32, max_value=512, step=32),
                    activation=hp.Choice("FC_activation_2", ["relu", "sigmoid"])))

    model.add(Dense(units=2, activation=hp.Choice("activation_out", ['sigmoid', 'linear']))) # output layer

    model.summary()

    # Compile model
    # lr = hp.Float("lr", min_value=1e-4, max_value=1e-2)
    # The optimizer is chosen by name, so it runs with its default settings.
    model.compile(loss = hp.Choice('loss', ['mse', 'mae']),
                optimizer = hp.Choice('optimizer', ['Adam', 'RMSprop']), 
                metrics=['Accuracy'])

    return model

# Hyperparameter tuning
tuner = RandomSearch(cnnModel,
                    objective='val_Accuracy',
                    max_trials=560,
                    executions_per_trial=3,
                    directory=tuner_logs,
                    project_name='CNN_tuning')

tuner.search(x=x_train,
            y=y_train,
            epochs=60,
            batch_size=32,
            validation_data=(x_test, y_test),
            callbacks=[tensorboard])
'''

In [None]:
# bestModels = tuner.get_best_models(num_models=2)

#### DNN Model

In [None]:
# Fully connected regressor: the (100, 4) input is flattened, passed through
# five ReLU layers and mapped to a 2-unit linear output.
model = Sequential()
model.add(Flatten(input_shape=(100,4)))

model.add(Dense(units=512, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=256, activation='relu'))
model.add(Dense(units=96, activation='relu'))
model.add(Dense(units=12, activation='relu'))

model.add(Dense(units=2, activation='linear'))

model.summary()

# Compile model
# The optimizer is selected by name, so Adam runs with the Keras defaults.
# (An Adam(learning_rate=0.0001) instance used to be created here but was
# never handed to compile(); the dead assignment has been removed.)
# The metric key 'Accuracy' is kept because the plotting cell reads
# history.history['Accuracy'] / ['val_Accuracy'].
model.compile(loss = 'mse', optimizer = 'Adam', metrics=['Accuracy'])

# Fit the model; (x_test, y_test) is used as validation data each epoch
history = model.fit(x_train, y_train, epochs=50, verbose=1, batch_size=32, validation_data=(x_test, y_test))

In [None]:
# model.save(f'model/DNN_val_acc_97perAcc.{int(time.time())}.h5')

In [None]:
# MSE between targets and DNN predictions after both are mapped back through
# scalar.inverse_transform. `squared=True` was the default and the keyword is
# deprecated in newer scikit-learn releases, so it is no longer passed.
mean_squared_error(scalar.inverse_transform(y_test), scalar.inverse_transform(model.predict(x_test)))

In [None]:
# mean_squared_error((ex_val_labels), scalar.inverse_transform(model.predict(ex_val_resample)), squared=True)

In [None]:
# Training curves of the DNN: loss and 'Accuracy' metric for both the
# training and the validation data, one line per recorded series.
plt.close()
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss'),
                                 ('Accuracy', 'Training Accuracy'),
                                 ('val_Accuracy', 'Validation Accuracy')):
    plt.plot(history.history[history_key], label=curve_label)

plt.yticks(np.arange(0, 1, 0.05))

plt.tight_layout()
plt.legend()
plt.show()

#### FINAL - Tuned - CNN

In [39]:
# 1-D CNN regressor: five convolution layers (three followed by max pooling,
# two by dropout) and a three-layer dense head ending in a 2-unit linear output.
Alex4=Sequential()

Alex4.add(Conv1D(filters=96, kernel_size=5, strides=4, activation='relu', input_shape=(100,4), padding='same'))
Alex4.add(MaxPooling1D(pool_size=3, strides= 2, padding='valid'))
Alex4.add(Dropout(0.3))

Alex4.add(Conv1D(filters=256, strides=1, kernel_size=5, activation='relu', padding='same'))
Alex4.add(MaxPooling1D(pool_size=3, strides=2, padding='valid'))

Alex4.add(Conv1D(filters=384, strides=1, kernel_size=3, activation='relu', padding='same'))
Alex4.add(Dropout(0.4))

Alex4.add(Conv1D(filters=256, strides=1, kernel_size=3, activation='relu', padding='same'))

Alex4.add(Conv1D(filters=256, strides=1, kernel_size=3, activation='relu', padding='same'))
Alex4.add(MaxPooling1D(pool_size=3, strides=2, padding='valid'))

# Fully Connected Layers
# NOTE: these layers were previously given use_bias='False'. A non-empty
# string is truthy, so the bias terms were in fact always enabled. The flag is
# now a real boolean that matches what was actually trained; switch it to
# False only if bias-free layers are really wanted (this changes the model).
Alex4.add(Flatten())
Alex4.add(Dense(units=128, activation='relu', use_bias=True))
Alex4.add(Dense(units=64, activation='relu', use_bias=True))
Alex4.add(Dense(units=2, activation='linear', use_bias=True))
Alex4.summary()

# The optimizer is selected by name, so Adam runs with the Keras defaults.
# (An unused Adam(learning_rate=0.0001) instance was removed from this cell.)
# The metric key 'Accuracy' is kept because the plotting cell reads
# Alex_history.history['Accuracy'] / ['val_Accuracy'].
Alex4.compile(loss = 'mse', optimizer = 'Adam', metrics=['Accuracy'])

# Fit the cnn_model; (x_test, y_test) is used as validation data each epoch
Alex_history = Alex4.fit(x_train, y_train, epochs=90, verbose=1, batch_size=32, validation_data=(x_test, y_test))

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 25, 96)            2016      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 12, 96)           0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 12, 96)            0         
                                                                 
 conv1d_1 (Conv1D)           (None, 12, 256)           123136    
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 5, 256)           0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 5, 384)            2

In [40]:
# MSE between targets and Alex4 predictions after both are mapped back through
# scalar.inverse_transform. `squared=True` was the default and the keyword is
# deprecated in newer scikit-learn releases, so it is no longer passed.
mean_squared_error(scalar.inverse_transform(y_test), scalar.inverse_transform(Alex4.predict(x_test)))
# mean_absolute_error(scalar.inverse_transform(y_test), scalar.inverse_transform(Alex4.predict(x_test)))

8.283135388392598

In [41]:
# Alex4.save(f'model/FINAL_CNN_val_acc_97_16_MSE.{int(time.time())}.h5')

In [41]:
# Training curves of Alex4: loss and 'Accuracy' metric for both the training
# and the validation data, plotted against the epoch index.
plt.close()
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss'),
                                 ('Accuracy', 'Training Accuracy'),
                                 ('val_Accuracy', 'Validation Accuracy')):
    plt.plot(Alex_history.history[history_key], label=curve_label)

plt.yticks(np.arange(0, 1, 0.05))

plt.tight_layout()
plt.xlabel('Epochs')
plt.legend()
plt.show()

#### Predict on the experimental validation data

In [None]:
# model = tf.keras.models.load_model('model/tunedModel_98perAcc.1640573960.h5')
# model = tf.keras.models.load_model('Alex3.h5')
# model = tf.keras.models.load_model('model/CNN_val_acc_96_48_MSEperAcc.1642513870.h5')
# model.summary()

In [42]:
# Run Alex4 on the experimental validation signals, map the outputs back
# through scalar.inverse_transform and list them next to the given labels.
y_abs = ex_val_labels
y_pred = scalar.inverse_transform(np.asarray(Alex4.predict(np.asarray(ex_val_resample))))

diff = y_abs-y_pred
for i, predicted in enumerate(y_pred):
    actual = y_abs[i]
    print(f'Absolute Trial {ex_val_trails[i]}- {actual} Predicted - {predicted} \tDifference - {actual - predicted}')

Absolute Trial 1.0- [205. 200.] Predicted - [211.88933 209.00723] 	Difference - [-6.889328   -9.00723267]
Absolute Trial 2.0- [205. 200.] Predicted - [211.77893 208.04541] 	Difference - [-6.77893066 -8.04541016]
Absolute Trial 3.0- [205. 200.] Predicted - [217.23978 209.22516] 	Difference - [-12.23977661  -9.22515869]
Absolute Trial 4.0- [205. 200.] Predicted - [210.91423 209.77544] 	Difference - [-5.91423035 -9.7754364 ]
Absolute Trial 1.0- [225. 210.] Predicted - [228.30815 205.40869] 	Difference - [-3.30815125  4.59130859]
Absolute Trial 2.0- [225. 210.] Predicted - [229.55978 206.81998] 	Difference - [-4.55978394  3.18002319]
Absolute Trial 3.0- [225. 210.] Predicted - [227.4721  207.02498] 	Difference - [-2.47210693  2.97502136]
Absolute Trial 4.0- [225. 210.] Predicted - [226.51326 207.50056] 	Difference - [-1.51325989  2.49943542]
Absolute Trial 1.0- [230. 245.] Predicted - [229.5532  246.93808] 	Difference - [ 0.4467926  -1.93807983]
Absolute Trial 2.0- [230. 245.] Predicted - 

In [45]:
# A notebook cell only echoes its last expression, so the MSE used to be
# computed and silently discarded. Print both metrics explicitly instead.
# (`squared=True` was the default and is deprecated in newer scikit-learn.)
print(f'MSE: {mean_squared_error(y_abs, y_pred)}')
print(f'MAE: {mean_absolute_error(y_abs, y_pred)}')

4.230709075927734

#### Experimental Group - 7 Data (and plots for report)

In [46]:
# Alex4 predictions for the Group-7 experiment signals, mapped back through
# scalar.inverse_transform; the bare name echoes the array as cell output.
y_pred_g7 = scalar.inverse_transform(Alex4.predict(np.asarray(ex_resample)))
y_pred_g7

array([[250.53885, 243.45609],
       [236.22418, 236.65324],
       [245.07315, 251.98546],
       [263.69263, 220.28336],
       [243.4668 , 251.71895],
       [261.33875, 246.14745],
       [238.11357, 296.278  ],
       [226.96254, 229.83734],
       [265.74573, 293.51343],
       [251.57858, 247.45642],
       [258.8063 , 226.47261],
       [248.48349, 250.56963],
       [200.45544, 233.5326 ],
       [260.68024, 254.44629],
       [245.63058, 250.34038],
       [226.13751, 202.92587],
       [260.649  , 307.68744],
       [294.5068 , 275.73712]], dtype=float32)

In [47]:
# Compare one Group-7 experiment signal with the training signal whose label
# lies closest to the coordinates predicted for it.
plt.close()
fig, axs = plt.subplots(2, 4, sharey=True)

ex_c = 1 # index of the experiment sample to show; 18 data points, so use 0 .. 17

# For every prediction, index of the training label with the smallest
# |dx| + |dy|. The previous per-axis search (nearest x and nearest y looked up
# independently, then intersected) raised IndexError whenever no training
# sample carried both nearest values at once; whenever that search succeeded,
# this joint search picks the same sample (barring exact distance ties).
p = [np.argmin(np.abs(training_labels[:, :2] - pred).sum(axis=1)) for pred in y_pred_g7]

# plot to compare the experiment sample with the closest training sample
fig.suptitle(f'Closest Training Data Coordinates - {training_labels[p[ex_c]]} \nExperimental Predicted Coordinates - {y_pred_g7[ex_c]}')

# Columns 1-4 of each record are plotted as P-1 .. P-4:
# top row = training sample, bottom row = experiment sample.
for channel, colour in enumerate(('r', 'g', 'b', 'y'), start=1):
    axs[0, channel - 1].plot(training[p[ex_c]][:, channel], label=f'P - {channel}', c=colour)
    axs[1, channel - 1].plot(ex_cut_data[ex_c][:, channel], label=f'P - {channel}', c=colour)

# fig.legend()
plt.show()

##### Prediction post processing

Sorting the exact experimental data

In [48]:
'''
Group_7	AA	250	265
Group_7	AA	245	255
Group_7	AA	235	235
Group_7	AA	265	235
Group_7	AA	250	250
Group_7	AA	255	265
Group_7	B	225	200
Group_7	B	270	300
Group_7	B	200	225
Group_7	B	235	300
Group_7	B	260	320
Group_7	B	300	285
Group_7	A	250	265
Group_7	A	245	255
Group_7	A	235	235
Group_7	A	265	235
Group_7	A	250	250
Group_7	A	265	255
'''

# Exact coordinates of the 18 Group-7 experiments, one [first, second] pair
# per row, in the same order as the table quoted above.
exact_ex_labels = np.array([
    [250, 265], [245, 255], [235, 235], [265, 235], [250, 250], [255, 265],
    [225, 200], [270, 300], [200, 225], [235, 300], [260, 320], [300, 285],
    [250, 265], [245, 255], [235, 235], [265, 235], [250, 250], [265, 255],
])

In [49]:
# Hard-coded predicted coordinates for the 18 Group-7 experiments, used by the
# post-processing cells below instead of the live `y_pred_g7` array.
# NOTE(review): these values differ slightly from the `y_pred_g7` output shown
# above - presumably they were captured from an earlier training run; confirm
# before re-using them with a freshly trained model.
pred_ex_labels = np.array(
[[253.32326, 244.85556],
[234.51955, 233.11015],
[243.45721, 259.58542],
[260.85577, 225.5717 ],
[241.64229, 252.67258],
[262.7103 , 247.9715 ],
[236.75409, 299.41733],
[230.32304, 228.36177],
[264.8639 , 298.7226 ],
[252.88496, 258.21362],
[260.63187, 232.50989],
[252.09583, 252.05911],
[201.56601, 229.92183],
[256.91928, 257.9066 ],
[245.16226, 252.61618],
[224.14635, 201.8915 ],
[269.60422, 310.06744],
[298.5857 , 281.49817]])

In [50]:
# Sanity check: both label arrays should have the same shape
print(np.shape(exact_ex_labels), np.shape(pred_ex_labels))

(18, 2) (18, 2)


In [51]:
# Scatter of the training label grid with the predicted and exact Group-7
# coordinates on top; every exact point is additionally ringed in black.
plt.close()
plt.plot(training_labels[:,0], training_labels[:,1], '.', color = 'g', label='Training Data', alpha = 0.3)
for coords, colour, series_label in ((pred_ex_labels, 'b', 'Predicted'),
                                     (exact_ex_labels, 'r', 'Exact')):
    plt.plot(coords[:,0], coords[:,1], 'o', color = colour, label=series_label)
plt.scatter(exact_ex_labels[:,0], exact_ex_labels[:,1], s=750, facecolors='none', edgecolors='k')

# identical 5-unit tick grid on both axes
plt.xticks(np.arange(175, 330, 5), rotation = 'vertical')
plt.yticks(np.arange(175, 330, 5))

# legend anchored below the axes
legend_x, legend_y = 0.5, -0.27
plt.legend(bbox_to_anchor=(legend_x, legend_y), loc = 'lower center', ncol = 3)
plt.show()

In [52]:
# For every exact label find the predicted label with the smallest summed
# absolute coordinate difference.
#   p_loc[i] - index into pred_ex_labels of the best match for exact label i
#   x[i]     - the corresponding summed absolute difference
pairwise_l1 = np.abs(exact_ex_labels[:, None, :] - pred_ex_labels[None, :, :]).sum(axis=2)
p_loc = list(pairwise_l1.argmin(axis=1))
x = list(pairwise_l1.min(axis=1))

In [67]:
# Table of each exact label, its matched prediction and their difference
print('Exact Coordinates \tPredicted Coordinates \t     Difference')
for i, match in enumerate(p_loc):
    exact = exact_ex_labels[i]
    matched = pred_ex_labels[match]
    print(f'{exact}---------------{matched}--------{exact - matched}')

Exact Coordinates 	Predicted Coordinates 	     Difference
[250 265]---------------[252.88496 258.21362]--------[-2.88496  6.78638]
[245 255]---------------[245.16226 252.61618]--------[-0.16226  2.38382]
[235 235]---------------[234.51955 233.11015]--------[0.48045 1.88985]
[265 235]---------------[260.63187 232.50989]--------[4.36813 2.49011]
[250 250]---------------[252.09583 252.05911]--------[-2.09583 -2.05911]
[255 265]---------------[252.88496 258.21362]--------[2.11504 6.78638]
[225 200]---------------[224.14635 201.8915 ]--------[ 0.85365 -1.8915 ]
[270 300]---------------[264.8639 298.7226]--------[5.1361 1.2774]
[200 225]---------------[201.56601 229.92183]--------[-1.56601 -4.92183]
[235 300]---------------[236.75409 299.41733]--------[-1.75409  0.58267]
[260 320]---------------[269.60422 310.06744]--------[-9.60422  9.93256]
[300 285]---------------[298.5857  281.49817]--------[1.4143  3.50183]
[250 265]---------------[252.88496 258.21362]--------[-2.88496  6.78638]
[245 25

Reinforce prediction with post-processing

In [64]:
# Post-processing of one prediction: take the training grid point closest to
# the predicted coordinates, collect the training samples on the surrounding
# +/-10 grid and pick the one whose signal differs least from the experiment.
temp = 17  # index into pred_ex_labels / ex_resample of the sample to refine

# For every prediction, index of the training label with the smallest
# |dx| + |dy|. The previous per-axis search (nearest x and nearest y looked up
# independently, then intersected) raised IndexError whenever no training
# sample carried both nearest values at once; whenever that search succeeded,
# this joint search picks the same sample (barring exact distance ties).
p = [np.argmin(np.abs(training_labels[:, :2] - pred).sum(axis=1)) for pred in pred_ex_labels]

for nearest, pred in zip(p, pred_ex_labels):
    print(f'{training_labels[nearest]} \t {pred}')

x_offset = [-10, -5, 0, 5, 10]
y_offset = [-10, -5, 0, 5, 10]

# First training sample at every grid point around the closest label.
# Grid points without a training sample (e.g. at the edge of the grid) are
# skipped instead of raising IndexError; offset (0, 0) always matches.
base_x, base_y = training_labels[p[temp]][:2]
loc = []
for dx in x_offset:
    for dy in y_offset:
        hits = np.flatnonzero((training_labels[:, 0] == base_x + dx)
                              & (training_labels[:, 1] == base_y + dy))
        if hits.size:
            loc.append(hits[0])
locs = np.unique(loc)

# NOTE: despite its name, `mse` holds the mean *absolute* error between each
# candidate training signal and the experiment signal; the name is kept so the
# notebook namespace stays as before.
mse = [mean_absolute_error(train_resample[candidate], ex_resample[temp]) for candidate in locs]
min_loc = locs[np.argmin(mse)]

[255. 245.] 	 [253.32326 244.85556]
[235. 235.] 	 [234.51955 233.11015]
[245. 260.] 	 [243.45721 259.58542]
[260. 225.] 	 [260.85577 225.5717 ]
[240. 255.] 	 [241.64229 252.67258]
[265. 250.] 	 [262.7103 247.9715]
[235. 300.] 	 [236.75409 299.41733]
[230. 230.] 	 [230.32304 228.36177]
[265. 300.] 	 [264.8639 298.7226]
[255. 260.] 	 [252.88496 258.21362]
[260. 235.] 	 [260.63187 232.50989]
[250. 250.] 	 [252.09583 252.05911]
[200. 230.] 	 [201.56601 229.92183]
[255. 260.] 	 [256.91928 257.9066 ]
[245. 255.] 	 [245.16226 252.61618]
[225. 200.] 	 [224.14635 201.8915 ]
[270. 310.] 	 [269.60422 310.06744]
[300. 280.] 	 [298.5857  281.49817]


In [65]:
# Show the four channels of the selected training sample (top row) above the
# four channels of the experiment sample it was matched to (bottom row).
plt.close()
fig, axs = plt.subplots(2, 4)

ex_c = temp # Given validation folder has 18 data points. so use - (0, 1, .... , 17)

# plot to compare the validation set with the training set
fig.suptitle(f'Least MSE Training Data Coordinates - {training_labels[min_loc]} \nExp G7 Predicted Coordinates - {pred_ex_labels[ex_c]}')

channel_colours = ('r', 'g', 'b', 'y')
for row, signal in enumerate((train_resample[min_loc], ex_resample[ex_c])):
    for channel, colour in enumerate(channel_colours):
        axs[row, channel].plot(signal[:, channel], label=f'P - {channel + 1}', c=colour)

# fig.legend()
plt.show()

In [66]:
# Overview scatter: training grid, the prediction being refined, the buffer of
# candidate training points around it and the candidate finally selected.
plt.close()
plt.plot(training_labels[:,0], training_labels[:,1], '.', color = 'g', label='Training Data', alpha = 0.3)
plt.plot(pred_ex_labels[temp, 0], pred_ex_labels[temp, 1], 's', color = 'k', label='Predicted')

plt.scatter(training_labels[locs, 0], training_labels[locs, 1], c='b', alpha = 0.5, label = 'Buffer Training Data')
plt.scatter(training_labels[min_loc, 0], training_labels[min_loc, 1], c = 'r', label = 'Least MSE Point')

# identical 5-unit tick grid on both axes
plt.xticks(np.arange(175, 330, 5), rotation = 'vertical')
plt.yticks(np.arange(175, 330, 5))

plt.legend(bbox_to_anchor=(0.5, 0), loc = 'lower center', ncol = 2)
plt.show()

#### Old CNN Model

In [None]:
# reg = tf.keras.regularizers.L2(0.01)

# Earlier CNN regressor: three convolution / max-pooling stages followed by a
# dense head that ends in a 2-unit linear output.
cnn_model = Sequential()
# create a CNN cnn_model
cnn_model.add(Conv1D(filters=224, kernel_size=3, activation='relu', input_shape=(100,4)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Dropout(0.25))
cnn_model.add(Conv1D(filters=384, kernel_size=3, activation='relu'))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Conv1D(filters=384, kernel_size=3, activation='relu'))
cnn_model.add(MaxPooling1D(pool_size=2))

cnn_model.add(Flatten())

cnn_model.add(Dense(units=32, activation='sigmoid'))
cnn_model.add(Dense(units=480, activation='relu'))
cnn_model.add(Dense(units=2, activation='linear'))
cnn_model.summary()

# Compile cnn_model
# The optimizer is selected by name, so RMSprop runs with the Keras defaults.
# (A stray bare `3` statement and an unused Adam(learning_rate=0.0001)
# instance were removed from this cell.)
cnn_model.compile(loss = 'mse', optimizer = 'RMSprop', metrics=['Accuracy'])

# Fit the cnn_model; (x_test, y_test) is used as validation data each epoch
cnn_history = cnn_model.fit(x_train, y_train, epochs=80, verbose=1, batch_size=32, validation_data=(x_test, y_test))

In [None]:
# Training curves of the old CNN: loss and 'Accuracy' metric for both the
# training and the validation data, one line per recorded series.
plt.close()
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss'),
                                 ('Accuracy', 'Training Accuracy'),
                                 ('val_Accuracy', 'Validation Accuracy')):
    plt.plot(cnn_history.history[history_key], label=curve_label)

plt.yticks(np.arange(0, 1, 0.05))

plt.tight_layout()
plt.legend()
plt.show()

In [None]:
# RMSE between targets and cnn_model predictions after both are mapped back
# through scalar.inverse_transform. The `squared=False` keyword is deprecated
# in newer scikit-learn releases; the same value (RMSE per output column,
# then averaged) is computed explicitly from the per-column MSE instead.
np.sqrt(mean_squared_error(scalar.inverse_transform(y_test), scalar.inverse_transform(cnn_model.predict(x_test)), multioutput='raw_values')).mean()

In [None]:
# cnn_save = cnn_model.save(f'model/cnn_model_98-7perAcc.{int(time.time())}.h5')

In [None]:
# cnn_98 = tf.keras.models.load_model('CNN_98_1.h5')

#### SVR Model

In [None]:
# Data for the SVR model: split the resampled training signals and their
# labels, then flatten every sample into a single feature vector.
X = train_resample
y = training_labels

# create split data from the normalized data
size = 0.2
x_train, x_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=size)
print(f'Length of training data - {len(x_train)} length of labels - {len(y_train)} \nLength of test data - {len(x_test)} and length of labels - {len(y_test)}')

# one row per sample
x_train_flat = np.asarray([np.asarray(sample).flatten() for sample in x_train])
x_test_flat = np.asarray([np.asarray(sample).flatten() for sample in x_test])

print(np.asarray(x_train_flat).shape, np.asarray(y_train).shape)

In [None]:
# One RBF-kernel SVR per output coordinate, wrapped in MultiOutputRegressor.
# (The variable is called regr_multirf, but the estimator is an SVR.)
svr_estimator = svm.SVR(kernel='rbf', C=1e5, gamma=0.03, epsilon=0.2, tol=1e-6)
regr_multirf = MultiOutputRegressor(svr_estimator)
regr_multirf.fit(x_train_flat, y_train)

# residuals on the held-out split, echoed as the cell output
y_pred = regr_multirf.predict(x_test_flat)
y_test-y_pred

In [None]:
# MSE between y_test and y_pred. `squared=True` was the default and the
# keyword is deprecated in newer scikit-learn releases, so it is not passed.
mean_squared_error(y_test, y_pred)

In [None]:
# exp_val_X_flat / exp_val_y were otherwise only created further down, in the
# KNN section, so this cell failed with NameError when the notebook was run
# top to bottom. Build them here the same way the KNN section does.
exp_val_y = ex_val_labels
exp_val_X_flat = np.asarray([np.asarray(sample).flatten() for sample in ex_val_resample])

y_pred_exp = regr_multirf.predict(exp_val_X_flat)

# MSE of the SVR on the experimental validation data
mean_squared_error(exp_val_y,y_pred_exp)

In [None]:
# Flatten every Group-7 experiment sample into one feature vector per row;
# the trailing expression echoes the resulting shape.
ex_X_flat = np.asarray([np.asarray(sample).flatten() for sample in ex_resample])
ex_X_flat.shape

In [None]:
# SVR predictions for the Group-7 experiment data. These used to be stored in
# `y_pred_g7_knn`, which mislabelled them and is reassigned by the KNN section
# below; they now live under their own name.
y_pred_g7_svr = regr_multirf.predict(ex_X_flat)
y_pred_g7_svr

#### KNN Model

In [None]:
# Data for the KNN model: resampled training signals with their labels, plus
# the experimental validation set under exp_val_* names.
X, y = train_resample, training_labels
exp_val_X, exp_val_y = ex_val_resample, ex_val_labels

print(np.asarray(X).shape, np.asarray(y).shape)

# create split data from the normalized data
size = 0.1
x_train, x_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=size)
print(f'Length of training data - {np.asarray(x_train).shape} length of labels - {np.asarray(y_train).shape} \nLength of test data - {np.asarray(x_test).shape} and length of labels - {np.asarray(y_test).shape}')

In [None]:
# Flatten every sample of the three data sets into one feature vector per row
x_train_flat = np.asarray([np.asarray(sample).flatten() for sample in x_train])
x_test_flat = np.asarray([np.asarray(sample).flatten() for sample in x_test])
exp_val_X_flat = np.asarray([np.asarray(sample).flatten() for sample in exp_val_X])

# shapes of features and labels for each set
print(np.asarray(x_train_flat).shape, np.asarray(y_train).shape)
print(x_test_flat.shape, y_test.shape)
print(exp_val_X_flat.shape, exp_val_y.shape)

In [None]:
# Distance-weighted 5-nearest-neighbour regressor on the flattened signals;
# the trailing expression echoes the residuals on the held-out split.
knn = KNeighborsRegressor(n_neighbors=5, weights='distance').fit(x_train_flat, y_train)
y_pred = knn.predict(x_test_flat)
y_test-y_pred

In [None]:
# MSE between y_test and y_pred (y_pred as last assigned, by the KNN cell above)
mean_squared_error(y_test, y_pred)

In [None]:
# KNN predictions for the flattened experimental validation signals
y_pred_exp = knn.predict(exp_val_X_flat)

# MSE against the experimental validation labels, echoed as the cell output
mean_squared_error(exp_val_y,y_pred_exp)

In [None]:
# Flatten every Group-7 experiment sample into one feature vector per row;
# the trailing expression echoes the resulting shape.
ex_X_flat = np.asarray([np.asarray(sample).flatten() for sample in ex_resample])
ex_X_flat.shape

In [None]:
# KNN predictions for the flattened Group-7 experiment signals; the bare name
# echoes the array as the cell output.
y_pred_g7_knn = knn.predict(ex_X_flat)
y_pred_g7_knn

#### -- Check -- prediction for augmentation validation data

In [None]:
# Prepare the first four augmented validation records: take columns 1-4,
# min-max scale them and resample to 100 points. `c` collects the four
# prepared samples; checking1..checking4 keep the individual arrays.
c = [resample(minmax_scale(validation_aug[k][:,1:5]), 100) for k in range(4)]
checking1, checking2, checking3, checking4 = c

'''
plt.close()
plt.title(f'Validation Data {validation_aug_labels[0]}')
plt.plot(checking1[:,0], label = 'P - 1')
plt.plot(checking1[:,1], label = 'P - 2')
plt.plot(checking1[:,2], label = 'P - 3')
plt.plot(checking1[:,3], label = 'P - 4')
plt.legend()
plt.show()
'''

In [None]:
# Labels of the four augmented validation samples, Alex4's predictions for
# them (mapped back through scalar.inverse_transform) and the differences.
print(f'{validation_aug_labels[0]} \n{validation_aug_labels[1]} \n{validation_aug_labels[2]} \n{validation_aug_labels[3]}')

p = scalar.inverse_transform(Alex4.predict(np.asarray(c)))
print(p)

# actual minus predicted, per coordinate
error = [[validation_aug_labels[row][0]-predicted[0], validation_aug_labels[row][1]-predicted[1]]
         for row, predicted in enumerate(p)]

print(f'Difference between the actual and predicted coordinates: \n{np.asarray(error)}')