## Importing Data

In [None]:
# -*- coding: utf-8 -*-
# Regression Example With Boston Dataset: Standardized and Wider
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline
from tensorflow.keras import layers
from sklearn.utils import shuffle

import pandas as pd
import seaborn as sns
import keras
import keras.utils
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np

from mpl_toolkits import mplot3d
from cgi import test

# Load the aggregated measurements from the working directory.
dataset = pd.read_csv('aggregated_data.csv')
# Shuffle the rows once; the fixed seed keeps the row order (and therefore the
# train/test split derived from it) reproducible across kernel restarts.
dataset = shuffle(dataset, random_state=9578)

# Single scaler instance shared by importData() and scaleDataset().
std_scaler = StandardScaler()


In [None]:
def importData(data, scaler):
    """Split ``data`` 80/20 into train/test sets and scale the feature columns.

    The scaler is fitted on the training features only and then applied to the
    test features, so no test-set statistics leak into the preprocessing.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing a 'Concentration' column (the label) and, after that
        column is removed, at least seven feature columns. The first seven are
        relabelled positionally with the names in ``feature_names`` below.
        ``data`` itself is not modified.
    scaler : object
        Any object exposing ``fit_transform(X)`` and ``transform(X)`` on 2-D
        arrays (for example a scikit-learn ``StandardScaler``).

    Returns
    -------
    tuple
        ``(all_features, train_features, test_features, train_labels,
        test_labels, data_labels)`` where ``all_features`` is ``data`` without
        the 'Concentration' column (unscaled), ``train_features`` /
        ``test_features`` are scaled DataFrames with a fresh 0..n-1 index, and
        the ``*_labels`` are the matching 'Concentration' Series.
    """
    # NOTE: 'Increaing PPM' is spelled as in the original output schema; it is
    # kept unchanged so existing consumers of these frames keep working.
    feature_names = ['Time', 'Current', 'Spin Coating', 'Increaing PPM',
                     'Temperature', 'Repeat Sensor Use', 'Days Elapsed']

    def _as_frame(scaled):
        # Rebuild a DataFrame from the scaled array, one column per feature name.
        return pd.DataFrame({name: scaled[:, pos] for pos, name in enumerate(feature_names)})

    train_dataset = data.sample(frac=0.8, random_state=9578)
    test_dataset = data.drop(train_dataset.index)

    train_features = train_dataset.copy()
    test_features = test_dataset.copy()

    train_labels = train_features.pop('Concentration')
    test_labels = test_features.pop('Concentration')

    # Fit on the training split only; reuse those statistics for the test split.
    train_features = _as_frame(scaler.fit_transform(train_features.to_numpy()))
    test_features = _as_frame(scaler.transform(test_features.to_numpy()))

    # For later use: the complete (unscaled) feature table and its labels.
    data_labels = data['Concentration']
    all_features = data.drop(columns=['Concentration'])

    return all_features, train_features, test_features, train_labels, test_labels, data_labels
#sns.pairplot(train_dataset[['Time','Current', 'Spin Coating', 'Increasing PPM', 'Temperature', 'Repeat Sensor Use', 'Days Elapsed', 'Concentration']], diag_kind='kde')


#### Plotting Details

# Neural Network Creation and Selection Process

### Functions: Build NN Model, Fit Model, K Cross Validation

In [None]:
#Iterate through this a few dozen times

def build_model(n1, n2, train_feats):
  """Build and compile a regression network with two hidden layers.

  Args:
    n1: Number of units in the first hidden Dense layer.
    n2: Number of units in the second hidden Dense layer.
    train_feats: Feature table used only to determine the input width.
      Objects with ``keys()`` (e.g. a DataFrame) contribute ``len(keys())``;
      anything else must expose a 2-D ``shape`` (e.g. a NumPy array).

  Returns:
    A compiled model: two ReLU Dense layers followed by a single-unit output
    layer, trained with MSE loss, RMSprop(0.001) and reporting 'mae' and 'mse'.
  """
  # Experiment with different models, widths, layers and activation functions;
  # don't limit to only 10 nodes - measure up to 64 nodes in 2 layers.
  if hasattr(train_feats, 'keys'):
    n_inputs = len(train_feats.keys())
  else:
    n_inputs = train_feats.shape[1]

  # Built through tf.keras so the model, its layers and its optimizer all come
  # from the same Keras implementation.
  model = tf.keras.Sequential([
    layers.Dense(n1, activation=tf.nn.relu, input_shape=[n_inputs]),
    layers.Dense(n2, activation=tf.nn.relu),
    layers.Dense(1)
  ])

  optimizer = tf.keras.optimizers.RMSprop(0.001)
  model.compile(loss='mse', optimizer=optimizer, metrics=['mae','mse'])

  return model

def model_history(features, labels, model, epo, batch, vbs, validation_split=0.2, callbacks=None):
    """Fit ``model`` and return its training history as a DataFrame.

    Parameters
    ----------
    features, labels
        Training inputs and targets, forwarded unchanged to ``model.fit``.
    model
        Object exposing ``fit(...)`` whose return value has a ``history``
        attribute (a mapping of metric name -> per-epoch values).
    epo : int
        Number of epochs.
    batch : int
        Batch size.
    vbs
        Verbosity flag forwarded to ``model.fit``.
    validation_split : float, optional
        Fraction forwarded to ``model.fit`` as ``validation_split``
        (default 0.2, the value previously hard-coded here).
    callbacks : optional
        Forwarded to ``model.fit`` as ``callbacks``; ``None`` (default) means
        no callbacks, matching the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        One column per recorded metric, one row per epoch.
    """
    history = model.fit(
        features, labels,
        epochs=epo,
        batch_size=batch,
        validation_split=validation_split,
        verbose=vbs,
        callbacks=callbacks,
    )

    return pd.DataFrame(history.history)

def KCrossValidation(i, features, labels, num_val_samples, epochs, batch, verbose, n1, n2):
    """Train and score one cross-validation fold.

    Rows ``[i * num_val_samples, (i + 1) * num_val_samples)`` of ``features`` /
    ``labels`` are held out for validation; everything before and after that
    slice is concatenated into the training set. A fresh model is built with
    ``build_model(n1, n2, features)`` and trained through ``model_history``.

    Note that ``verbose`` only controls training output; the final
    ``model.evaluate`` call always runs with ``verbose=1``.

    Returns ``(model, history, test_loss, test_mae, test_mse)`` where
    ``history`` is whatever ``model_history`` returns and the three scores come
    from evaluating the model on the held-out slice.
    """
    print('processing fold #', i)

    fold_start = i * num_val_samples
    fold_stop = (i + 1) * num_val_samples

    # Held-out slice for this fold.
    val_data = features[fold_start:fold_stop]
    val_targets = labels[fold_start:fold_stop]

    # Everything outside the held-out slice becomes the training data.
    partial_train_data = np.concatenate(
        (features[:fold_start], features[fold_stop:]), axis=0)
    partial_train_targets = np.concatenate(
        (labels[:fold_start], labels[fold_stop:]), axis=0)

    model = build_model(n1, n2, features)
    history = model_history(
        partial_train_data, partial_train_targets, model, epochs, batch, verbose)

    test_loss, test_mae, test_mse = model.evaluate(val_data, val_targets, verbose=1)

    return model, history, test_loss, test_mae, test_mse

## NEURAL NETWORK PARAMETERS

In [None]:
# Split and scale the data. A copy is handed over so `dataset` itself stays intact.
(all_features, train_features, test_features,
 train_labels, test_labels, data_labels) = importData(dataset.copy(), std_scaler)

# Cross-validation: number of folds and rows per validation fold
# (integer division, so any remainder rows are never used for validation).
k_folds = 3
num_val_samples = len(train_labels) // k_folds

# Architecture search bounds: starting widths and the total node budget.
n1_start = 32
n2_start = 31
sum_nodes = 64

# Training settings.
num_epochs = 300
batch_size = 100
verbose = 0

# Accumulators filled by the architecture search, one entry per architecture.
avg_val_scores, order_of_architecture = [], []
all_networks, all_history, mae_history = [], [], []


##### Plotting Functions

In [None]:
def plot_loss(history):
  """Plot the 'loss' and 'val_loss' series of ``history`` against epoch.

  ``history`` must be indexable by the keys 'loss' and 'val_loss' (for example
  the DataFrame returned by ``model_history``).
  """
  for series_name in ('loss', 'val_loss'):
    plt.plot(history[series_name], label=series_name)

  plt.xlabel('Epoch')
  plt.ylabel('Error')
  plt.legend()
  plt.grid(True)
  plt.show()

def correlation_plots(model, label, input_data, title, xlabel, ylabel):
  """Scatter the model's predictions against the given labels and return them.

  Args:
    model: Object exposing ``predict(input_data)`` whose result supports
      ``.flatten()``.
    label: Values plotted on the x axis.
    input_data: Inputs passed to ``model.predict``.
    title, xlabel, ylabel: Text for the figure title and axis labels.

  Returns:
    The flattened predictions that were plotted on the y axis.
  """
  predicted = model.predict(input_data).flatten()

  plt.scatter(label, predicted)
  plt.title(title)
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)

  # Same two axis-mode calls, in the same order, as before.
  plt.axis('equal')
  plt.axis('square')
  plt.grid(True)
  plt.show()

  return predicted


def plotGraph(y_test, y_pred,regressorName):
    """Scatter actual (blue) and predicted (red) values against sample position.

    The x positions are ``range(len(y_pred))`` for both series; the figure is
    titled ``regressorName`` and shown. Returns ``None``.
    """
    sample_positions = range(len(y_pred))

    for values, colour in ((y_test, 'blue'), (y_pred, 'red')):
        plt.scatter(sample_positions, values, color=colour)

    plt.title(regressorName)
    plt.show()


#### Where the Magic Happens

In [None]:
# Architecture search: k-fold cross-validate every pair of hidden-layer widths
# that fits the node budget, then keep the models of the best-scoring pair.
# (K-fold procedure taken from "Deep Learning with Python", Manning.)
for i in range(n1_start, sum_nodes):

    for j in range(n2_start, sum_nodes):
        # Skip combinations whose combined hidden width exceeds the budget.
        if i + j > sum_nodes:
            continue

        # j is passed to KCrossValidation as n1 (first hidden layer) and
        # i as n2 (second hidden layer).
        print("first hidden layer", j)
        print("second hidden layer", i)

        k_fold_test_scores = []
        k_models = []
        k_history = []
        k_mae_history = []

        for fold in range(k_folds):
            model, history, test_loss, test_mae, test_mse = KCrossValidation(
                fold,
                train_features,
                train_labels,
                num_val_samples,
                num_epochs,
                batch_size,
                verbose,
                j,
                i)

            k_fold_test_scores.append(test_mae)
            k_history.append(history)
            k_models.append(model)
            k_mae_history.append(history['val_mae'])

        # Mean held-out MAE over the folds is the score of this architecture.
        avg_val_scores.append(sum(k_fold_test_scores) / len(k_fold_test_scores))
        all_history.append(k_history)
        all_networks.append(k_models)

        # Per-epoch validation MAE averaged across folds. Distinct index names
        # are used so the outer loop variable `i` is not shadowed.
        mae_history.append([
            np.mean([fold_mae[epoch] for fold_mae in k_mae_history])
            for epoch in range(num_epochs)
        ])

        # Stored as [second hidden layer width, first hidden layer width].
        order_of_architecture.append([i, j])

# Fail with a clear message instead of an opaque min() error when the search
# bounds admitted no architecture at all.
if not avg_val_scores:
    raise ValueError("architecture search produced no candidates; check n1_start, n2_start and sum_nodes")

# Find the architecture with the lowest average validation error.
lowest_index = avg_val_scores.index(min(avg_val_scores))
optimal_NNs = all_networks[lowest_index]

print(avg_val_scores)




Plotting Loss Transition

In [None]:
def smooth_curve(points, factor=0.8):
    """Return an exponentially smoothed copy of ``points`` as a list.

    The first value is kept as-is; every later value becomes
    ``previous_smoothed * factor + point * (1 - factor)``.
    An empty input yields an empty list.
    """
    smoothed_points = []
    for point in points:
        if not smoothed_points:
            # First sample seeds the running average.
            smoothed_points.append(point)
            continue
        smoothed_points.append(smoothed_points[-1] * factor + point * (1 - factor))
    return smoothed_points

# The first tenth of the epochs is left out of both plots.
warmup_epochs = int(num_epochs/10)
trimmed_mae = mae_history[lowest_index][warmup_epochs:]

# Raw fold-averaged validation MAE of the best architecture.
plt.plot(range(1, len(trimmed_mae) + 1), trimmed_mae)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()

# Same curve after exponential smoothing.
smooth_mae_history = smooth_curve(trimmed_mae)
plt.plot(range(1, len(smooth_mae_history) + 1), smooth_mae_history)
plt.xlabel('Epochs')
plt.ylabel('Validation MAE')
plt.show()


# Isolating Parameters and Printing them Out

Scaling Data Set Function

In [None]:
def scaleDataset(data, scaler=None, fit=True):
    """Scale a feature table and return it as a DataFrame with fixed column names.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table with at least seven columns; the first seven are
        relabelled positionally with the names listed below.
    scaler : object, optional
        Object exposing ``fit_transform`` / ``transform``. Defaults to the
        module-level ``std_scaler``.
    fit : bool, optional
        When True (default, the original behaviour) the scaler is re-fitted on
        ``data`` via ``fit_transform``, replacing any previously fitted
        statistics. Pass False to apply the already-fitted statistics with
        ``transform`` instead.

    Returns
    -------
    pandas.DataFrame
        Scaled values with a fresh 0..n-1 index.
    """
    if scaler is None:
        scaler = std_scaler

    raw = data.to_numpy()
    scaled = scaler.fit_transform(raw) if fit else scaler.transform(raw)

    # NOTE: 'Increaing PPM' is spelled as in the existing output schema.
    column_names = ['Time', 'Current', 'Spin Coating', 'Increaing PPM',
                    'Temperature', 'Repeat Sensor Use', 'Days Elapsed']
    return pd.DataFrame({name: scaled[:, pos] for pos, name in enumerate(column_names)})


### Isolating Spin Coating

In [None]:
# Split the samples by their 'Spin Coating' value: position 0 -> value 0, position 1 -> value 1.
spin_values = dataset['Spin Coating'].to_numpy()
sc_index = [np.where(spin_values == flag)[0] for flag in (0, 1)]

scaled_features = scaleDataset(all_features.copy())

# Scaled feature rows of each spin-coating group.
sc_features = [scaled_features.iloc[rows] for rows in sc_index]

# Matching labels of each spin-coating group.
label_values = data_labels.to_numpy()
sc_label = [label_values[rows] for rows in sc_index]

# Evaluate every model of the best architecture on each group.
sc_mae = []
for group_features, group_labels in zip(sc_features, sc_label):
    tmp_mae = []
    for NN in optimal_NNs:
        test_loss, test_mae, test_mse = NN.evaluate(group_features, group_labels, batch_size=10, verbose=1)
        tmp_mae.append(test_mae)

        #sc_predictions = correlation_plots(NN, sc_label[i], sc_features[i], "Testing Correlation Plot for SC " + str(i), "Actual", "Predicted")
        #plotGraph(sc_label[i], sc_predictions, "SC Plot")

    sc_mae.append(tmp_mae)

# Per group: best (minimum) MAE and the mean MAE across the models.
for group_mae in sc_mae:
    print(min(group_mae), sum(group_mae)/len(group_mae))

### Isolating Time

In [None]:
# Row positions of the samples recorded at each 'Time' value 0..50.
time_values = dataset['Time'].to_numpy()
time_index = [np.where(time_values == second)[0] for second in range(0, 51)]

scaled_features = scaleDataset(all_features.copy())
label_values = data_labels.to_numpy()

# Scaled features and labels of the samples at each time value.
time_features = [scaled_features.iloc[rows] for rows in time_index]
time_labels = [label_values[rows] for rows in time_index]

# Evaluate every model of the best architecture for time values 15..50.
time_mae = []
for second in range(15, 51):
    tmp_mae = []
    for NN in optimal_NNs:
        test_loss, test_mae, test_mse = NN.evaluate(time_features[second], time_labels[second], batch_size=10, verbose=1)
        tmp_mae.append(test_mae)

        #time_predictions = correlation_plots(NN, time_labels[i], time_features[i], "Testing Correlation Plot for Time " + str(i), "Actual", "Predicted")
        #plotGraph(time_labels[i], time_predictions, "Time Plot")

    time_mae.append(tmp_mae)

# Per time value: best (minimum) MAE and the mean MAE across the models.
mins = [min(model_mae) for model_mae in time_mae]
averages = [sum(model_mae)/len(model_mae) for model_mae in time_mae]

for lowest, mean in zip(mins, averages):
    print(lowest, mean)


### Isolating Spin Coating and Time

In [None]:
# Splitting by Spin Coating, then separating by time.
spin_values = dataset['Spin Coating'].to_numpy()
time_values = dataset['Time'].to_numpy()

ss_1 = np.where(spin_values == 1)[0]
ss_0 = np.where(spin_values == 0)[0]

times_index = []

shared_time_1 = []
shared_time_0 = []

for t in range(0, 51):
    at_time = time_values == t
    times_index.append(np.where(at_time)[0].tolist())

    # Rows recorded at time t, split by spin-coating value. The "0" bucket is
    # built from the non-spin-coated rows at time t (previously it collected
    # the spin-coated rows recorded at every *other* time).
    shared_time_1.append(list(np.where(at_time & (spin_values == 1))[0]))
    shared_time_0.append(list(np.where(at_time & (spin_values == 0))[0]))

scaled_features = scaleDataset(all_features.copy())
label_values = data_labels.to_numpy()

shared_features = []
shared_labels = []

for t in range(0, 51):
    shared_features.append([scaled_features.iloc[shared_time_0[t]], scaled_features.iloc[shared_time_1[t]]])
    shared_labels.append([label_values[shared_time_0[t]], label_values[shared_time_1[t]]])


shared_mae = []
for t in range(15, 51):
    sc_tmp_mae = []

    for j in range(0, 2):
        #print("TIME = ", t, "S", "SPINCOATED = ", j)

        if len(shared_labels[t][j]) == 0:
            # No sample for this (time, spin coating) pair: record NaN for each
            # model so the printed table stays aligned instead of evaluating
            # on an empty set.
            tmp_mae = [float('nan')] * len(optimal_NNs)
        else:
            tmp_mae = []
            for NN in optimal_NNs:
                test_loss, test_mae, test_mse = NN.evaluate(shared_features[t][j], shared_labels[t][j], batch_size=10, verbose=0)
                tmp_mae.append(test_mae)

        sc_tmp_mae.append(tmp_mae)
    shared_mae.append(sc_tmp_mae)

# Per time value: [not spin coated, spin coated] minimum and mean MAE.
mins = []
averages = []
for per_time in shared_mae:
    mins.append([min(per_time[0]), min(per_time[1])])
    averages.append([sum(per_time[0])/len(per_time[0]), sum(per_time[1])/len(per_time[1])])


for j in range(0, len(shared_mae)):
    print(mins[j], averages[j])

### Isolating Increasing PPM

In [None]:
# Split the samples by their 'Increasing PPM' value: position 0 -> value 0, position 1 -> value 1.
increasing_values = dataset['Increasing PPM'].to_numpy()
increasing_index = [np.where(increasing_values == 0)[0], np.where(increasing_values == 1)[0]]

scaled_features = scaleDataset(all_features.copy())

# Scaled feature rows of each 'Increasing PPM' group.
# (Indexed with increasing_index; the spin-coating indices were used here before.)
increasing_features = [scaled_features.iloc[increasing_index[0]], scaled_features.iloc[increasing_index[1]]]

# Matching labels of each 'Increasing PPM' group.
increasing_label = [data_labels.to_numpy()[increasing_index[0]], data_labels.to_numpy()[increasing_index[1]]]

# Evaluate every model of the best architecture on each group.
increasing_mae = []
for i in range(0, 2):
    tmp_mae = []
    for NN in optimal_NNs:
        test_loss, test_mae, test_mse = NN.evaluate(increasing_features[i], increasing_label[i], batch_size=10, verbose=1)
        tmp_mae.append(test_mae)

    increasing_mae.append(tmp_mae)


# Per group: best (minimum) MAE and the mean MAE across the models.
for i in increasing_mae:
    print(min(i), sum(i)/len(i) )

### Repeat Sensor Use

In [None]:
# Split the samples by 'Repeat Sensor Use' (values 1, 2, 3) and by 'Time' (values 0..50).
repeat_values = dataset['Repeat Sensor Use'].to_numpy()
time_values = dataset['Time'].to_numpy()

repeat_index = [np.where(repeat_values == use)[0] for use in range(1, 4)]

shared_tr_1 = []
shared_tr_2 = []
shared_tr_3 = []

times_index = []
for t in range(0, 51):
    at_time = time_values == t
    times_index.append(np.where(at_time)[0].tolist())

    # Boolean masks give the same ascending row positions as scanning every
    # repeat index against the time list, without the quadratic membership test.
    shared_tr_1.append(list(np.where(at_time & (repeat_values == 1))[0]))
    shared_tr_2.append(list(np.where(at_time & (repeat_values == 2))[0]))
    shared_tr_3.append(list(np.where(at_time & (repeat_values == 3))[0]))

scaled_features = scaleDataset(all_features.copy())
label_values = data_labels.to_numpy()

# Scaled features and labels per time value, each split into the three repeat-use groups.
tr_features = []
tr_labels = []

for t in range(0, 51):
    groups = (shared_tr_1[t], shared_tr_2[t], shared_tr_3[t])
    tr_features.append([scaled_features.iloc[rows] for rows in groups])
    tr_labels.append([label_values[rows] for rows in groups])

# Evaluate every model of the best architecture for time values 15..50.
tr_mae = []
for t in range(15, 51):
    tr_tmp_mae = []

    for j in range(0, 3):

        tmp_mae = []

        for NN in optimal_NNs:
            test_loss, test_mae, test_mse = NN.evaluate(tr_features[t][j], tr_labels[t][j], batch_size=2, verbose=0)
            tmp_mae.append(test_mae)

        tr_tmp_mae.append(tmp_mae)
    tr_mae.append(tr_tmp_mae)

# Per time value: minimum and mean MAE for repeat-use groups 1, 2 and 3.
mins = []
averages = []
for per_time in tr_mae:
    mins.append([min(group_mae) for group_mae in per_time])
    averages.append([sum(group_mae)/len(group_mae) for group_mae in per_time])

for j in range(0, len(tr_mae)):
    print(mins[j], averages[j])






[0.2973724603652954, 0.37791651487350464, 0.26288270950317383] [0.3214408755302429, 0.4420969287554423, 0.4965585271517436]
[0.2853758633136749, 0.3996132016181946, 0.3658252954483032] [0.3088931043942769, 0.46263250708580017, 0.6525407632191976]
[0.2803540825843811, 0.3935277462005615, 0.25652050971984863] [0.30284111698468524, 0.4710976183414459, 0.4399184286594391]
[0.27316001057624817, 0.38111114501953125, 0.33783796429634094] [0.2943981885910034, 0.46818633874257404, 0.561365952094396]
[0.25914132595062256, 0.33417898416519165, 0.21398741006851196] [0.28361045320828754, 0.462371826171875, 0.4150985876719157]
[0.24737800657749176, 0.3475381135940552, 0.3814122676849365] [0.2739185442527135, 0.4527066151301066, 0.6169638832410177]
[0.24264444410800934, 0.35014915466308594, 0.2530318796634674] [0.268285408616066, 0.4416150152683258, 0.4307607014973958]
[0.2516763508319855, 0.3547285199165344, 0.2121509164571762] [0.27021826306978863, 0.40709011753400165, 0.3868235299984614]
[0.261774

### Days Elapsed

## 3D Plots

In [None]:
# Start 3D plotting here: an empty figure holding a single 3D axes.

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')


# Example scatter data (disabled):
#xdata = np.sin(zdata) + 0.1 * np.random.randn(100)
#ydata = np.cos(zdata) + 0.1 * np.random.randn(100)
#ax.scatter3D(xdata, ydata, zdata, c=zdata, cmap='Greens');