In [1]:
import numpy as np
import math
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import ticker as ticker
import time
from typing import List, Tuple, Callable



In [13]:
# Import dependencies
import joblib
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.utils import shuffle

In [3]:
def print_scores(scores):
    """Print the best, worst and average cross-validation MSE, RMSE and R2.

    Parameters
    ----------
    scores : mapping
        Must provide 'test_neg_mean_squared_error' and 'test_r2' entries
        (the per-fold results produced by ``cross_validate`` in this
        notebook). The MSE entry must support ``abs()``, e.g. a NumPy array.

    Returns
    -------
    None
        The summary is written to stdout only.
    """
    # The scorer reports negated MSE, so take magnitudes once up front.
    fold_mse = abs(scores['test_neg_mean_squared_error'])
    fold_r2 = scores['test_r2']

    lowest_mse = min(fold_mse)
    highest_mse = max(fold_mse)
    mean_mse = np.average(fold_mse)

    # (label, value) pairs in the order they are reported; RMSE is derived
    # from the corresponding MSE figure.
    report = (
        ("Best MSE CV: ", lowest_mse),
        ("Best RMSE CV: ", np.sqrt(lowest_mse)),
        ("Best R2 CV: ", max(fold_r2)),
        ("Worst MSE CV: ", highest_mse),
        ("Worst RMSE CV: ", np.sqrt(highest_mse)),
        ("Worst R2 CV: ", min(fold_r2)),
        ('Average MSE CV: ', mean_mse),
        ('Average RMSE CV: ', np.sqrt(mean_mse)),
        ("Average R2 CV: ", np.average(fold_r2)),
    )

    print("~~~~ Cross Validation Results ~~~~")
    for label, value in report:
        print(label, value)
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")


def ANN_Train(X, y, test_size, train_size, k, epochs, hidden_nodes, hidden_layers, batch_size, learning_rate):
    """Cross-validate, then fit, a scaled MLP regressor on a train split of (X, y).

    Parameters
    ----------
    X, y : array-like
        Features and targets; split with ``train_test_split`` (random_state=42).
    test_size, train_size : float or int
        Forwarded to ``train_test_split``.
    k : int or CV splitter
        Passed as ``cv`` to ``cross_validate``.
    epochs : int
        ``max_iter`` of the MLPRegressor.
    hidden_nodes : int
        Number of neurons in each hidden layer.
    hidden_layers : int
        Number of hidden layers (all of width ``hidden_nodes``).
    batch_size : int
        ``batch_size`` of the MLPRegressor.
    learning_rate : float
        ``learning_rate_init`` of the MLPRegressor.

    Returns
    -------
    model : Pipeline
        StandardScaler + MLPRegressor pipeline fitted on the training split.
    X_test, y_test
        Held-out split, untouched by cross-validation and fitting.
    CrossValData : list
        [best MSE, best RMSE, best R2, worst MSE, worst RMSE, worst R2,
        average MSE, average RMSE, average R2] over the CV folds.
    """
    # Uniform architecture: `hidden_layers` layers of `hidden_nodes` neurons each.
    hidden_layer_sizes = tuple(np.full(hidden_layers, hidden_nodes))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, train_size=train_size, random_state=42)

    # Scaling lives inside the pipeline so each CV fold fits its own scaler.
    # NOTE(review): the MLPRegressor is created without random_state, so
    # repeated runs are not reproducible — consider seeding it.
    model = Pipeline([('scaler', StandardScaler(with_mean=True)),
                      ('ANN', MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                                           activation='relu', solver='adam',
                                           batch_size=batch_size, max_iter=epochs,
                                           learning_rate_init=learning_rate))])

    print("Cross validating...")
    scores = cross_validate(model, X_train, y_train, cv=k, scoring=('r2', 'neg_mean_squared_error'), return_train_score=True)
    print_scores(scores)

    print("Fitting model...")

    # Fit the final model on the whole training split
    model.fit(X_train, y_train)

    # Summarise the CV folds here: the statistics printed by print_scores are
    # local to that function and are not available in this scope.
    CrossValData = _cv_summary(scores)

    return model, X_test, y_test, CrossValData


def _cv_summary(scores):
    """Return [best, worst, average] x [MSE, RMSE, R2] from cross_validate scores."""
    # The scorer reports negated MSE; take magnitudes before ranking folds.
    fold_mse = abs(scores['test_neg_mean_squared_error'])
    fold_r2 = scores['test_r2']

    best_mse, worst_mse, mean_mse = min(fold_mse), max(fold_mse), np.average(fold_mse)

    return [best_mse, np.sqrt(best_mse), max(fold_r2),
            worst_mse, np.sqrt(worst_mse), min(fold_r2),
            mean_mse, np.sqrt(mean_mse), np.average(fold_r2)]



def ANN_Test(model, X_test, y_test, DataEfficiencyToggle):
    """Evaluate a fitted pipeline on held-out data, print a report and plot results.

    Parameters
    ----------
    model : Pipeline
        Fitted pipeline whose second step (``model[1]``) is the MLPRegressor;
        its ``n_layers_``, ``n_features_in_``, ``n_outputs_``, ``n_iter_`` and
        ``loss_curve_`` attributes are read.
    X_test, y_test : array-like
        Held-out features and targets.
    DataEfficiencyToggle
        Not read by this function; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(R2, RMSE, MSE)`` on the test data, in that order.

    Notes
    -----
    Creates a two-panel matplotlib figure (loss history and parity plot) but
    does not call ``plt.show()``.
    """
    print("Testing model...")

    # Make predictions
    y_pred = model.predict(X_test)

    # Test-set metrics. mean_squared_error returns the MSE by default; the
    # `squared` keyword is deprecated/removed in newer scikit-learn releases.
    Test_R2 = r2_score(y_test, y_pred)
    Test_MSE = mean_squared_error(y_test, y_pred)
    Test_RMSE = np.sqrt(Test_MSE)

    ann = model[1]  # second pipeline step: the network itself

    # Print model info after CV
    print("~~~~~~~~~ Final Model Structure Info ~~~~~~~~~")
    print("Number of Layers: ", ann.n_layers_)
    print("Number of input features: ", ann.n_features_in_)
    print("Number of outputs: ", ann.n_outputs_)
    print("Number of iterations ran: ", ann.n_iter_)
    print("~~~~~~~~~ Final Model Error Info ~~~~~~~~~")
    print("Test MSE: ", Test_MSE)
    print("Test RMSE: ", Test_RMSE)
    print("Test R2: ", Test_R2)
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    # Plot ANN results.
    fig, ax = plt.subplots(1, 2)
    fig.suptitle("ANN Results")

    # Training loss per epoch, numbered from 1
    loss_curve = ann.loss_curve_
    ax[0].plot(np.arange(1, len(loss_curve) + 1), loss_curve)
    ax[0].set_xlabel("Epoch")
    ax[0].set_ylabel("Loss (Mean Squared Error)")
    ax[0].set_title('Epoch History, MSE: {:.2f}, RMSE: {:.2f}, R2: {:.2f}'.format(Test_MSE, Test_RMSE, Test_R2), fontsize=7)

    # Parity Plot
    ax[1].plot(y_test, y_pred, 'r*')
    ax[1].set_xlabel("y_test")
    ax[1].set_ylabel("y_pred")
    ax[1].set_title('Parity Plot, MSE: {:.2f}, RMSE: {:.2f}, R2: {:.2f}'.format(Test_MSE, Test_RMSE, Test_R2), fontsize=7)
    plt.tight_layout()

    return Test_R2, Test_RMSE, Test_MSE


In [12]:
# Load the merged per-bead dataset, skipping any column whose header
# contains 'Unnamed'.
df = pd.read_csv(
    'merged_beadnumber.csv',
    usecols=lambda column_name: 'Unnamed' not in column_name,
)


Unnamed: 0,Power Setpoint (kW),Dwell Time (s),BeadNumber,mean_Current,mean_MovAvgCurrent,skew_Current,skew_MovAvgCurrent,std_Current,std_MovAvgCurrent,kurt_Current,...,kurt_Voltage,kurt_MovAvgVoltage,mean_CTWD,mean_MovAvgCTWD,skew_CTWD,skew_MovAvgCTWD,std_CTWD,std_MovAvgCTWD,kurt_CTWD,kurt_MovAvgCTWD
0,1.5,0,1,173.567890,173.748810,0.002152,-0.512709,18.939622,5.162724,-0.523618,...,-0.258407,0.033431,0.988854,0.999496,-0.454550,-1.020466,0.515677,0.326603,-0.058903,1.204262
1,1.5,0,2,170.311765,170.211795,-0.071974,-1.968130,22.574079,7.505705,-0.287486,...,2.877873,3.666621,0.581278,0.580729,-0.031304,-0.596793,0.495253,0.271027,-0.302430,-0.038122
2,1.5,0,3,175.394545,175.448318,-0.002805,-0.156754,15.357727,5.627331,-0.502931,...,6.673105,7.447397,0.924177,0.907150,-0.160743,-1.305642,0.517577,0.253579,0.836489,3.015502
3,1.5,0,4,173.003670,173.030292,-0.041959,-0.929278,14.683835,4.502085,-0.132920,...,5.363752,3.285875,0.819607,0.810987,0.447147,-0.380226,0.671712,0.381049,-0.061450,-0.581963
4,1.5,0,5,173.602752,173.700845,-0.138773,-0.036100,14.436024,5.035746,0.088436,...,3.624624,1.791802,1.210140,1.198554,0.597192,0.264450,0.680301,0.377828,0.851019,-0.234001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2.1,60,16,167.720833,167.607938,-0.041573,-1.122775,13.381441,5.029456,-0.105404,...,8.159863,8.934576,2.152819,2.145960,0.207276,1.246630,1.042310,0.655879,0.516208,2.036745
236,2.1,60,17,167.516972,167.507158,0.015535,-0.735138,15.869643,6.577225,-0.272495,...,1.767405,0.950224,2.412225,2.395785,-0.043968,1.261754,0.772440,0.509553,1.232425,2.461461
237,2.1,60,18,166.976959,167.096921,0.106287,-0.950425,16.090883,5.724559,0.050415,...,6.823190,13.921013,2.305787,2.317585,-0.318839,-0.954914,1.035523,0.528071,0.442239,1.283591
238,2.1,60,19,168.792202,168.753679,0.093038,-0.909722,15.807333,6.356482,-0.562118,...,8.993393,12.293009,2.789633,2.744180,1.259364,2.100481,0.801301,0.549638,3.993076,5.431556


## Artificial Neural Network Parameters
### Constants 
 - test_size, train_size = 0.2, 0.8 - standard train/test split
 - k: cross-validation folds 
 - activation: Activation function for the hidden layer
 - solver: solver for weight optimization (adam)
 - epochs: Maximum number of iterations for solving
 - learning_rate: Initial learning rate, which sets the step size used when updating the weights.
### Hyperparameters
 - hidden_layer_sizes: Tuple of hidden-layer sizes; the i-th element is the number of neurons in the i-th hidden layer.
 - hidden_nodes: Number of neurons per hidden layer
 - batch_size: Size of minibatches

In [25]:
# Train/test split fractions and number of cross-validation folds
test_size = 0.2
train_size = 0.8
k = 5

# Candidate architectures: every (depth, width) pairing of uniform hidden
# layers, followed by a few hand-picked non-uniform shapes
hidden_layers = [1, 3, 5]
hidden_nodes = [3, 5, 7]
hidden_layer_sizes = [
    tuple(np.full(depth, width))
    for depth in hidden_layers
    for width in hidden_nodes
] + [(3,5,3), (3,5,7,5,3), (5,7,5)]

# Search / training settings
params = {
    'hidden_layer_sizes': hidden_layer_sizes,
    'learning_rate_init': 0.01,
    'epochs': 500,
    'batch_size': 100,
}

# Data stream whose moving-average statistics become the feature columns
val = "Current" # ['Current', 'Voltage', 'CTWD']
featlist = ['std', 'skew', 'kurt', 'mean']
features = [f'{feat}_MovAvg{val}' for feat in featlist]
print(features)
    # Parse dataset
    
def ANN_GridSearch(X,y, params):
    """Grid-search a StandardScaler + MLPRegressor pipeline and return the fitted search.

    Parameters
    ----------
    X, y : array-like
        Training features and targets, passed to ``GridSearchCV.fit``.
    params : dict or list of dict
        Parameter grid. Keys may be full pipeline names
        (``'ANN__hidden_layer_sizes'``) or bare MLPRegressor names
        (``'hidden_layer_sizes'``); ``'epochs'`` is accepted as an alias for
        ``'max_iter'``. A scalar value is treated as a single candidate.
        Grids that are already valid for GridSearchCV pass through unchanged.

    Returns
    -------
    GridSearchCV
        The fitted search object (previously it was discarded).
    """
    model = Pipeline([('scaler', StandardScaler(with_mean=True)),
                      ('ANN', MLPRegressor(activation='relu', solver='adam'))])

    if isinstance(params, dict):
        param_grid = _to_pipeline_grid(model, params)
    else:
        # GridSearchCV also accepts a sequence of grids; normalise each one.
        param_grid = [_to_pipeline_grid(model, grid) for grid in params]

    clf = GridSearchCV(model, param_grid)
    clf.fit(X, y)
    return clf


def _to_pipeline_grid(model, params, step='ANN'):
    """Map bare estimator parameter names/values onto a valid grid for `model`."""
    aliases = {'epochs': 'max_iter'}
    known = model.get_params(deep=True)

    grid = {}
    for name, candidates in params.items():
        if name not in known:
            # Address the parameter to the network step of the pipeline. If the
            # qualified name is unknown too, keep the original key so that
            # GridSearchCV reports the invalid parameter itself.
            qualified = f"{step}__{aliases.get(name, name)}"
            if qualified in known:
                name = qualified
        # Grid values must be sequences of candidates; wrap lone scalars.
        if not isinstance(candidates, (list, tuple, np.ndarray)):
            candidates = [candidates]
        grid[name] = candidates
    return grid
    


['std_MovAvgCurrent', 'skew_MovAvgCurrent', 'kurt_MovAvgCurrent', 'mean_MovAvgCurrent']


In [None]:

#PacketNumber = # any integer value between 1 and 10
#LayerNumbers = # any integer value between 1 and 20

# Iterate through data stream types (ie current, CTWD, etc.)

# Define input and output variables (X and y)
# Inputs: the moving-average statistic columns listed in `features`.
X = df[features]
# NOTE(review): the original cell left X blank and never defined y.
# 'Power Setpoint (kW)' is an ASSUMED regression target — confirm the intended
# column before trusting any result from this cell.
target_column = 'Power Setpoint (kW)'
y = df[target_column]

# Training settings, taken from the `params` dict defined with the model config
epochs = params['epochs']
batch_size = params['batch_size']
learning_rate = params['learning_rate_init']

# ANN_Train builds a uniform network from ONE depth and ONE width, whereas
# `hidden_layers` / `hidden_nodes` are lists of candidates.
# NOTE(review): the middle candidate of each list is an arbitrary pick — adjust.
n_hidden_layers = hidden_layers[1]
n_hidden_nodes = hidden_nodes[1]

# Passed through to ANN_Test, which accepts but does not read it.
DataEfficiencyToggle = False

# Train ANN
model, X_test, y_test, CrossValData = ANN_Train(X, y, test_size, train_size, k, epochs, n_hidden_nodes, n_hidden_layers, batch_size, learning_rate)

# Test ANN
SparseDataTest_R2, SparseDataTest_RMSE, SparseDataTest_MSE = ANN_Test(model, X_test, y_test, DataEfficiencyToggle)

# Show plots
plt.show()