In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import scipy.special
import math
import time
from sklearn import model_selection
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from itertools import product
import subprocess as sp
import os
from IPython.display import display, clear_output, Math, Latex

import nbimporter
from Class_FEA import *
from Class_Data_Prep import *
import util_func

# Make Keras create layers/weights in double precision (float64)
tf.keras.backend.set_floatx('float64')
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort
# that can occur when several libraries bundle their own OpenMP - confirm it is still needed
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
#from keras import callbacks

# Global matplotlib styling applied to every figure created in this notebook
plt.rcParams.update({'font.size':24, 'axes.linewidth':2, 'font.family':'Arial'})

In [None]:
# Abaqus job identification: job name, folder that holds the job, and user subroutine name
job_name = r"HGO_Tear_Propagation"
job_directory = r"D:\Users\Will\Tear_Propagation_Project"
umat_name = r"UMAT_HGO_C_Damage_Alpha"

# Working directory and the data/visualization folder located inside it
work_directory = r"C:\Users\swilli9\STRETCH_Lab_Projects\Tear_Propagation_Project"
data_directory = work_directory + "\\Data_and_Visualizations"

# FEA wrapper used to retrieve the FE information that is relevant to the ROM;
# the boundary-condition mask is read from "<data_directory>\<job_name>_BC_mask.csv"
bc_mask_file = data_directory + "\\" + job_name + "_BC_mask.csv"
FE = Abaqus(job_name, job_directory, data_directory, BC_file=bc_mask_file)

# Beta parameters (3 x 2 array) handed to the data-preparation class
beta = np.array([
    [16.75, 40.78],
    [26.11, 18.85],
    [69.21, 63.20],
])

# Container that handles the POD-BFGS functions and stores the ROM data
data = FE_Data_Prep(beta, FE)

# Empty storage for time recording
offline_times = {}
online_times = {}


In [None]:
# Set control for SVD calculation based on whether the calculation has already been run
SVD_files = False

# The SVD wall-clock time is cached next to the SVD results so that the offline
# time used later in the notebook (SVD_time) is also available when the
# decomposition is loaded from disk instead of being recomputed.
svd_time_file = r"{path}_SVD_time.csv".format(path=data.data_path)

# Run SVD if needed
if not SVD_files:

    # Track run time of svd for offline time summary
    start = time.time()

    # np.linalg.svd returns (U, s, Vh): PhiT holds the left singular vectors,
    # sigma the singular values and PsiT the (already transposed) right singular vectors
    PhiT, sigma, PsiT = np.linalg.svd(data.y)

    end = time.time()

    # store SVD_time as variable
    SVD_time = end-start

    # Keep only the leading (at most) 2000 singular vectors before saving
    PsiT = PsiT[:2000, :]
    PhiT = PhiT[:, :2000]

    pd.DataFrame(sigma).to_csv(r"{path}_Sigma.csv".format(path=data.data_path))
    pd.DataFrame(PsiT).to_csv(r"{path}_Psi.csv".format(path=data.data_path))
    pd.DataFrame(PhiT).to_csv(r"{path}_Phi.csv".format(path=data.data_path))
    pd.DataFrame([SVD_time]).to_csv(svd_time_file)

# Load SVD results from files if available
else:
    sigma = pd.read_csv(r"{path}_Sigma.csv".format(path=data.data_path), index_col=0).to_numpy().flatten()
    PsiT = pd.read_csv(r"{path}_Psi.csv".format(path=data.data_path), index_col=0).to_numpy()
    PhiT = pd.read_csv(r"{path}_Phi.csv".format(path=data.data_path), index_col=0).to_numpy()

    # Restore the cached SVD run time; without it SVD_time would be undefined
    # and the offline-time bookkeeping in the training loop would raise a NameError
    if os.path.exists(svd_time_file):
        SVD_time = float(pd.read_csv(svd_time_file, index_col=0).to_numpy().flatten()[0])
    else:
        SVD_time = 0.0
        print("WARNING: no cached SVD time found at {f}; "
              "SVD_time set to 0.0, offline times will exclude the SVD.".format(f=svd_time_file))

# Cumulative energy for the RIC analysis (identical for computed and loaded results):
# L is the first index reaching 99.99% cumulative energy, rounded up to a multiple of 5
cumulative_energy = np.cumsum(sigma)/np.sum(sigma)
high_energy_index = np.argwhere(cumulative_energy>=0.9999)[0]
L = int(5*np.ceil(high_energy_index / 5).item())


In [None]:
# Plot singular value decay (left) and cumulative energy (right)
fig, ax = plt.subplots(1,2, figsize=(15,7.5), dpi=600)

ax[0].semilogy(sigma, marker='o', ms=6, linestyle='')
ax[1].plot(cumulative_energy, marker='o', ms=6, linestyle='')

# Raw strings are used for every label containing LaTeX: '\s' in a normal string
# is an invalid escape sequence (SyntaxWarning on Python >= 3.12)
ax[0].set_ylabel(r'Singular Values ($\sigma_{l}$)')
ax[1].set_ylabel('Cumulative Energy')

for panel in ax:
    # Indicator for the 0.9999 RIC threshold
    panel.axvline(x=L, color="red", linestyle="--")
    panel.text(0.11, 0.65, r'$l={r}$'.format(r=L), transform=panel.transAxes, size=14)

    panel.set_xlabel('$l$')
    panel.set_xlim([-300, 6500])
    panel.set_xticks(range(0,6001,2000))

fig.tight_layout(pad=2.0)

# Save figure to EPS file
fig.savefig(r"{path}_sigma_decay_and_cumulative_energy.eps".format(path=data.data_path))


In [None]:
# A general class for constructing neural network architecture
class NN_Architecture(tf.keras.Model):
    """Keras model made of a stack of hidden layers followed by a linear Dense output layer.

    Parameters
    ----------
    num_hidden_layers : number of Dense hidden layers.
    num_neurons_per_layer : width of each Dense layer for 'DNN'; for 'CNN' the
        x-th Dense layer has int((x+1)*num_neurons_per_layer/2) neurons.
    input_dim : number of input features (only used for the Conv1D input shape).
    output_dim : number of outputs of the final Dense layer.
    activation : activation used by every hidden layer.
    kernel_initializer, bias_initializer : initializers of the Dense hidden layers.
    NN_type : 'DNN' (Dense layers only) or 'CNN' (Conv1D + Flatten + Dense layers).
    **kwargs : forwarded to tf.keras.Model.

    Raises
    ------
    ValueError : if NN_type is neither 'DNN' nor 'CNN'.
    """
    def __init__(self,
            num_hidden_layers=2, 
            num_neurons_per_layer=2**7,
            input_dim=1,
            output_dim=1,
            activation=tf.keras.activations.swish,
            kernel_initializer=tf.keras.initializers.GlorotNormal(seed=42),
            bias_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.5, seed=42),
            NN_type = 'DNN',
            **kwargs):
        
        # Initialize superclass with its default parameter signature
        super().__init__(**kwargs)
        
        # Store hyperparameters
        self.num_hidden_layers = num_hidden_layers
        self.output_dim = output_dim     
        
        hidden_activation = tf.keras.activations.get(activation)
        
        # Select DNN architecture
        if NN_type == 'DNN':
            self.hidden = [tf.keras.layers.Dense(num_neurons_per_layer,
                             activation=hidden_activation,
                             kernel_initializer=kernel_initializer, 
                             bias_initializer=bias_initializer)
                             for _ in range(num_hidden_layers)]
        # Select CNN with one convolutional layer followed by the dense layers
        elif NN_type == 'CNN':
            # The Conv1D layer keeps the Keras default initializers
            conv_list = [tf.keras.layers.Conv1D(32, 3,
                             activation=hidden_activation,
                             data_format="channels_last",
                             input_shape=(input_dim,1)),
                         tf.keras.layers.Flatten()]
            
            self.hidden = conv_list + [tf.keras.layers.Dense(int((x+1)*(num_neurons_per_layer)/2),
                             activation=hidden_activation,
                             kernel_initializer=kernel_initializer, 
                             bias_initializer=bias_initializer)
                             for x in range(num_hidden_layers)]
        # Fail early instead of leaving self.hidden undefined until the first forward pass
        else:
            raise ValueError("NN_type must be 'DNN' or 'CNN', got {t!r}".format(t=NN_type))
        
        # Number of layers actually applied in call() (includes Conv1D/Flatten for the CNN)
        self.num_hidden_layers = len(self.hidden)
        
        # Set output layer
        self.out = tf.keras.layers.Dense(output_dim)
        
    
    # Mimic functionality of model(x)
    def call(self, X):
        """Forward pass: apply every hidden layer in order, then the output layer."""
        Z = X
        # Iterating over the list also covers the case of zero hidden layers
        for layer in self.hidden:
            Z = layer(Z)
        
        # return output
        return self.out(Z)


In [None]:
# A class for conducting ML training and solving the single domain ML problem
class ML_Solver:
    """Trains a model on (x, y) pairs with an RMSE loss and tracks the loss per epoch.

    Parameters
    ----------
    train : (x, y) pair used for training.
    test : (x, y) pair used to compute the validation loss.
    model_c : callable model; its output is multiplied by y_max before being
        compared with y, i.e. the network learns targets normalized by the
        per-column maximum absolute value of the training targets.
    """
    def __init__(self, train, test, model_c):
        
        # save NN model to class object
        self.model_c = model_c
        
        # separate training and testing data for validation to class object
        self.x = train[0]
        self.y = train[1]
        
        self.x_val = test[0]
        self.y_val = test[1]
        
        # per-column maximum |y| of the training targets, used to rescale predictions
        self.y_max = tf.reduce_max(tf.math.abs(self.y), 0, keepdims=True)

    
    # Compute the RMSE loss between rescaled predictions and targets
    def get_loss(self, x, y):
        """Return the root-mean-square error over all entries of the output tensor."""
        
        # scale predictions up from normalization
        pred = self.model_c(x)*self.y_max

        # Mean over every entry, then square root. Equivalent to averaging over
        # outputs and then over samples, but independent of static shape information.
        loss = tf.sqrt(tf.reduce_mean(tf.square(pred - y)))

        return loss

    # Gets trainable parameter gradients
    def get_gradient(self, x, y):
        """Return the gradients of the loss w.r.t. the model's trainable variables."""
        # Trainable variables are watched automatically by the tape
        with tf.GradientTape() as tape:
            loss = self.get_loss(x, y)

        # compute and return gradients for trainable variables
        g = tape.gradient(loss, self.model_c.trainable_variables)

        return g
    
    # Primary solving function for ML training
    def solve(self, optimizer, numEpochs, batch_size):
        """Train for numEpochs epochs and return the accumulated training time in seconds.

        Only the batch updates are timed; the per-epoch loss evaluation on the
        full training and validation sets is excluded. The loss histories are
        stored in self.training_loss and self.validation_loss (one entry per epoch).
        """

        # function wrapper for graph mode implementation
        @tf.function
        def train_step(x, y):
            # Retrieve loss gradient w.r.t. trainable variables
            grad_theta = self.get_gradient(x, y)

            # Perform gradient descent step
            optimizer.apply_gradients(zip(grad_theta, self.model_c.trainable_variables))

        # Split data into training batches
        train_dataset = tf.data.Dataset.from_tensor_slices((self.x, self.y))
        train_dataset = train_dataset.shuffle(buffer_size=self.x.shape[0], seed=42).batch(batch_size)
 
        self.training_loss = []
        self.validation_loss = []
        train_time = 0
        # Iterate training
        for i in range(numEpochs):
            
            # Begin tracking training time (perf_counter: monotonic, high resolution)
            start = time.perf_counter()
            
            # Train on each batch
            for (x_batch_train, y_batch_train) in train_dataset:
                train_step(x_batch_train, y_batch_train)
            
            # End tracking training time
            end = time.perf_counter()
            
            # Add this epoch to training time total
            train_time += end-start
            
            # Compute loss for full dataset to track training progress
            loss_train = self.get_loss(self.x, self.y)
            loss_val = self.get_loss(self.x_val, self.y_val)
            
            self.training_loss.append(loss_train)
            self.validation_loss.append(loss_val)
            
            # Display current state of training process to user
            clear_output(wait=True)
            display("Epoch {j}:".format(j=i+1))
            display("    Training Loss = {loss}".format(loss=loss_train))
            display("    Validation Loss = {loss}".format(loss=loss_val))
        
        # return total training time
        return train_time
            
    # Check GPU memory during runs for optimizing batch size    
#     def get_gpu_memory(self):
#         command = "nvidia-smi --query-gpu=memory.free --format=csv"
#         memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
#         memory_free_values = [int(x.split()[0]) for i, x in enumerate(memory_free_info)]
#         return memory_free_values
#         base = tf.constant(10.0, shape = y_signs.shape, dtype=tf.float64)
#         model_x = tf.math.pow(base,self.model_c(x))*y_signs 
#         tf.reduce_std(y, 0, keepdims=True) + tf.reduce_mean(y, 0, keepdims=True)
#         temp_0 = tf.math.reduce_sum(tf.square( self.model_c(x) - y ), axis=1)
#         temp_1 = tf.math.reduce_sum(tf.square( y ), axis=1)
#         phi_c = tf.sqrt(tf.math.reduce_sum(temp_0/temp_1))
#         phi_c = self.a*tf.math.reduce_mean(tf.square( self.model_c(x) - y ))


In [None]:
# Test-case controls: output data types, network types and POD basis dimensions
train_data_types = ["POD", "full"]
NN_types = ["DNN", "CNN"]
POD_dimensions = (list(range(1,6)) + list(range(10,26,5))
                  + list(range(50,126,25)) + list(range(175,326,50)))

# Normalize every input feature (column) by its own maximum and prepare tensor objects
x_temp = data.x
x_normal_feature = x_temp / np.max(x_temp, axis=0, keepdims=True)
x = x_normal_feature
y = data.y
y_tf = tf.constant(y)

# Hyperparameters and random seed
numEpochs = 2**10
n_split = 5
timing_reps = 10
batch_size = x.shape[0]
input_dim = x.shape[1]
tf.keras.utils.set_random_seed(42)

# Column layouts of the output dataframes
columns_pareto = ["NN_type", "POD_dim", "train_time", "test_time", "approx_error", "approx_rel_error"]
columns_loss = ["NN_type", "POD_dim", "k-fold"] + [f"L(epoch_{e})" for e in range(1,numEpochs+1)]
columns_snap_err = ["NN_type", "POD_dim"] + [f"E(p_{s})" for s in range(1,x.shape[0]+1)]

# Empty dataframes that collect the results of the main loop
pareto_frame = pd.DataFrame(columns=columns_pareto)
train_loss_frame = pd.DataFrame(columns=columns_loss)
val_loss_frame = pd.DataFrame(columns=columns_loss)
snap_err_frame = pd.DataFrame(columns=columns_snap_err)

In [None]:
# Main data gathering loop, iterates over NN types (CNN, DNN) and output data types (POD, full)

# POD basis dimensions for which field errors, snapshot errors and loss histories are stored
detail_dims = (1, 10, 325)


def _new_adamax(lr, b1):
    """Return a fresh Adamax optimizer.

    A new optimizer is created for every model so that no optimizer state
    (moments, iteration counter) leaks between independently initialized models.
    """
    return tf.keras.optimizers.Adamax(learning_rate=lr, beta_1=b1, epsilon=1e-7)


def _timed_prediction(model, x_in, y_scale, reps):
    """Return (prediction as numpy array, average wall-clock time of one prediction).

    The model is wrapped in tf.function and called once before timing so that
    graph tracing is not counted as online prediction time.
    """
    model_predict = tf.function(model)

    # Warm-up call (triggers tracing); its result is also valid as the prediction
    pred = model_predict(x_in)*y_scale

    elapsed = 0.0
    for _ in range(reps):
        start = time.perf_counter()

        pred = model_predict(x_in)*y_scale

        end = time.perf_counter()

        elapsed += end-start

    return pred.numpy(), (elapsed/reps if reps else 0.0)


def _field_errors(U_approx, U_ref):
    """Return (per-snapshot mean squared error, global RMSE, global relative RMSE).

    The relative value divides each snapshot's mean squared error by the sum of
    squares of the reference snapshot.
    NOTE(review): a reference snapshot that is identically zero yields inf/nan here.
    """
    U_approx = np.asarray(U_approx)
    U_ref = np.asarray(U_ref)

    square_norm = (1/U_approx.shape[1])*np.sum(np.square(U_approx-U_ref), axis=1)
    rel_square_norm = square_norm/np.sum(np.square(U_ref), axis=1)

    rmse = np.sqrt( (1/len(square_norm)) * np.sum(square_norm) ).item()
    rmse_rel = np.sqrt( (1/len(rel_square_norm)) * np.sum(rel_square_norm) ).item()

    return square_norm, rmse, rmse_rel


def _store_losses(nn, dim, fold, solver):
    """Append the training and validation loss history of one fold to the loss frames."""
    loss_id = np.array([nn, dim, fold])
    train_loss_frame.loc[len(train_loss_frame)] = np.hstack( (loss_id, np.array(solver.training_loss)) )
    val_loss_frame.loc[len(val_loss_frame)] = np.hstack( (loss_id, np.array(solver.validation_loss)) )


def _store_snapshot_errors(nn, dim, errors):
    """Append one row of fold-averaged snapshot errors to snap_err_frame."""
    snap_err_frame.loc[len(snap_err_frame)] = np.hstack( (np.array([nn, dim]), errors) )


for dtype, nntype in product(train_data_types, NN_types):
    
    # shape x arrays based on NN input dimension requirements
    if nntype == 'DNN':
        x_nn = x 
    elif nntype == 'CNN':
        x_nn = np.reshape(x, (x.shape[0],x.shape[1],1))
    
    # convert x arrays to tf tensor
    x_tf = tf.constant(x_nn)
    
    # Select controls for POD-ML
    if dtype == 'POD':
        
        # Optimizer parameters for POD-ML
        lr = 1e-3
        b1 = 0.95

        # Run POD-ML for all desired basis dimensions
        for l in POD_dimensions:
            
            # Compute true POD coefficients, yPOD, for given basis
            Vhr = PsiT[:l,:]
            yPOD = np.matmul(PhiT[:,:l], np.diag(sigma[:l]))
            
            store_details = l in detail_dims
            
            # Save true POD displacement field approximation for selected basis dimensions
            if store_details:
                U_svd = np.matmul(yPOD, Vhr)
                pd.DataFrame(U_svd).to_csv(r"{path}_U_svd_{L}.csv".format(path=data.data_path, L=l))
                field_dof_err = np.zeros_like(U_svd)
                snap_err = np.zeros((x_nn.shape[0],))
                
            # Perform k-fold cross-validation procedure
            for iModel, (train_index, test_index) in enumerate(model_selection.KFold(n_split, shuffle=True).split(x_nn)):
        
                # Separate training and testing inputs and POD coefficients
                xtrain, xtest = tf.constant(x_nn[train_index]), tf.constant(x_nn[test_index])
                ytrain, ytest = tf.constant(yPOD[train_index]), tf.constant(yPOD[test_index])

                # Initialize NN model
                pod_ml_model = NN_Architecture(num_hidden_layers=2, num_neurons_per_layer=2**6, 
                                               input_dim = input_dim, output_dim = l, NN_type=nntype)

                # Build DNN if necessary
                if nntype == 'DNN':
                    pod_ml_model.build(input_shape=(None, input_dim))

                # Initialize ML solver
                pod_ml_solver = ML_Solver( (xtrain, ytrain), (xtest, ytest), pod_ml_model )

                # Perform ML training with an optimizer dedicated to this model
                optimizer = _new_adamax(lr, b1)
                train_time = pod_ml_solver.solve(optimizer, numEpochs, batch_size)

                # POD-ML offline time also includes the SVD (SVD_time comes from the SVD cell)
                train_time += SVD_time

                # Test and record average online prediction time. Predictions are
                # rescaled with the solver's y_max, i.e. the same (training-fold)
                # scale the network was trained against.
                pred, pred_time = _timed_prediction(pod_ml_model, x_tf, pod_ml_solver.y_max, timing_reps)
                
                # Compute POD-ML prediction of displacement field
                U_pred = np.matmul(pred, Vhr)

                # relative and non-relative RMSE between POD-ML and FE displacement results
                approx_square_norm, approx_rmse, approx_rmse_rel = _field_errors(U_pred, y)
                
                # Save relevant results to dataframe
                pareto_frame.loc[len(pareto_frame)] = [nntype, l, train_time, pred_time, approx_rmse, approx_rmse_rel]

                # Sum snapshot and nodal errors and 
                # save training and validation loss results for selected basis dimensions
                if store_details:
                    field_dof_err += np.abs(U_pred-U_svd)
                    snap_err += np.sqrt(approx_square_norm).flatten()
                    _store_losses(nntype, l, iModel+1, pod_ml_solver)

            # Save fold-averaged snapshot-specific and nodal errors for selected basis dimensions
            if store_details:
                field_dof_err = field_dof_err/n_split
                pd.DataFrame(field_dof_err).to_csv(
                    r"{path}_field_dof_error_pod_{L}_{nn}.csv".format(path=data.data_path, L=l, nn=nntype)
                )
                
                snap_err = snap_err/n_split
                _store_snapshot_errors(nntype, l, snap_err)

    # Select controls for full-field output ML
    else:
        
        # Optimizer parameters for full-field ML (POD_dim 0 marks the full-field case)
        l=0
        lr = 5e-4
        b1 = 0.9

        # initialize results data storage
        field_dof_err = np.zeros_like(y)
        snap_err = np.zeros((x_nn.shape[0],))

        # Perform k-fold cross-validation procedure
        for iModel, (train_index, test_index) in enumerate(model_selection.KFold(n_split, shuffle=True).split(x_nn)):

            # Separate training and testing data and store in tf tensors
            xtrain, xtest = tf.constant(x_nn[train_index]), tf.constant(x_nn[test_index])
            ytrain, ytest = tf.constant(y[train_index]), tf.constant(y[test_index])

            # Initialize ML model
            ml_model = NN_Architecture(num_hidden_layers=2, num_neurons_per_layer=2**6, 
                                           input_dim = input_dim, output_dim = y.shape[1], NN_type=nntype)

            # Build DNN if necessary
            if nntype == 'DNN':
                ml_model.build(input_shape=(None, input_dim))

            # Initialize ML solver
            ml_solver = ML_Solver( (xtrain, ytrain), (xtest, ytest), ml_model )

            # Train ML model with an optimizer dedicated to this model
            optimizer = _new_adamax(lr, b1)
            train_time = ml_solver.solve(optimizer, numEpochs, batch_size)

            # Test and record average online prediction time (rescaled with the training-fold y_max)
            pred, pred_time = _timed_prediction(ml_model, x_tf, ml_solver.y_max, timing_reps)
            
            # relative and non-relative RMSE between the full-field ML prediction and FE results.
            # The error is computed from this model's own prediction `pred`, not from a
            # U_pred left over from the POD-ML branch.
            approx_square_norm, approx_rmse, approx_rmse_rel = _field_errors(pred, y)

            # save relevant results to dataframe
            pareto_frame.loc[len(pareto_frame)] = [nntype, l, train_time, pred_time, approx_rmse, approx_rmse_rel]

            # Sum snapshot specific and nodal error
            field_dof_err += np.abs(pred-np.asarray(y))
            snap_err += np.sqrt(approx_square_norm).flatten()

            # Save training and validation loss results
            _store_losses(nntype, l, iModel+1, ml_solver)

        # Save fold-averaged snapshot and nodal error results
        field_dof_err = field_dof_err/n_split
        pd.DataFrame(field_dof_err).to_csv(
            r"{path}_field_dof_error_full_{nn}.csv".format(path=data.data_path, nn=nntype)
        )

        snap_err = snap_err/n_split
        _store_snapshot_errors(nntype, l, snap_err)

display("ANALYSIS COMPLETE.")

# Save all results dataframes to CSV for later use
pareto_frame.to_csv(r"{path}_pareto_frame.csv".format(path=data.data_path))
train_loss_frame.to_csv(r"{path}_train_loss_frame.csv".format(path=data.data_path))
val_loss_frame.to_csv(r"{path}_val_loss_frame.csv".format(path=data.data_path))
snap_err_frame.to_csv(r"{path}_snap_err_frame.csv".format(path=data.data_path))


In [None]:
# Benchmark for POD-ML: RMSE of the rank-l POD reconstruction with respect to the FE data
svd_err = []
for l in POD_dimensions:
    # Leading l left singular vectors, singular values and right singular vectors
    Ur, Sr = PhiT[:,:l], np.diag(sigma[:l])
    Vhr = tf.constant(PsiT[:l,:])

    # POD coefficients and the reconstructed displacement field
    yPOD = np.matmul(Ur, Sr)
    U_svd = np.matmul(yPOD, Vhr)

    # Mean squared error of every snapshot, then RMSE over all snapshots
    n_dof = U_svd.shape[1]
    temp = (1/n_dof)*np.sum(np.square(U_svd-y_tf), axis=1)
    rmse_l = np.sqrt( (1/len(temp)) * np.sum(temp) ).item()
    svd_err.append(rmse_l)

svd_err_file = r"{path}_SVD_norm_error.csv".format(path=data.data_path)
pd.DataFrame(svd_err).to_csv(svd_err_file)

In [None]:
# Load results dataframes
pareto_frame = pd.read_csv(r"{path}_pareto_frame.csv".format(path=data.data_path), index_col=0)
train_loss_frame = pd.read_csv(r"{path}_train_loss_frame.csv".format(path=data.data_path), index_col=0)
val_loss_frame = pd.read_csv(r"{path}_val_loss_frame.csv".format(path=data.data_path), index_col=0)
snap_err_frame = pd.read_csv(r"{path}_snap_err_frame.csv".format(path=data.data_path), index_col=0)
svd_err=pd.read_csv(r"{path}_SVD_norm_error.csv".format(path=data.data_path), index_col=0).values.flatten()

# Plot POD-ML versus true POD results for RMSE with respect to FE versus basis dimension, l

# Average the k-fold results of every (NN type, basis dimension) pair
pareto_results = pareto_frame.groupby(["NN_type","POD_dim"])[["train_time", "test_time", "approx_error"]].mean()

CNN_results = pareto_results.loc["CNN"]
DNN_results = pareto_results.loc["DNN"]

marker = ["o", "^", "s", "v", "P", "*", "D", "p"]
marker_size = 10

fig, ax = plt.subplots(1,1, figsize=(15,7.5), dpi=600)

# The first row of each result table is POD_dim 0 (full-field ML) and is skipped here.
# .iloc makes the positional slicing explicit, because the index itself holds integers.
pod_dims_dnn = DNN_results.index[1:]
pod_dims_cnn = CNN_results.index[1:]

ax.semilogy(pod_dims_dnn, DNN_results["approx_error"].iloc[1:], marker=marker[0], ms=marker_size, linestyle='', label="DNN POD-ML")
ax.semilogy(pod_dims_cnn, CNN_results["approx_error"].iloc[1:], marker=marker[1], ms=marker_size, linestyle='', label="CNN POD-ML")
ax.semilogy(pod_dims_dnn, svd_err, marker=marker[2], ms=marker_size, linestyle='', label="True POD")

# Raw string: '\m' in a normal string is an invalid escape sequence
ax.set_ylabel(r'$\mathcal{E}_{POD-ML}$')
ax.set_xlabel('$l$')
ax.set_xlim([-10, 335])
ax.set_ylim([1e-6, 5e-1])
ax.set_xticks([1] + list(range(25,326,25)))
ax.legend(loc='best', fontsize=18, ncol=1, markerscale = 1, columnspacing=0.8, 
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')

fig.tight_layout(pad=2.0)
fig.savefig(r"{path}_pod_ml_approx_error.eps".format(path=data.data_path))



In [None]:
# Plot ML and POD-ML results for RMSE versus training time and prediction time

marker = ["o", "^", "s", "v", "P", "*", "D", "p"]
fig, ax = plt.subplots(1,2, figsize=(15,7.5), dpi=600)

marker_size = 10

# One (label, results, full-field?) entry per plotted series, in legend order.
# POD_dim 0 (label-based .loc[0]) is the full-field ML model; the remaining rows
# (positional .iloc[1:]) are the POD-ML models.
series = [("CNN", CNN_results, True),
          ("DNN", DNN_results, True),
          ("CNN POD-ML", CNN_results, False),
          ("DNN POD-ML", DNN_results, False)]


def _select(results, column, full_field):
    """Return the full-field value (POD_dim 0) or the POD-ML values of a column."""
    return results[column].loc[0] if full_field else results[column].iloc[1:]


for k, (label, results, full_field) in enumerate(series):
    err = _select(results, "approx_error", full_field)

    ax[0].loglog(_select(results, "train_time", full_field), err,
                 marker=marker[k], ms=marker_size, linestyle='', label=label)

    # Earlier series are drawn on top of later ones (zorder 15, 10, 5, 0)
    ax[1].semilogy(_select(results, "test_time", full_field), err,
                   marker=marker[k], ms=marker_size, linestyle='', label=label, zorder=15-5*k)

ax[0].set_xlim([10, 1100])
ax[1].set_xlim([0.002, 0.008])

ax[0].set_xlabel('Training Time (s)')
ax[1].set_xlabel('Prediction Time (s)')

# Raw string: '\m' in a normal string is an invalid escape sequence
fig.supylabel(r'$\mathcal{E}_{ML}$ / $\mathcal{E}_{POD-ML}$', x=0.05, y=0.55)

for c in range(len(ax)):
    ax[c].set_ylim([0.008, 1])
    ax[c].legend(loc='best', fontsize=18, ncol=1, markerscale = 1, columnspacing=0.8, 
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')

fig.tight_layout(pad=2.0)
fig.savefig(r"{path}_pareto_train_predict.eps".format(path=data.data_path))



In [None]:
# Plot ML and POD-ML results for RMSE of individual snapshots across parameter cases and luminal pressures

# Column 3 of data.x is used as the pressure axis. The 8 smallest pressures are
# taken as the first snapshot of each parameter case.
# NOTE(review): this assumes exactly 8 cases stored contiguously - confirm against the data set.
xplot = np.array(data.x[:,3]).flatten()
start_ind = np.sort(np.argsort(xplot)[:8])
stop_ind = np.hstack([start_ind[1:], len(xplot)])
marker = ["o", "^", "s", "v", "P", "*", "D", "p"]
color = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#FFE933', '#e377c2', '#33E6FF']

fig, ax = plt.subplots(4,2, figsize=(15,20), dpi=600)

# Order the rows as: DNN (POD_dim ascending, 0 = full-field ML first), then CNN.
# Sorting replaces a hard-coded 10-entry reindex, which only matched one particular
# frame layout and produced NaN rows (and a crash) for frames with fewer rows.
df_temp = snap_err_frame.copy()
df_temp["POD_dim"] = pd.to_numeric(df_temp["POD_dim"])
nn_rank = df_temp["NN_type"].map({"DNN": 0, "CNN": 1}).to_numpy()
df_temp = df_temp.iloc[np.lexsort((df_temp["POD_dim"].to_numpy(), nn_rank))]

r=0
c=0
for _, row in df_temp.iterrows():
    pod_dim = int(row["POD_dim"])
    
    # l = 25 is not shown in this figure
    if pod_dim == 25:
        continue
    
    E = np.array(row.iloc[2:].values, dtype=float) 
        
    if pod_dim == 0:
        title = "{nn}".format(nn=row["NN_type"])
    else:
        title = "{nn} POD $l={L}$".format(nn=row["NN_type"], L=pod_dim)
    
    # DNN results fill the left column, CNN results the right column
    if (row["NN_type"] == "CNN" and c == 0):
        r=0
        c=1
    
    # One marker/color per parameter case
    for i in range(len(stop_ind)):
        label = r"$\mu_{b}$".format(b=i+1)    
        ax[r,c].semilogy(xplot[start_ind[i]:stop_ind[i]], E[start_ind[i]:stop_ind[i]], color=color[i], marker=marker[i], ms=6, linestyle='', label=label)

    ax[r,c].set_title(title)
    ax[r,c].set_xlim([-2, 53])
    ax[r,c].set_xticks(np.array(range(0,51,5)))
    ax[r,c].set_ylim([1e-3, 1e0])
    
    # Legend only in the top-left panel; y-labels only in the left column
    if r+c == 0:
        ax[r,c].legend(loc='best', fontsize=20, ncol=4, markerscale = 1.5, columnspacing=0.8, 
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')
        ax[r,c].set_ylabel(r"$\mathcal{E}^{(i)}_{ML}$")
    elif c==0:
        ax[r,c].set_ylabel(r"$\mathcal{E}^{(i)}_{POD-ML}$")
    r+=1

fig.supxlabel('Pressure (kPa)', x=0.55, y=0.05)
fig.tight_layout(pad=2.0)
fig.savefig(r"{path}_err_vs_pressure.eps".format(path=data.data_path))

In [None]:
# This block plots a subset of the snapshot-error panels (full-field ML and POD l=10)
# to abridge results for presentations

# Column 3 of data.x is used as the pressure axis. The 8 smallest pressures are
# taken as the first snapshot of each parameter case.
# NOTE(review): this assumes exactly 8 cases stored contiguously - confirm against the data set.
xplot = np.array(data.x[:,3]).flatten()
start_ind = np.sort(np.argsort(xplot)[:8])
stop_ind = np.hstack([start_ind[1:], len(xplot)])
marker = ["o", "^", "s", "v", "P", "*", "D", "p"]
color = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#FFE933', '#e377c2', '#33E6FF']

fig, ax = plt.subplots(2,2, figsize=(15,11), dpi=600)

# Order the rows as: DNN (POD_dim ascending, 0 = full-field ML first), then CNN.
# Sorting replaces a hard-coded 10-entry reindex, which only matched one particular
# frame layout and produced NaN rows (and a crash) for frames with fewer rows.
df_temp = snap_err_frame.copy()
df_temp["POD_dim"] = pd.to_numeric(df_temp["POD_dim"])
nn_rank = df_temp["NN_type"].map({"DNN": 0, "CNN": 1}).to_numpy()
df_temp = df_temp.iloc[np.lexsort((df_temp["POD_dim"].to_numpy(), nn_rank))]

r=0
c=0
for _, row in df_temp.iterrows():
    pod_dim = int(row["POD_dim"])
    
    # Only the full-field model (0) and l = 10 are shown in the abridged figure
    if (pod_dim == 1 or pod_dim > 10):
        continue
    
    E = np.array(row.iloc[2:].values, dtype=float)
        
    if pod_dim == 0:
        title = "{nn}".format(nn=row["NN_type"])
    else:
        title = "{nn} POD $l={L}$".format(nn=row["NN_type"], L=pod_dim)
    
    # DNN results fill the left column, CNN results the right column
    if (row["NN_type"] == "CNN" and c == 0):
        r=0
        c=1
    
    # One marker/color per parameter case
    for i in range(len(stop_ind)):
        label = r"$\mu_{b}$".format(b=i+1)    
        ax[r,c].semilogy(xplot[start_ind[i]:stop_ind[i]], E[start_ind[i]:stop_ind[i]], color=color[i], marker=marker[i], ms=6, linestyle='', label=label)

    ax[r,c].set_title(title)
    ax[r,c].set_xlim([-2, 53])
    ax[r,c].set_xticks(np.array(range(0,51,5)))
    ax[r,c].set_ylim([1e-3, 1e0])
    
    # Legend only in the top-left panel; y-labels only in the left column
    if r+c == 0:
        ax[r,c].legend(loc='best', fontsize=20, ncol=4, markerscale = 1.5, columnspacing=0.8, 
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')
        ax[r,c].set_ylabel(r"$\mathcal{E}^{(i)}_{ML}$")
    elif c==0:
        ax[r,c].set_ylabel(r"$\mathcal{E}^{(i)}_{POD-ML}$")
    r+=1

fig.supxlabel('Pressure (kPa)', x=0.53, y=0.1)
fig.tight_layout(pad=2.0)
fig.savefig(r"{path}_err_vs_pressure_sub_figure.eps".format(path=data.data_path))


In [None]:
# Plot training and validation loss for cross-validation procedures from ML and selected POD-ML models


def _loss_table(frame):
    """Return a loss frame indexed by (NN_type, POD_dim, k-fold) with float loss columns.

    POD_dim and k-fold are converted to integers so that lookups work both for
    frames reloaded from CSV and for the string-valued frames built in memory.
    """
    table = frame.copy()
    key_cols = ["NN_type", "POD_dim", "k-fold"]
    table["POD_dim"] = pd.to_numeric(table["POD_dim"]).astype(int)
    table["k-fold"] = pd.to_numeric(table["k-fold"]).astype(int)
    loss_cols = [col for col in table.columns if col not in key_cols]
    table[loss_cols] = table[loss_cols].astype(float)
    return table.groupby(key_cols).sum()


train_loss = _loss_table(train_loss_frame)
val_loss = _loss_table(val_loss_frame)

# POD_dim 0 is the full-field ML model; the other entries are POD basis dimensions
dim_list = [0, 1, 10, 325]
marker = ["o", "^", "s", "v", "P", "*", "D", "p"]
color = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#FFE933', '#e377c2', '#33E6FF']

fig_dnn, ax_dnn = plt.subplots(4,2, figsize=(15,20), dpi=600)
fig_cnn, ax_cnn = plt.subplots(4,2, figsize=(15,20), dpi=600)
axes_by_nn = {"DNN": ax_dnn, "CNN": ax_cnn}

# One y-range per figure row; titles are derived from dim_list so they cannot drift apart
ylims = [[1e-2, 2e0], [1e-1, 1e3], [1e-1, 1e2], [1e-1, 2e1]]
titles = ["ML" if dim == 0 else "POD-ML $l={L}$".format(L=dim) for dim in dim_list]

for nn, (d, dim) in product(NN_types, enumerate(dim_list)):
    ax = axes_by_nn[nn]
    
    # Loss histories of all folds for this model: rows = k-fold, columns = epochs
    temp0 = train_loss.loc[(nn, dim)]
    temp1 = val_loss.loc[(nn, dim)]
    
    for col, ylabel in enumerate(['Training Loss', 'Validation Loss']):
        ax[d, col].set_ylabel(ylabel)
        ax[d, col].set_xlim([-100, 1124])
        ax[d, col].set_title(titles[d])
        ax[d, col].set_ylim(ylims[d])
    
    for i in temp0.index:
        # The DNN figure uses the explicit palette, the CNN figure the default color cycle
        style = {"color": color[i]} if nn == "DNN" else {}
        
        train_curve = temp0.loc[i].to_numpy(dtype=float)
        val_curve = temp1.loc[i].to_numpy(dtype=float)
        
        # Left panel: training loss; right panel: validation loss
        ax[d, 0].semilogy(np.arange(1, len(train_curve)+1), train_curve, marker=marker[i-1], ms=5, linestyle='', label="Model {i}".format(i=i), **style)
        ax[d, 1].semilogy(np.arange(1, len(val_curve)+1), val_curve, marker=marker[i-1], ms=5, linestyle='', label="Model {i}".format(i=i), **style)

    if d == 0:
        ax[d, 0].legend(loc='best', fontsize=20, ncol=2, markerscale = 1.5, columnspacing=0.8, 
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')

        
fig_dnn.supxlabel('Epoch', x=0.53, y=0.05)
fig_cnn.supxlabel('Epoch', x=0.53, y=0.05)

fig_dnn.tight_layout(pad=2.0)
fig_cnn.tight_layout(pad=2.0)

fig_dnn.savefig(r"{path}_loss_dnn.eps".format(path=data.data_path))
fig_cnn.savefig(r"{path}_loss_cnn.eps".format(path=data.data_path))

In [None]:
# Plot tear length versus pressure across all parameter cases 
# and output averages and standard deviations of final tear length and tear length at initial propagation
from scipy import stats
fig, ax = plt.subplots(1,1, figsize=(15,7.5), dpi=600)

# Column 3 of data.x is used as the pressure axis. The 8 smallest pressures are
# taken as the first snapshot of each parameter case.
# NOTE(review): this assumes exactly 8 cases stored contiguously - confirm against the data set.
xplot = np.array(data.x[:,3]).flatten()
start_ind = np.sort(np.argsort(xplot)[:8])
stop_ind = np.hstack([start_ind[1:], len(xplot)])

marker = ["o", "^", "s", "v", "P", "*", "D", "p"]
color = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#FFE933', '#e377c2', '#33E6FF']


def _mode_value(values):
    """Return the most frequent value as a scalar (scipy returns an array or a scalar depending on version)."""
    return np.ravel(stats.mode(values)[0])[0]


avg_a = []
tp_init=[]
for i in range(len(stop_ind)):
    case = slice(start_ind[i], stop_ind[i])
    
    # The plotted tear length is the sum of columns 4 and 5 of data.x
    pressure = data.x[case,3]
    length_part_4 = data.x[case,4]
    length_part_5 = data.x[case,5]
    
    ax.plot(pressure, length_part_4+length_part_5, color=color[i], 
               marker=marker[i], ms=6, linestyle='', label=r"$\mu_{j}$".format(j=i+1))
    
    # Final tear length: last snapshot of the case
    avg_a.append(length_part_4[-1]+length_part_5[-1])
    
    # Tear length at initial propagation: most frequent value of each component.
    # NOTE(review): the original added the mode of column 4 twice; column 5 is used
    # for the second term so that it matches the plotted length - confirm intent.
    tp_init.append(_mode_value(length_part_4)+_mode_value(length_part_5))
    
avg_a = np.array(avg_a)
tp_init = np.array(tp_init)

print(np.mean(avg_a), np.std(avg_a))
print(np.mean(tp_init), np.std(tp_init))
ax.set_ylabel('Tear Length (mm)')
ax.set_xlabel('Pressure (kPa)')
ax.set_xlim([-2, 53])
ax.set_xticks(list(range(0,51,5)))
ax.legend(loc='best', fontsize=18, ncol=2, markerscale = 1.5, columnspacing=0.8, 
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')
fig.tight_layout(pad=2.0)

fig.savefig(r"{path}_tear_length_vs_pressure.eps".format(path=data.data_path))


# Plot stress concentration factors versus pressure for all parameter cases
# and output final stress concentration factors at proximal and distal heads of the tear
fig, ax = plt.subplots(1,2, figsize=(15,7.5), dpi=600)

avg_d = []
avg_p = []
for i in range(len(stop_ind)):
    # Rows belonging to parameter case i (start_ind / stop_ind come from the
    # tear-length section above in this cell)
    case_rows = slice(start_ind[i], stop_ind[i])
    case_pressure = data.x[case_rows,3]

    # Raw string: "\m" is not a valid escape sequence in a normal string literal
    case_label = r"$\mu_{j}$".format(j=i+1)

    # Column 11 goes on the panel labelled SCF_d, column 12 on the panel labelled SCF_p
    ax[0].plot(case_pressure, data.x[case_rows,11], color=color[i], marker=marker[i], ms=6, linestyle='', label=case_label)
    ax[1].plot(case_pressure, data.x[case_rows,12], color=color[i], marker=marker[i], ms=6, linestyle='', label=case_label)

    # Keep the value in the last row of each case as its final SCF
    avg_d.append(data.x[case_rows,11][-1])
    avg_p.append(data.x[case_rows,12][-1])

avg_d = np.array(avg_d)
avg_p = np.array(avg_p)
# Mean and standard deviation of the final SCF across cases (SCF_d first, then SCF_p)
print(np.mean(avg_d), np.std(avg_d))
print(np.mean(avg_p), np.std(avg_p))

ax[0].set_ylabel('$SCF_d$')
ax[1].set_ylabel('$SCF_p$')

# Same pressure axis and SCF range on both panels so they can be compared directly
for scf_ax in ax:
    scf_ax.set_xlim([-2, 53])
    scf_ax.set_xticks([_ for _ in range(0,51,5)])
    scf_ax.set_ylim([2, 12])

# One legend is enough: both panels use identical markers and colors per case
ax[0].legend(loc='best', fontsize=18, ncol=2, markerscale = 1.5, columnspacing=0.8,
             handletextpad=0.1, borderpad=0.3, framealpha=1, edgecolor='black')

fig.supxlabel("Pressure (kPa)", y=0.15, x=0.52)
fig.tight_layout(pad=2.0)
fig.savefig(r"{path}_SCF_vs_pressure.eps".format(path=data.data_path))