**Simultaneous Training of Unconstrained Hybrid Gaussian RAdial Basis Neural Networks (GRAB-NN) Models**

This code demonstrates the simultaneous training algorithm developed for hybrid GRAB-NN models, in which all model parameters are estimated together by IPOPT in a single (monolithic) optimization problem. The hidden layer consists of two different types of nodes -- namely, ANN nodes with a sigmoid activation function and RBF nodes with a Gaussian activation function. The model parameters comprise the center coordinates and widths of the RBF nodes in the hidden layer, the connection weights of the ANN nodes in the hidden layer, and the output-layer weights of the overall GRAB-NN model.

The model structure can be optimized simultaneously / sequentially by the MINLP approaches proposed in this work. However, this code simply compares the predictive performances of different combinations of model architectures by comprehensive enumeration for a fixed size / structure of the network, i.e., for a fixed total number of hidden layer nodes in the overall GRAB-NN model. The sequence of ANN and RBF nodes in the hidden layer is immaterial since the network architecture represents a fully-connected NN model.

*Load the training and validation datasets and specify the input and output variables for the GRAB-NN models. Note that the user can consider any dynamic dataset for training and validation. The rows signify the time steps of the transient data and the columns signify the input and output variables.*

*The nonlinear dynamic continuous stirred tank reactor (CSTR) system is chosen for demonstration of the proposed approach.*

In [1]:
import numpy as np
import scipy as sp
import pyomo.environ as pyo
from pyomo.opt import SolverFactory
from idaes.core.solvers import get_solver
get_solver()
import matplotlib.pyplot as plt
import pandas as pd
from pyDOE import *
import math as mt
import time
import json
import pickle

In [2]:
# Loading the data for model development
# The sheet "Data" is read without a header row; rows 2..1201 are kept and the first column is dropped.

data = pd.read_excel("Dynamic CSTR Data.xlsx", sheet_name="Data", header=None).values[2:1202, 1:]

# For this specific system, the first five columns are the model inputs and the following four columns are the model outputs

input_data, output_data = data[:, :5], data[:, 5:]

In [None]:
def RBFANN_optim(tn,ni,no,Imat_t,dsr_t,nh_RBF,nh_ANN):
    """Train a hybrid RBF + ANN (GRAB-NN) model with IPOPT and score the fit.

    The RBF centers, the single shared RBF width, the ANN hidden-layer weights
    and the output-layer weights are all decision variables of one Pyomo NLP
    whose objective is the sum of squared errors over the training samples.

    Parameters
    ----------
    tn : int
        Number of training samples.
    ni, no : int
        Number of model inputs / model outputs.
    Imat_t : array indexed as ``[input, sample]``
        Training inputs.
    dsr_t : numpy.ndarray indexed as ``[output, sample]``
        Training targets.
    nh_RBF, nh_ANN : int
        Number of Gaussian (RBF) and sigmoid (ANN) nodes in the hidden layer.

    Returns
    -------
    list
        ``[obj_AICc, k, mse_tr]`` where ``obj_AICc`` is the corrected Akaike
        information criterion, ``k`` the number of estimated parameters and
        ``mse_tr`` the mean squared relative training error evaluated over
        the entries whose target value is nonzero.
    """

    nh = nh_RBF + nh_ANN

    # Setting up the optimization problem

    M = pyo.ConcreteModel()

    M.I1 = pyo.RangeSet(1, ni)        # inputs
    M.I2 = pyo.RangeSet(1, nh_RBF)    # RBF hidden nodes
    M.I3 = pyo.RangeSet(1, 1)         # one width shared by all RBF nodes
    M.I4 = pyo.RangeSet(1, no)        # outputs
    M.I5 = pyo.RangeSet(1, tn)        # training samples
    M.I6 = pyo.RangeSet(1, nh_ANN)    # ANN hidden nodes
    M.I7 = pyo.RangeSet(1, nh)        # all hidden nodes (RBF nodes first, then ANN nodes)

    M.x1 = pyo.Var(M.I1,M.I2, bounds = (1e-3,2.5), initialize = 0.5)  # centermat (RBF)
    M.x2 = pyo.Var(M.I3, bounds = (1e-3,4), initialize = 1)           # sigma (RBF)
    M.x3 = pyo.Var(M.I1,M.I6, bounds = (-1e5,1e5))                   # w_h (ANN)
    # No hidden-layer bias term is used for the ANN nodes.
    M.x5 = pyo.Var(M.I7,M.I4,bounds = (-1e5,1e5))                    # w_o (RBF+ANN)

    M.y1 = pyo.Var(M.I2,M.I5)                     # PhiofD / y_h (RBF)
    M.y2 = pyo.Var(M.I6,M.I5)                     # y_h (ANN)
    M.y3 = pyo.Var(M.I7,M.I5)                     # y_h (RBFANN)
    M.y4 = pyo.Var(M.I4,M.I5)                     # y_RBFANN

    # Squared Euclidean distance between sample j and RBF center i.
    # The Gaussian only needs the squared distance, so the square root is
    # never taken: sqrt has an undefined derivative at zero distance, which
    # would otherwise surface as an evaluation error in the NLP solver.
    @M.Expression(M.I2, M.I5)
    def D2(M,i,j):
        return sum((Imat_t[k-1,j-1] - M.x1[k,i])**2 for k in M.I1)

    # Gaussian activation of the RBF nodes
    def constraint_rule_1(M,i,j):
        return M.y1[i,j] == (1/(pyo.sqrt(2*mt.pi*M.x2[1]**2)))*pyo.exp(-M.D2[i,j]/(2*M.x2[1]**2))

    M.constraint_1 = pyo.Constraint(M.I2, M.I5, rule = constraint_rule_1)

    # Sigmoid activation of the ANN nodes
    def constraint_rule_2(M,i,j):
        return M.y2[i,j] == 1/(1 + pyo.exp(-sum((M.x3[k,i] * Imat_t[k-1,j-1]) for k in M.I1)))

    M.constraint_2 = pyo.Constraint(M.I6, M.I5, rule = constraint_rule_2)

    # Stack the hidden-layer outputs: indices 1..nh_RBF are RBF nodes, the rest ANN nodes
    def constraint_rule_3(M,i,j):
        if (i <= nh_RBF):
            return M.y3[i,j] == M.y1[i,j]
        else:
            return M.y3[i,j] == M.y2[i-nh_RBF,j]

    M.constraint_3 = pyo.Constraint(M.I7, M.I5, rule = constraint_rule_3)

    # Linear output layer
    def constraint_rule_4(M,i,j):
        return M.y4[i,j] == sum((M.x5[k,i] * M.y3[k,j]) for k in M.I7)

    M.constraint_4 = pyo.Constraint(M.I4, M.I5, rule = constraint_rule_4)

    # Sum of squared errors over all outputs and training samples
    def GRBF_optim_det(M):
        obj_value = sum(sum((dsr_t[i-1,j-1] - M.y4[i,j]) ** 2 for i in M.I4) for j in M.I5)
        return obj_value

    M.obj = pyo.Objective(rule = GRBF_optim_det, sense = pyo.minimize)

    solver = pyo.SolverFactory('ipopt')
    solver.options['max_iter'] = 500

    solver.solve(M, tee = True)

    # Model predictions at the solution, indexed [output, sample]
    yRBFANN = np.zeros((no,tn))
    for (i,j) in M.y4:
        yRBFANN[i-1,j-1] = pyo.value(M.y4[i,j])

    eps = dsr_t - yRBFANN            # residuals, [output, sample]
    R = 0.1*np.eye(no)               # fixed measurement-noise covariance

    # Number of estimated parameters
    kh_RBF = ni*nh_RBF + 1           # centers + shared width
    kh_ANN = ni*nh_ANN               # ANN hidden-layer weights
    ko_RBFANN = nh*no                # output-layer weights

    k = kh_RBF + kh_ANN + ko_RBFANN

    n_AIC = tn
    m_AIC = no

    # NOTE(review): the quadratic term is built from the residuals summed over
    # all samples (one vector of length `no`), not from a per-sample sum of
    # quadratic forms -- confirm this is the intended likelihood.
    eps_sum = np.sum(eps, axis=1)
    llh = (-n_AIC*m_AIC/2)*np.log(2*np.pi) - 0.5*n_AIC*np.log(np.linalg.det(R)) \
          - 0.5*np.dot(np.dot(eps_sum, np.linalg.inv(R)), eps_sum)

    obj_AICc = -2*llh + 2*k + 2*k*(k+1)/(n_AIC - k - 1)

    # Relative error is undefined where the target is exactly zero (which
    # min-max scaled targets contain), so those entries are left out instead
    # of turning the whole metric into inf/nan.
    nonzero = dsr_t != 0
    rel_err = eps[nonzero]/dsr_t[nonzero]
    mse_tr = np.mean(rel_err**2) if rel_err.size else float('nan')

    return [obj_AICc,k,mse_tr]

**Training standalone ANN and RBF models for extreme case combinations**

In [None]:
# For Pure ANN Training

def Pure_ANN_optim(tn,ni,no,Imat_t,dsr_t,nh_ANN):
    """Train a single-hidden-layer sigmoid ANN with IPOPT and score the fit.

    Hidden-layer and output-layer weights are decision variables of one Pyomo
    NLP whose objective is the sum of squared errors over the training
    samples. No bias terms are used.

    Parameters
    ----------
    tn : int
        Number of training samples.
    ni, no : int
        Number of model inputs / model outputs.
    Imat_t : array indexed as ``[input, sample]``
        Training inputs.
    dsr_t : numpy.ndarray indexed as ``[output, sample]``
        Training targets.
    nh_ANN : int
        Number of sigmoid nodes in the hidden layer.

    Returns
    -------
    list
        ``[obj_AICc, k, mse_tr]`` where ``obj_AICc`` is the corrected Akaike
        information criterion, ``k`` the number of estimated parameters and
        ``mse_tr`` the mean squared relative training error evaluated over
        the entries whose target value is nonzero.
    """

    nh = nh_ANN

    # Setting up the optimization problem

    M = pyo.ConcreteModel()

    M.I1 = pyo.RangeSet(1, ni)        # inputs
    M.I2 = pyo.RangeSet(1, nh)        # hidden nodes
    M.I3 = pyo.RangeSet(1, no)        # outputs
    M.I4 = pyo.RangeSet(1, tn)        # training samples

    M.x1 = pyo.Var(M.I1,M.I2, bounds = (-1e5,1e5))                   # w_h (ANN)
    # No hidden-layer bias term is used.
    M.x3 = pyo.Var(M.I2,M.I3,bounds = (-1e5,1e5))                    # w_o (ANN)

    M.y1 = pyo.Var(M.I2,M.I4)                     # y_h (ANN)
    M.y2 = pyo.Var(M.I3,M.I4)                     # y_ANN

    # Sigmoid activation of the hidden nodes
    def constraint_rule_1(M,i,j):
        return M.y1[i,j] == 1/(1 + pyo.exp(-sum((M.x1[k,i] * Imat_t[k-1,j-1]) for k in M.I1)))

    M.constraint_1 = pyo.Constraint(M.I2, M.I4, rule = constraint_rule_1)

    # Linear output layer
    def constraint_rule_2(M,i,j):
        return M.y2[i,j] == sum((M.x3[k,i] * M.y1[k,j]) for k in M.I2)

    M.constraint_2 = pyo.Constraint(M.I3, M.I4, rule = constraint_rule_2)

    # Sum of squared errors over all outputs and training samples
    def GRBF_optim_det(M):
        obj_value = sum(sum((dsr_t[i-1,j-1] - M.y2[i,j]) ** 2 for i in M.I3) for j in M.I4)
        return obj_value

    M.obj = pyo.Objective(rule = GRBF_optim_det, sense = pyo.minimize)

    solver = pyo.SolverFactory('ipopt')
    solver.options['max_iter'] = 500

    solver.solve(M, tee = True)

    # Model predictions at the solution, indexed [output, sample]
    yANN = np.zeros((no,tn))
    for (i,j) in M.y2:
        yANN[i-1,j-1] = pyo.value(M.y2[i,j])

    eps = dsr_t - yANN               # residuals, [output, sample]
    R = 0.1*np.eye(no)               # fixed measurement-noise covariance

    # Number of estimated parameters: hidden-layer weights + output-layer weights
    k = ni*nh + nh*no

    n_AIC = tn
    m_AIC = no

    # NOTE(review): the quadratic term is built from the residuals summed over
    # all samples (one vector of length `no`), not from a per-sample sum of
    # quadratic forms -- confirm this is the intended likelihood.
    eps_sum = np.sum(eps, axis=1)
    llh = (-n_AIC*m_AIC/2)*np.log(2*np.pi) - 0.5*n_AIC*np.log(np.linalg.det(R)) \
          - 0.5*np.dot(np.dot(eps_sum, np.linalg.inv(R)), eps_sum)

    obj_AICc = -2*llh + 2*k + 2*k*(k+1)/(n_AIC - k - 1)

    # Relative error is undefined where the target is exactly zero (which
    # min-max scaled targets contain), so those entries are left out instead
    # of turning the whole metric into inf/nan.
    nonzero = dsr_t != 0
    rel_err = eps[nonzero]/dsr_t[nonzero]
    mse_tr = np.mean(rel_err**2) if rel_err.size else float('nan')

    return [obj_AICc,k,mse_tr]

In [None]:
# For Pure RBF Training

def Pure_RBF_optim(tn,ni,no,Imat_t,dsr_t,nh_RBF):
    """Train a Gaussian RBF network with IPOPT and score the fit.

    The RBF centers, the single shared width and the output-layer weights are
    decision variables of one Pyomo NLP whose objective is the sum of squared
    errors over the training samples.

    Parameters
    ----------
    tn : int
        Number of training samples.
    ni, no : int
        Number of model inputs / model outputs.
    Imat_t : array indexed as ``[input, sample]``
        Training inputs.
    dsr_t : numpy.ndarray indexed as ``[output, sample]``
        Training targets.
    nh_RBF : int
        Number of Gaussian nodes in the hidden layer.

    Returns
    -------
    list
        ``[obj_AICc, k, mse_tr]`` where ``obj_AICc`` is the corrected Akaike
        information criterion, ``k`` the number of estimated parameters and
        ``mse_tr`` the mean squared relative training error evaluated over
        the entries whose target value is nonzero.
    """

    nh = nh_RBF

    # Setting up the optimization problem

    M = pyo.ConcreteModel()

    M.I1 = pyo.RangeSet(1, ni)        # inputs
    M.I2 = pyo.RangeSet(1, nh)        # RBF hidden nodes
    M.I3 = pyo.RangeSet(1, 1)         # one width shared by all RBF nodes
    M.I4 = pyo.RangeSet(1, no)        # outputs
    M.I5 = pyo.RangeSet(1, tn)        # training samples

    M.x1 = pyo.Var(M.I1,M.I2, bounds = (1e-3,2.5), initialize = 0.5)  # centermat (RBF)
    M.x2 = pyo.Var(M.I3, bounds = (1e-3,4), initialize = 1)           # sigma (RBF)
    M.x3 = pyo.Var(M.I2,M.I4,bounds = (-1e5,1e5))                    # w_o (RBF)

    M.y1 = pyo.Var(M.I2,M.I5)                     # PhiofD / y_h (RBF)
    M.y2 = pyo.Var(M.I4,M.I5)                     # y_RBF

    # Squared Euclidean distance between sample j and RBF center i.
    # The Gaussian only needs the squared distance, so the square root is
    # never taken: sqrt has an undefined derivative at zero distance, which
    # would otherwise surface as an evaluation error in the NLP solver.
    @M.Expression(M.I2, M.I5)
    def D2(M,i,j):
        return sum((Imat_t[k-1,j-1] - M.x1[k,i])**2 for k in M.I1)

    # Gaussian activation of the RBF nodes
    def constraint_rule_1(M,i,j):
        return M.y1[i,j] == (1/pyo.sqrt(2*mt.pi*M.x2[1]**2))*pyo.exp(-M.D2[i,j]/(2*M.x2[1]**2))

    M.constraint_1 = pyo.Constraint(M.I2, M.I5, rule = constraint_rule_1)

    # Linear output layer
    def constraint_rule_2(M,i,j):
        return M.y2[i,j] == sum((M.x3[k,i] * M.y1[k,j]) for k in M.I2)

    M.constraint_2 = pyo.Constraint(M.I4, M.I5, rule = constraint_rule_2)

    # Sum of squared errors over all outputs and training samples
    def GRBF_optim_det(M):
        obj_value = sum(sum((dsr_t[i-1,j-1] - M.y2[i,j]) ** 2 for i in M.I4) for j in M.I5)
        return obj_value

    M.obj = pyo.Objective(rule = GRBF_optim_det, sense = pyo.minimize)

    solver = pyo.SolverFactory('ipopt')
    solver.options['max_iter'] = 500

    solver.solve(M, tee = True)

    # Model predictions at the solution, indexed [output, sample]
    yRBF = np.zeros((no,tn))
    for (i,j) in M.y2:
        yRBF[i-1,j-1] = pyo.value(M.y2[i,j])

    eps = dsr_t - yRBF               # residuals, [output, sample]
    R = 0.1*np.eye(no)               # fixed measurement-noise covariance

    # Number of estimated parameters: centers + shared width + output-layer weights
    k = ni*nh + 1 + nh*no

    n_AIC = tn
    m_AIC = no

    # NOTE(review): the quadratic term is built from the residuals summed over
    # all samples (one vector of length `no`), not from a per-sample sum of
    # quadratic forms -- confirm this is the intended likelihood.
    eps_sum = np.sum(eps, axis=1)
    llh = (-n_AIC*m_AIC/2)*np.log(2*np.pi) - 0.5*n_AIC*np.log(np.linalg.det(R)) \
          - 0.5*np.dot(np.dot(eps_sum, np.linalg.inv(R)), eps_sum)

    obj_AICc = -2*llh + 2*k + 2*k*(k+1)/(n_AIC - k - 1)

    # Relative error is undefined where the target is exactly zero (which
    # min-max scaled targets contain), so those entries are left out instead
    # of turning the whole metric into inf/nan.
    nonzero = dsr_t != 0
    rel_err = eps[nonzero]/dsr_t[nonzero]
    mse_tr = np.mean(rel_err**2) if rel_err.size else float('nan')

    return [obj_AICc,k,mse_tr]

In [None]:
# Specification of Model Inputs and Target Outputs

ni = input_data.shape[1]          # number of model inputs
no = output_data.shape[1]         # number of model outputs
nt = ni+no
data = np.concatenate((input_data, output_data), axis = 1)

tt = data.shape[0]                # total number of time steps
tn = int(np.floor(0.7*tt))        # 70 % of the time steps are used for training

# Normalizing the input and output variables
# Each column is min-max scaled; delta keeps the column ranges so the scaling can be undone.

delta = np.zeros((1,nt))
norm_mat = np.zeros((tt,nt))
for i in range(nt):
    col_min = min(data[:,i])
    col_max = max(data[:,i])
    delta[:,i] = col_max - col_min
    norm_mat[:,i] = (data[:,i] - col_min)/delta[0,i]

# Variables along rows, time steps along columns
Imat = norm_mat[:,0:ni].transpose()
dsr = norm_mat[:,ni:ni+no].transpose()

# TRAINING OF RBFNN
# Training time steps are drawn at random without replacement and kept in chronological order.

tr_steps = np.sort(np.random.choice(tt, tn, replace=False))

# Gather the selected time steps (columns) for targets and inputs
dsr_t = dsr[:,tr_steps]
Imat_t = Imat[:,tr_steps]

In [None]:
# Comprehensive Enumeration of all possible combinations of RBF and ANN nodes in the Hidden Layer

# Specify the size of the hidden layer (i.e., number of nodes)

nh = 15

check = 1e6

# Result lists, one entry per architecture; position i corresponds to nh_ANN = i.
# AICc_v must exist before the loop appends to it (otherwise the first iteration raises NameError).
AICc_v = []; k_v = []; mse_v = [];

# Training targets mapped back to physical units (inverse of the min-max scaling);
# not used further inside this cell.
target_unnorm = np.zeros((tn,no))
for i in range(no):
    target_unnorm[:,i] = np.transpose(dsr_t[i])*delta[0,ni+i] + min(data[:,ni+i])

# Sweep nh_ANN from 0 (pure RBF) to nh (pure ANN) at a fixed total hidden-layer size
for i in range(nh+1):
    nh_ANN = i
    nh_RBF = nh - nh_ANN

    if (nh_RBF == 0):
        [AICc,k,mse] = Pure_ANN_optim(tn,ni,no,Imat_t,dsr_t,nh_ANN)
    elif (nh_ANN == 0):
        [AICc,k,mse] = Pure_RBF_optim(tn,ni,no,Imat_t,dsr_t,nh_RBF)
    else:
        [AICc,k,mse] = RBFANN_optim(tn,ni,no,Imat_t,dsr_t,nh_RBF,nh_ANN)

    AICc_v.append(AICc)
    k_v.append(k)
    mse_v.append(mse)

In [None]:
# Evaluation of the optimal model architecture, i.e., optimal values of nh_ANN and nh_RBF
# The position of the smallest training error in mse_v is taken as the ANN node count;
# the remaining hidden nodes are RBF nodes.

nh_ANN_opt = np.asarray(mse_v).argmin()
nh_RBF_opt = nh - nh_ANN_opt

print('Optimal GRABNN architecture ==> nh_ANN:', nh_ANN_opt, 'and nh_RBF:', nh_RBF_opt)