In [1]:

from gpcam import GPOptimizer

import numpy as np
from numpy.random import default_rng

import torch
from torch import nn

import matplotlib.pyplot as plt
import plotly.graph_objects as go

import os
import csv

import random

In [58]:
# Load the synthetic data set. Below, energy_data is indexed as [battery, cycle]
# and cycle_number supplies the x-values shared by every battery.
energy_data = np.load("/data/Synthetic Data Generation_1/my_synthetic_energy.npy")
cycle_number = np.load("/data/Synthetic Data Generation_1/my_synthetic_cycleNum.npy")



# Font size reused by the figures in this notebook.
label_size = 30

# Repeat count for cycle_number below; it has to match len(considered_batteries).
num_of_datasets = 50

# Row indices into energy_data of the batteries used in every later cell.
considered_batteries = np.array([5546, 9477, 2231, 4437, 7059, 5259, 8330, 1068, 8214, 5888, 3275, 6845, 7671, 
                         299, 5038, 3503, 8673, 2236, 3644, 4980, 993, 7545, 654, 1418, 6090, 7936, 8792, 
                         6910, 2933, 2382, 9730, 8476, 1882, 7986, 7091, 4813, 3086, 3908, 1539, 8567, 2152, 
                         5738, 8646, 9692, 2661, 6766, 7230, 512, 758, 2881])



# Overview figure: one scatter series per selected battery.
plt.figure(figsize = (20,10))
for i in considered_batteries: plt.scatter(cycle_number,energy_data[int(i)])

plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.xlabel("Cycle Number",fontsize=label_size)
plt.ylabel("Quantity of Interest",fontsize=label_size)
plt.show()

# NOTE: this maximum is taken over the whole file, not only the selected batteries.
print("max y: ", np.max(energy_data))

# All Data
# Both arrays are stacked battery after battery into single columns of equal length.
x_data_all = np.tile(cycle_number, (num_of_datasets, 1)).reshape(-1, 1) # repeat cycle_number once per dataset (num_of_datasets times) to create x_data
y_data_all = np.vstack(energy_data[considered_batteries, :]).reshape(-1, 1)
# Prediction grid; a later cell rebinds x_pred to a wider range.
x_pred = np.linspace(0,1000,1001).reshape(-1,1)

print("x data: ", x_data_all.shape)
print("y data: ", y_data_all.shape)
           
#plt.scatter(x_data,y_data)


max y:  529.5870098158235
x data:  (2500, 1)
y data:  (2500, 1)


# Creating the Subfolder in Results

In [2]:

# Folder that receives every figure saved by this notebook.
# (Plain string: the original f-string had no placeholders to interpolate.)
new_folder_path = "/results/Introduction Figures"

# Create the folder; exist_ok=True makes re-running this cell harmless.
os.makedirs(new_folder_path, exist_ok=True)


# Defining the Standard GP Components

In [32]:
def get_distance_matrix(x1,x2):
    """Pairwise Euclidean distances between the rows of ``x1`` and ``x2``.

    Both inputs are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape ``(len(x1), len(x2))``.
    """
    squared = np.zeros((len(x1), len(x2)))
    # Add the squared difference of one coordinate at a time.
    for dim in range(x1.shape[1]):
        difference = x1[:, dim].reshape(-1, 1) - x2[:, dim]
        squared += np.square(difference)
    return np.sqrt(squared)


def my_noise_stdrd(x,hps,obj):
    """Constant noise model: the same variance ``hps[2]`` at every point.

    Returns a ``len(x)`` by ``len(x)`` diagonal matrix, because the noise
    function has to return a full (co)variance matrix. ``obj`` is not used.
    """
    n_points = len(x)
    # Identical value on every diagonal entry; off-diagonal entries are zero.
    per_point_variance = hps[2] + np.zeros(n_points)
    return np.diag(per_point_variance)

def kernel_stdrd(x1,x2,hps,obj):
    """Scaled squared-exponential kernel between the rows of ``x1`` and ``x2``.

    ``hps[0]`` multiplies the kernel and ``hps[1]`` is forwarded as the second
    argument of ``obj.squared_exponential_kernel`` together with the pairwise
    distance matrix.
    """
    scale, length = hps[0], hps[1]
    pairwise = get_distance_matrix(x1, x2)
    return scale * obj.squared_exponential_kernel(pairwise, length)


def mean_stdrd(x, hps, obj):
    """Constant prior mean ``hps[3]`` evaluated at every row of ``x``.

    Returns a 1-D float array of length ``len(x)``. The previous version
    returned the bare scalar and relied on broadcasting inside the GP code;
    returning an explicit vector gives the same numbers while matching the
    shape a prior-mean callback is expected to produce. ``obj`` is not used.
    """
    return np.full(len(x), hps[3], dtype=float)


# Fitting the GP model

In [33]:
# In this synthetic data set, every battery has 50 data points, so if I want to take the data for the first 3 batteries I 
# take the first 150 data points; if I want the data for the first 5 batteries I take the first 250 data points ...


# Split by cycle number: below 600 is used for fitting, 600 and above is held out.
# NOTE: x_data_all / y_data_all are (N, 1) columns, so indexing them with a 2-D
# boolean mask returns flattened 1-D arrays (a later cell prints shape (1500,)).
condition = x_data_all < 600
x_data = x_data_all[condition]
y_data = y_data_all[condition]

condition2 = x_data_all >= 600
x_data_hidden = x_data_all[condition2]
y_data_hidden = y_data_all[condition2]


# Finding the mean of the initial capacity to be entered to the code
# (mean of the training values observed at cycle numbers <= 10)
my_ind = np.where(x_data<=10)
initial_capacity = np.mean(y_data[my_ind[0]])

# Hyperparameter layout read by the callbacks: [0] kernel variance, [1] kernel
# length scale, [2] constant noise variance, [3] constant prior mean.
# NOTE: init_hyperparameters and bounds are only needed when the commented-out
# train() call below is enabled; the GP is built from trained_hps.
init_hyperparameters = np.array([150, 250,   # Kernel: variance, length scale
                                  100, 250])                  # Noise variance, constant mean


# Setting the Optimization Bounds for Hyperparameters
bounds = np.empty((4,2))
# Kernel Sq Exp 
bounds[0] = np.array([100.,10000.])                         # Kernel Variance
bounds[1] = np.array([10.,400.])                           # Kernel Lengthscale
# Noise
bounds[2] = np.array([1e-5,500.])                            # Constant noise variance
#Mean
bounds[3] = np.array([1.,500.])                              # Constant prior mean


# Hyperparameters from an earlier training run, reused instead of retraining.
trained_hps = np.array([421.74924913, 103.66812027,  83.73293929, 476.92930516])

my_gpo = GPOptimizer(x_data,y_data,
            init_hyperparameters = trained_hps,  # we need enough of those for kernel, noise and prior mean functions
            #noise_variances=np.ones(y_data.shape) * 0.01, #providing noise variances and a noise function will raise a warning 
            compute_device='cpu', 
            gp_kernel_function=kernel_stdrd, 
            gp_kernel_function_grad=None, 
            gp_mean_function=mean_stdrd, 
            gp_mean_function_grad=None,
            gp_noise_function=my_noise_stdrd,
            normalize_y=False,
            sparse_mode=False,
            gp2Scale = False,
            store_inv=False, 
            ram_economy=False, 
            args=np.array([initial_capacity]),  # none of the three callbacks used here reads obj.args
            )

#my_gpo.train(hyperparameter_bounds=bounds, method='global')

# NOTE: printed even though train() is commented out above.
print("Training is Done!")

Training is Done!


In [34]:
# Rebinds the prediction grid to 0..2000; the later posterior cells reuse this
# x_pred, while the figures themselves are clipped to 0..1000 via plt.xlim.
x_pred =np.linspace(0,2000,1001).reshape(-1,1)

# Posterior of the standard GP on the grid (mean and "v(x)" with noise added).
mean = my_gpo.posterior_mean(x_pred.reshape(-1,1))["f(x)"]
var =  my_gpo.posterior_covariance(x_pred.reshape(-1,1), variance_only=False, add_noise=True)["v(x)"]

print(my_gpo.log_likelihood(my_gpo.hyperparameters))
print(my_gpo.hyperparameters)

# Light-blue RGB colour for the held-out points; reused by later plotting cells.
my_color = np.array([102, 178, 255])/255  

plt.figure(figsize = (10,10))
plt.plot(x_pred,mean, color="red", label = "Posterior Mean", linewidth = 4)
# Grey band: mean +/- 2 * sqrt(v(x)).
plt.fill_between(np.squeeze(x_pred), mean - 2. * np.sqrt(var), mean + 2. * np.sqrt(var), alpha = 0.5, color = "grey", label = "Posterior Covariance")
plt.scatter(x_data,y_data,s = 75, color='blue', label = "Training")
plt.scatter(x_data_hidden,y_data_hidden,s = 75,color=[my_color], label = "Testing")
plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.legend(fontsize=label_size,frameon=False,loc='lower left',markerscale=2)
plt.xlim(0,1000)
plt.xticks([])

plt.ylim([250,520])
plt.yticks([])
plt.xlabel("Cycle Number",fontsize=label_size+4)
plt.ylabel("Energy",fontsize=label_size+4)
plt.savefig('/results/Introduction Figures/Standard_GP.png', dpi=300)


-5468.723098021589
[421.74924913 103.66812027  83.73293929 476.92930516]


# ---------------------------------------------------------------------------------------------------------------
# Modified GP Model - Mean
# ---------------------------------------------------------------------------------------------------------------

In [55]:
def get_distance_matrix(x1,x2):
    """Pairwise Euclidean distances between the rows of ``x1`` and ``x2``.

    Both inputs are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape ``(len(x1), len(x2))``.
    """
    squared = np.zeros((len(x1), len(x2)))
    # Add the squared difference of one coordinate at a time.
    for dim in range(x1.shape[1]):
        difference = x1[:, dim].reshape(-1, 1) - x2[:, dim]
        squared += np.square(difference)
    return np.sqrt(squared)


def my_noise_stdrd(x,hps,obj):
    """Constant noise model: the same variance ``hps[2]`` at every point.

    Returns a ``len(x)`` by ``len(x)`` diagonal matrix, because the noise
    function has to return a full (co)variance matrix. ``obj`` is not used.
    """
    n_points = len(x)
    # Identical value on every diagonal entry; off-diagonal entries are zero.
    per_point_variance = hps[2] + np.zeros(n_points)
    return np.diag(per_point_variance)


def kernel_stdrd(x1,x2,hps,obj):
    """Scaled squared-exponential kernel between the rows of ``x1`` and ``x2``.

    ``hps[0]`` multiplies the kernel and ``hps[1]`` is forwarded as the second
    argument of ``obj.squared_exponential_kernel`` together with the pairwise
    distance matrix.
    """
    scale, length = hps[0], hps[1]
    pairwise = get_distance_matrix(x1, x2)
    return scale * obj.squared_exponential_kernel(pairwise, length)


# Mean function: Two-Element piecewise function
def mean2(x,hps,obj):
    """Two-segment piecewise-linear prior mean evaluated on the first column of ``x``.

    Hyperparameters read: ``hps[3]`` is the breakpoint, ``hps[4]`` the slope up
    to and including the breakpoint, ``hps[5]`` the slope after it. The first
    segment has a fixed intercept of 500; the second intercept is derived so
    that both segments give the same value at the breakpoint. ``obj`` is not used.
    """
    breakpoint_x, slope_before, slope_after = hps[3], hps[4], hps[5]

    intercept_before = 500
    # Chosen so that both lines agree at breakpoint_x.
    intercept_after = (slope_before - slope_after) * breakpoint_x + intercept_before

    cycles = x[:,0]
    first_segment = slope_before*cycles + intercept_before
    second_segment = slope_after*cycles + intercept_after

    return np.where(cycles <= breakpoint_x, first_segment, second_segment)


In [56]:
# In this synthetic data set, every battery has 50 data points, so if I want to take the data for the first 3 batteries I 
# take the first 150 data points; if I want the data for the first 5 batteries I take the first 250 data points ...


# Finding the mean of the initial capacity to be entered to the code
my_ind = np.where(x_data <=10)
initial_capacity = np.mean(y_data[my_ind[0]])

# Hyperparameter layout read by the callbacks of this model: [0] kernel variance,
# [1] kernel length scale, [2] constant noise variance, [3] mean breakpoint,
# [4] mean slope 1, [5] mean slope 2.
# NOTE: init_hyperparameters and bounds are only needed when the commented-out
# train() call below is enabled; the GP is built from trained_hps.
init_hyperparameters = np.array([150, 250,                    # Kernel
                                  50,                        # Noise
                                  250,-0.01,-0.015])          # Mean

#trained_hps = np.array([421.74924913, 103.66812027,  83.73293929, 476.92930516])

# Setting the Optimization Bounds for Hyperparameters
bounds = np.empty((6,2))
# Kernel Sq Exp 
bounds[0] = np.array([100.,10000.])                         # Kernel Variance
bounds[1] = np.array([10.,400.])                           # Kernel Lengthscale
# Noise
bounds[2] = np.array([1e-5,500.])                            # Constant noise variance
#Mean
bounds[3] = np.array([200.,700.])                          # Mean Piecewise Intersection point
bounds[4] = np.array([-1e-1,-1e-3])                        # Mean Slope 1
bounds[5] = np.array([-5e-1,-1e-3])                        # Mean Slope 2

# Hyperparameters from an earlier training run, reused instead of retraining.
trained_hps = np.array([ 1.02857201e+02,  3.98949826e+02,  8.28074295e+01,  5.23824852e+02,
 -4.60043682e-02, -3.70545114e-01])

my_gpMean = GPOptimizer(x_data,y_data,
            #init_hyperparameters = init_hyperparameters,  # we need enough of those for kernel, noise and prior mean functions
            init_hyperparameters = trained_hps,  # we need enough of those for kernel, noise and prior mean functions
            #noise_variances=np.ones(y_data.shape) * 0.01, #providing noise variances and a noise function will raise a warning 
            compute_device='cpu', 
            gp_kernel_function=kernel_stdrd, 
            gp_kernel_function_grad=None, 
            gp_mean_function=mean2, 
            gp_mean_function_grad=None,
            gp_noise_function=my_noise_stdrd,
            normalize_y=False,
            sparse_mode=False,
            gp2Scale = False,
            store_inv=False, 
            ram_economy=False, 
            args=np.array([initial_capacity]),  # none of the three callbacks used here reads obj.args
            )

#my_gpMean.train(hyperparameter_bounds=bounds, method='global')

# NOTE: printed even though train() is commented out above.
print("Training is Done!")

Training is Done!


In [57]:
# Posterior of the GP with the piecewise-linear prior mean, evaluated on x_pred
# (whatever grid the most recently executed cell bound to that name).
mean_Mean = my_gpMean.posterior_mean(x_pred.reshape(-1,1))["f(x)"]
var_Mean =  my_gpMean.posterior_covariance(x_pred.reshape(-1,1), variance_only=False, add_noise=True)["v(x)"]

print(my_gpMean.log_likelihood(my_gpMean.hyperparameters))
print(my_gpMean.hyperparameters)


plt.figure(figsize = (10,10))
plt.plot(x_pred,mean_Mean, color="red", label = "Posterior Mean", linewidth = 4)
# Grey band: mean +/- 2 * sqrt(v(x)).
plt.fill_between(np.squeeze(x_pred), mean_Mean - 2. * np.sqrt(var_Mean), mean_Mean + 2. * np.sqrt(var_Mean), alpha = 0.5, color = "grey", label = "Posterior Variance")
plt.scatter(x_data,y_data,color='blue')
# my_color is defined in the standard-GP plotting cell.
plt.scatter(x_data_hidden,y_data_hidden,color=[my_color])
#plt.legend(fontsize=label_size,frameon=False,loc='lower left')
plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.xlim(0,1000)
plt.xticks([])

plt.ylim([250,520])
plt.yticks([])
#plt.xlabel("Cycle Number",fontsize=label_size)
plt.savefig('/results/Introduction Figures/Mean Modified GP.pdf', bbox_inches='tight')

-5449.694273500741
[ 1.02857201e+02  3.98949826e+02  8.28074295e+01  5.23824852e+02
 -4.60043682e-02 -3.70545114e-01]


## Plotting Prior Mean 

In [38]:
# Evaluate the prior mean on its own, using the hyperparameters stored in my_gpMean.
# NOTE: this needs the mean2 version that reads hps[3], hps[4], hps[5]; later
# cells redefine mean2 with different indices, so run this before them.
my_hyperparameter2 = my_gpMean.hyperparameters

my_prior_mean = mean2(x_pred,my_hyperparameter2,my_gpMean)

plt.figure(figsize = (10,10))
# The label is a single space, so the legend shows the line sample without text.
plt.plot(x_pred,my_prior_mean, color="black", label = " ", linewidth = 4)
#plt.plot(x_pred,mean_Mean, color="red", label = "Posterior Mean", linewidth = 4)

plt.scatter(x_data,y_data, color="blue")
plt.scatter(x_data_hidden,y_data_hidden,color=[my_color])

#plt.title("Trained Prior Mean")
plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.legend(fontsize=label_size,frameon=False,loc='lower left')
plt.xlim(0,1000)
plt.xticks([])

plt.ylim([250,520])
plt.yticks([])
#plt.title("Fitted Model",fontsize=label_size)
#plt.xlabel("Cycle Number",fontsize=label_size)
#plt.ylabel("Quantity of Interest",fontsize=label_size)
plt.savefig('/results/Introduction Figures/Modified GP Prior Mean.pdf', bbox_inches='tight')

# ---------------------------------------------------------------------------------------------------------------
# Modified GP Model - Noise
# ---------------------------------------------------------------------------------------------------------------

In [39]:
def get_distance_matrix(x1,x2):
    """Pairwise Euclidean distances between the rows of ``x1`` and ``x2``.

    Both inputs are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape ``(len(x1), len(x2))``.
    """
    squared = np.zeros((len(x1), len(x2)))
    # Add the squared difference of one coordinate at a time.
    for dim in range(x1.shape[1]):
        difference = x1[:, dim].reshape(-1, 1) - x2[:, dim]
        squared += np.square(difference)
    return np.sqrt(squared)


def my_noise(x,hps,obj):
    """Input-dependent noise model: ``hps[2] * x**hps[3] + hps[4]`` per point.

    ``x`` is indexed as a 2-D array and only its first column ends up on the
    diagonal of the returned matrix; off-diagonal entries are zero. ``obj`` is
    not used.
    """
    slope, power, intercept = hps[2], hps[3], hps[4]

    # Power law in x plus a constant floor.
    per_point = slope * x**power + intercept

    return np.diag(per_point[:,0])

def kernel_stdrd(x1,x2,hps,obj):
    """Scaled squared-exponential kernel between the rows of ``x1`` and ``x2``.

    ``hps[0]`` multiplies the kernel and ``hps[1]`` is forwarded as the second
    argument of ``obj.squared_exponential_kernel`` together with the pairwise
    distance matrix.
    """
    scale, length = hps[0], hps[1]
    pairwise = get_distance_matrix(x1, x2)
    return scale * obj.squared_exponential_kernel(pairwise, length)


# Mean function: Two-Element piecewise function
def mean2(x,hps,obj):
    """Two-segment piecewise-linear prior mean evaluated on the first column of ``x``.

    Hyperparameters read: ``hps[5]`` is the breakpoint, ``hps[6]`` the slope up
    to and including the breakpoint, ``hps[7]`` the slope after it. The first
    segment has a fixed intercept of 500; the second intercept is derived so
    that both segments give the same value at the breakpoint. ``obj`` is not used.
    """
    breakpoint_x, slope_before, slope_after = hps[5], hps[6], hps[7]

    intercept_before = 500
    # Chosen so that both lines agree at breakpoint_x.
    intercept_after = (slope_before - slope_after) * breakpoint_x + intercept_before

    cycles = x[:,0]
    first_segment = slope_before*cycles + intercept_before
    second_segment = slope_after*cycles + intercept_after

    return np.where(cycles <= breakpoint_x, first_segment, second_segment)

In [40]:
# In this synthetic data set, every battery has 50 data points, so if I want to take the data for the first 3 batteries I 
# take the first 150 data points; if I want the data for the first 5 batteries I take the first 250 data points ...


# Finding the mean of the initial capacity to be entered to the code
my_ind = np.where(x_data<=10)
initial_capacity = np.mean(y_data[my_ind[0]])

# Hyperparameter layout read by the callbacks of this model: [0] kernel variance,
# [1] kernel length scale, [2] noise slope, [3] noise power, [4] noise intercept,
# [5] mean breakpoint, [6] mean slope 1, [7] mean slope 2.
# NOTE: init_hyperparameters and bounds are only needed when the commented-out
# train() call below is enabled; the GP is built from trained_hps.
init_hyperparameters = np.array([150, 250,                    # Kernel
                                  0.06, 2, 2,                 # Noise
                                  250,-0.01,-0.015])          # Mean

#trained_hps = np.array([421.74924913, 103.66812027,  83.73293929, 476.92930516])

# Setting the Optimization Bounds for Hyperparameters
bounds = np.empty((8,2))
# Kernel Sq Exp 
bounds[0] = np.array([100.,10000.])                         # Kernel Variance
bounds[1] = np.array([10.,400.])                           # Kernel Lengthscale
# Noise
bounds[2] = np.array([1e-5,1.])                           # Noise Slope
bounds[3] = np.array([1.,5.])                            # Noise Power
bounds[4] = np.array([0.,3.])                              # Noise Intercept
#Mean
bounds[5] = np.array([200.,700.])                          # Mean Piecewise Intersection point
bounds[6] = np.array([-1e-1,-1e-3])                        # Mean Slope 1
bounds[7] = np.array([-5e-1,-1e-3])                        # Mean Slope 2

# Hyperparameters from an earlier training run, reused instead of retraining.
trained_hps = np.array([ 1.05086385e+02,  3.98289651e+02,  2.11823751e-05,  2.56705587e+00,
  2.90357790e+00,  5.24274178e+02, -5.65283589e-02, -3.53173534e-01])

my_gpNoise = GPOptimizer(x_data,y_data,
            #init_hyperparameters = init_hyperparameters,  # we need enough of those for kernel, noise and prior mean functions
            init_hyperparameters = trained_hps,  # we need enough of those for kernel, noise and prior mean functions
            #noise_variances=np.ones(y_data.shape) * 0.01, #providing noise variances and a noise function will raise a warning 
            compute_device='cpu', 
            gp_kernel_function=kernel_stdrd, 
            gp_kernel_function_grad=None, 
            gp_mean_function=mean2, 
            gp_mean_function_grad=None,
            gp_noise_function=my_noise,
            normalize_y=False,
            sparse_mode=False,
            gp2Scale = False,
            store_inv=False, 
            ram_economy=False, 
            args=np.array([initial_capacity]),  # none of the three callbacks used here reads obj.args
            )

#my_gpNoise.train(hyperparameter_bounds=bounds, method='global')

# NOTE: printed even though train() is commented out above.
print("Training is Done!")

Training is Done!


In [41]:
# Posterior of the GP with the piecewise-linear mean and input-dependent noise,
# evaluated on x_pred (whatever grid the most recently executed cell bound to that name).
mean_Noise = my_gpNoise.posterior_mean(x_pred.reshape(-1,1))["f(x)"]
var_Noise =  my_gpNoise.posterior_covariance(x_pred.reshape(-1,1), variance_only=False, add_noise=True)["v(x)"]

print(my_gpNoise.log_likelihood(my_gpNoise.hyperparameters))
print(my_gpNoise.hyperparameters)


plt.figure(figsize = (10,10))
plt.plot(x_pred,mean_Noise, color="red", label = "Posterior Mean", linewidth = 4)
# Grey band: mean +/- 2 * sqrt(v(x)).
plt.fill_between(np.squeeze(x_pred), mean_Noise - 2. * np.sqrt(var_Noise), mean_Noise + 2. * np.sqrt(var_Noise), alpha = 0.5, color = "grey", label = "Posterior Variance")
plt.scatter(x_data,y_data,color="blue")
# my_color is defined in the standard-GP plotting cell.
plt.scatter(x_data_hidden,y_data_hidden,color=[my_color])
#plt.legend(fontsize=label_size,frameon=False,loc='lower left')
plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.xlim(0,1000)
plt.xticks([])

plt.ylim([250,520])
plt.yticks([])
#plt.xlabel("Cycle Number",fontsize=label_size)
#plt.ylabel("Quantity of Interest",fontsize=label_size)
plt.savefig('/results/Introduction Figures/Mean_Noise Modified GP.pdf', bbox_inches='tight')

-4835.075850887582
[ 1.05086385e+02  3.98289651e+02  2.11823751e-05  2.56705587e+00
  2.90357790e+00  5.24274178e+02 -5.65283589e-02 -3.53173534e-01]


## Plotting Noise Model

In [42]:
# Checking the variability trend in the data

# Empirical spread across the selected batteries at each cycle.
# NOTE: stds is only referenced by the commented-out plot call below.
variances = np.var(energy_data[considered_batteries], axis=0)
stds = np.sqrt(variances)

# Reference hyperparameters in the 8-entry layout of the noise-modified GP; only
# entries 2..4 are read by my_noise.
GT_hps = np.array([0,0,                      # kernel - Doesn't Matter
                   1.002e-05,  2.69, 3,      # Noise
                   500,-0.05,-0.2])          # Mean

# NOTE: my_noise indexes its input with [:, 0], so cycle_number has to be 2-D here.
# This cell also needs the my_noise version that reads hps[2], hps[3], hps[4];
# later cells redefine my_noise to read obj.args[1].
GT_stds =  np.sqrt(np.diag(my_noise(cycle_number, GT_hps, my_gpNoise)))

predicted_variability = np.diag(my_noise(cycle_number, my_gpNoise.hyperparameters, my_gpNoise))

# Plotting the data
plt.figure(figsize = (10,10))
#plt.plot(cycle_number,stds, color = "blue", linewidth = 3, label = " ")
# The curve labelled "Data" is the noise model evaluated with GT_hps, not the empirical stds.
plt.plot(cycle_number,GT_stds, color = "blue", linewidth = 3,linestyle = '--', label = "Data")

# The noise function returns variances, hence the square root for plotting.
plt.plot(cycle_number,np.sqrt(predicted_variability), color="black", linewidth = 3, label = "Noise Model")
plt.xlabel("Cycle Number",fontsize=label_size)
plt.ylabel("Energy Variability",fontsize=label_size)
plt.xlim(0,600)
#plt.xticks([])
plt.ylim([0,20])
plt.yticks([0,5,10,15,20])

#plt.yticks([])
plt.xticks([0,150,300,450,600])

plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.legend(fontsize=label_size,frameon=False,loc='upper left')
#plt.savefig('Modified GP Noise Model.pdf', bbox_inches='tight')
plt.savefig('/results/Introduction Figures/Fitted_noise_model.png', dpi=300)  # Specify the desired resolution (e.g., 300 dpi)


# ---------------------------------------------------------------------------------------------------------------
# Modified GP Model - All
# ---------------------------------------------------------------------------------------------------------------

# Defining the GP Components

## Deep Kernel Learning

In [43]:
# All remaining code assumes that the NN architecture is made up of two hidden layers with the same number of nodes.
# If other architectures are used, the indices of the hyperparameters and their bounds need to be changed accordingly.
# The number of nodes can be varied as the user prefers.

class Network(nn.Module):
    """Small fully connected network used to warp the 1-D GP input space.

    Architecture: 1 -> nodes_num -> nodes_num -> 1, with a ReLU after every
    layer, including the output layer, so the warped coordinate is never
    negative.

    Parameters
    ----------
    nodes_num : int, optional
        Width of both hidden layers. Defaults to 5, the value previously
        hard-coded here.
    """

    def __init__(self, nodes_num=5):
        super().__init__()

        self.nodes_num = nodes_num

        # Input -> hidden -> hidden -> output linear transformations
        self.layer1 = nn.Linear(1, self.nodes_num)
        self.layer2 = nn.Linear(self.nodes_num, self.nodes_num)
        self.layer3 = nn.Linear(self.nodes_num, 1)

    def forward(self, x):
        """Push ``x`` (array-like, last axis of size 1) through the network.

        Returns a float32 NumPy array with the same leading shape as ``x``.
        """
        # The result leaves PyTorch as a NumPy array, so no gradient is ever
        # taken through this call; skip building the autograd graph.
        with torch.no_grad():
            hidden = torch.as_tensor(x, dtype=torch.float32)
            hidden = torch.nn.functional.relu(self.layer1(hidden))
            hidden = torch.nn.functional.relu(self.layer2(hidden))
            hidden = torch.nn.functional.relu(self.layer3(hidden))
        return hidden.detach().numpy()

    def set_weights(self,w1,w2,w3):
        """Replace the three weight matrices with float32 copies of NumPy arrays.

        Expected shapes (out_features, in_features): ``w1`` (nodes_num, 1),
        ``w2`` (nodes_num, nodes_num), ``w3`` (1, nodes_num).
        """
        self.layer1.weight = nn.Parameter(torch.from_numpy(w1).float())
        self.layer2.weight = nn.Parameter(torch.from_numpy(w2).float())
        self.layer3.weight = nn.Parameter(torch.from_numpy(w3).float())

    def set_biases(self,b1,b2,b3):
        """Replace the three bias vectors with float32 copies of NumPy arrays.

        Expected shapes: ``b1`` and ``b2`` (nodes_num,), ``b3`` (1,).
        """
        self.layer1.bias = nn.Parameter(torch.from_numpy(b1).float())
        self.layer2.bias = nn.Parameter(torch.from_numpy(b2).float())
        self.layer3.bias = nn.Parameter(torch.from_numpy(b3).float())

    def get_weights(self):
        """Return the weight parameters of layers 1, 2 and 3 as a tuple."""
        return self.layer1.weight, self.layer2.weight, self.layer3.weight

    def get_biases(self):
        """Return the bias parameters of layers 1, 2 and 3 as a tuple."""
        return self.layer1.bias, self.layer2.bias, self.layer3.bias

# Single shared network instance: kernel_nn overwrites its weights and biases on
# every call, and the input-warping plot at the end of the notebook evaluates it.
n = Network()

## Remaining GP Components 

In [44]:
# For the squared Exponential Function
def get_distance_matrix(x1,x2):
    """Pairwise Euclidean distances between the rows of ``x1`` and ``x2``.

    Both inputs are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape ``(len(x1), len(x2))``.
    """
    squared = np.zeros((len(x1), len(x2)))
    # Add the squared difference of one coordinate at a time.
    for dim in range(x1.shape[1]):
        difference = x1[:, dim].reshape(-1, 1) - x2[:, dim]
        squared += np.square(difference)
    return np.sqrt(squared)

def my_noise(x,hps,obj):
    """Input-dependent noise model: ``slope * x**power + intercept`` per point.

    The three coefficients are read from ``hps`` at offsets 1, 2 and 3 past
    ``obj.args[1]`` (the number of neural-network hyperparameters that come
    first in ``hps``). ``x`` is indexed as a 2-D array and only its first
    column ends up on the diagonal of the returned matrix.
    """
    offset = obj.args[1]
    slope, power, intercept = (hps[offset + k] for k in (1, 2, 3))

    # Power law in x plus a constant floor.
    per_point = slope * x**power + intercept

    return np.diag(per_point[:,0])

# Kernel Function
def kernel_nn(x1,x2,hps,obj):
    """Deep kernel: squared-exponential kernel on neural-network-warped inputs.

    ``obj.args[0]`` is the hidden-layer width and ``obj.args[1]`` the index in
    ``hps`` of the kernel variance. The leading entries of ``hps`` are loaded
    into the global network ``n`` in this order: layer-1 weights, layer-2
    weights, layer-3 weights, layer-1 biases, layer-2 biases, layer-3 bias.
    The length scale passed to the squared-exponential kernel is fixed at 200.
    """
    width = obj.args[0]
    variance_index = obj.args[1]

    # Cumulative end positions of each parameter group inside hps.
    w1_end = width
    w2_end = width**2 + width
    w3_end = width**2 + 2*width
    b1_end = width**2 + 3*width
    b2_end = width**2 + 4*width

    # Load the current hyperparameters into the shared network.
    n.set_weights(hps[0:w1_end].reshape(width,1),
                  hps[w1_end:w2_end].reshape(width,width),
                  hps[w2_end:w3_end].reshape(1,width))
    n.set_biases(hps[w3_end:b1_end].reshape(width),
                 hps[b1_end:b2_end].reshape(width),
                 np.array([hps[b2_end]]))

    # Distances are measured in the warped (network output) space.
    warped_1 = n.forward(x1).reshape(-1,1)
    warped_2 = n.forward(x2).reshape(-1,1)
    warped_distance = get_distance_matrix(warped_1,warped_2)

    return hps[variance_index] * obj.squared_exponential_kernel(warped_distance, 200) #100


################################################################################

# Mean function: Two-Element piecewise function
def mean2(x,hps,obj):
    """Two-segment piecewise-linear prior mean evaluated on the first column of ``x``.

    With ``k = obj.args[1]``, the hyperparameters read are ``hps[k+4]`` (the
    breakpoint), ``hps[k+5]`` (slope up to and including the breakpoint) and
    ``hps[k+6]`` (slope after it). The first segment has a fixed intercept of
    500; the second intercept is derived so that both segments give the same
    value at the breakpoint.
    """
    offset = obj.args[1]
    breakpoint_x = hps[offset+4]
    slope_before = hps[offset+5]
    slope_after = hps[offset+6]

    intercept_before = 500
    # Chosen so that both lines agree at breakpoint_x.
    intercept_after = (slope_before - slope_after) * breakpoint_x + intercept_before

    cycles = x[:,0]
    first_segment = slope_before*cycles + intercept_before
    second_segment = slope_after*cycles + intercept_after

    return np.where(cycles <= breakpoint_x, first_segment, second_segment)

# Training the Model with Few Data points

In [45]:
nodes_num = n.nodes_num

# Hyperparameter vector layout: [NN weights | NN biases | kernel variance | noise (3) | mean (3)]
total_num_of_NN_hps = nodes_num**2 + 4*nodes_num + 1     # Depends on the number of layers used
num_of_other_hps = 7                                     # Depends on Kernel, noise and mean functions

# NOTE: drawn from NumPy's global random state without a seed. They only matter
# if the commented-out init_hyperparameters / train() lines below are enabled.
NN_weights_initial_hps = np.random.uniform(-1, 1,nodes_num**2+2*nodes_num)
NN_biases_initial_hps = np.random.uniform(-250, 250,total_num_of_NN_hps-(nodes_num**2+2*nodes_num))


other_init_hps = np.array([200,                   # Kernel
                           0.06, 2, 2,            # Noise  
                           250,-0.01,-0.015])     # Mean.

init_hyperparameters = np.concatenate([NN_weights_initial_hps,NN_biases_initial_hps,other_init_hps])

# Only the first 300 training points are used for this fit, as (N, 1) columns.
x_data_trial = np.array([x_data[0:300]]).reshape(-1,1)
y_data_trial = np.array([y_data[0:300]]).reshape(-1,1)


print("x data: ", x_data_trial.shape)
print("y data: ", y_data_trial.shape)


# Setting the Optimization Bounds for Hyperparameters
bounds = np.empty((total_num_of_NN_hps + num_of_other_hps,2))

# NN
bounds[0:nodes_num**2+2*nodes_num] = np.array([-1.,1.])                      # Weights NN: Define spread and shift in output
bounds[nodes_num**2+2*nodes_num:total_num_of_NN_hps] = np.array([-250.,250.])    # Biases of NN: Define shift in output

# Kernel Sq Exp 
bounds[total_num_of_NN_hps] = np.array([100.,10000.])                             # Kernel Variance
#bounds[total_num_of_NN_hps+7] = np.array([10.,300.])                           # Kernel Lengthscale

# Noise
bounds[total_num_of_NN_hps+1] = np.array([1e-5,1.])                           # Noise Slope
bounds[total_num_of_NN_hps+2] = np.array([1.,5.])                            # Noise Power
bounds[total_num_of_NN_hps+3] = np.array([0.,3.])                              # Noise Intercept
# Mean
bounds[total_num_of_NN_hps+4] = np.array([200.,700.])                          # Mean Piecewise Intersection point
bounds[total_num_of_NN_hps+5] = np.array([-1e-1,-1e-3])                        # Mean Slope 1
bounds[total_num_of_NN_hps+6] = np.array([-5e-1,-1e-3])                        # Mean Slope 2


## The following are the trained hps when using the first 300 data points. 
## Do not train again!
## 53 values: 35 NN weights, 11 NN biases, 1 kernel variance, 3 noise, 3 mean.
trained_hps = np.array([-0.349,  0.461, -0.678,  0.604,  0.747,  0.827,  0.282,  0.126,  0.572, -0.313,
                   -0.463, -0.174, -0.619,  0.701, -0.109,  0.048, -0.318, -0.367, -0.499, -0.321,
                    0.083,  0.668, -0.209,  0.552, -0.355,  0.192,  0.772, -0.739,  0.959, -0.772,
                   -0.713, -0.817, -0.014,  0.077,  0.381, -196.313,  126.321,   45.903, -216.376,  
                    126.7, -86.184, -60.536, -108.548, 67.332,  162.204, -36.093, 102.949, 0.0005, 2.654, 2.373,
                    5.44103e+02, -5.30000e-02, -4.42000e-01])



my_gpNN = GPOptimizer(x_data_trial,y_data_trial,
            #init_hyperparameters = init_hyperparameters,  # we need enough of those for kernel, noise and prior mean functions
            init_hyperparameters = trained_hps,  # we need enough of those for kernel, noise and prior mean functions
             #noise_variances=np.ones(y_data.shape) * 0.01, #providing noise variances and a noise function will raise a warning 
            compute_device='cpu', 
            gp_kernel_function=kernel_nn, 
            gp_kernel_function_grad=None, 
            gp_mean_function=mean2, 
            gp_mean_function_grad=None,
            gp_noise_function=my_noise,
            normalize_y=False,
            sparse_mode=False,
            gp2Scale = False,
            store_inv=False, 
            ram_economy=False, 
            args= np.array([nodes_num,total_num_of_NN_hps]),  # read by kernel_nn, my_noise and mean2 via obj.args
            )

#my_gpNN.train(hyperparameter_bounds=bounds, method='global')

# NOTE: printed even though train() is commented out above.
print("Training is Done!")



x data:  (300, 1)
y data:  (300, 1)
Training is Done!


In [46]:
# Posterior of the deep-kernel GP that was built on the first 300 training points.
meanNN = my_gpNN.posterior_mean(x_pred.reshape(-1,1))["f(x)"]
varNN =  my_gpNN.posterior_covariance(x_pred.reshape(-1,1), variance_only=False, add_noise=True)["v(x)"]


# Quick-look figure: it is not saved to the results folder.
plt.plot(x_pred,meanNN, color="red", label = "Posterior Mean", linewidth = 4)
# Grey band: mean +/- 2 * sqrt(v(x)).
plt.fill_between(np.squeeze(x_pred), meanNN - 2. * np.sqrt(varNN), meanNN + 2. * np.sqrt(varNN), alpha = 0.5, color = "grey", label = "Posterior Variance")
plt.scatter(x_data_trial,y_data_trial)
plt.legend()
plt.xlim([0,np.max(x_pred)])
plt.title("Fitted Model")
plt.xlabel("Cycle Number")
plt.ylabel("Energy")

Text(56.722222222222214, 0.5, 'Energy')

In [47]:
# Print the hyperparameters of my_gpNN grouped by role, following the layout
# [NN weights | NN biases | kernel variance | noise (3) | mean (3)].
# Values are rounded to 3 decimals, so small entries can print as 0.
my_hyperparameter = my_gpNN.hyperparameters

print("NN Weights: ")
print(np.round(my_hyperparameter[0:nodes_num**2+nodes_num*2],3))

print(" ")

print("NN Biases: ")
print(np.round(my_hyperparameter[nodes_num**2+nodes_num*2:total_num_of_NN_hps],3))

print(" ")

print("Sq. Expo Kernel: ")
print(np.round(my_hyperparameter[total_num_of_NN_hps:total_num_of_NN_hps+1],3))

#print("Sq. Expo Kernel lengthscale: ")
#print(np.round(my_hyperparameter[total_num_of_NN_hps+7],3))

print(" ")

print("Noise: ")
print(np.round(my_hyperparameter[total_num_of_NN_hps+1:total_num_of_NN_hps+4],3))

print(" ")

print("Mean: ")
print(np.round(my_hyperparameter[total_num_of_NN_hps+4:total_num_of_NN_hps+7],3))


NN Weights: 
[-0.349  0.461 -0.678  0.604  0.747  0.827  0.282  0.126  0.572 -0.313
 -0.463 -0.174 -0.619  0.701 -0.109  0.048 -0.318 -0.367 -0.499 -0.321
  0.083  0.668 -0.209  0.552 -0.355  0.192  0.772 -0.739  0.959 -0.772
 -0.713 -0.817 -0.014  0.077  0.381]
 
NN Biases: 
[-196.313  126.321   45.903 -216.376  126.7    -86.184  -60.536 -108.548
   67.332  162.204  -36.093]
 
Sq. Expo Kernel: 
[102.949]
 
Noise: 
[0.    2.654 2.373]
 
Mean: 
[ 5.44103e+02 -5.30000e-02 -4.42000e-01]


# Training the GP Model for all data, with the prev. identified hps

# GP Components - Modified

In [48]:
# For the squared Exponential Function
def get_distance_matrix(x1,x2):
    """Pairwise Euclidean distances between the rows of ``x1`` and ``x2``.

    Both inputs are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape ``(len(x1), len(x2))``.
    """
    squared = np.zeros((len(x1), len(x2)))
    # Add the squared difference of one coordinate at a time.
    for dim in range(x1.shape[1]):
        difference = x1[:, dim].reshape(-1, 1) - x2[:, dim]
        squared += np.square(difference)
    return np.sqrt(squared)

def my_noise(x,hps,obj):
    """Input-dependent noise model: ``slope * x**power + intercept`` per point.

    The three coefficients are read from ``hps`` at offsets 1, 2 and 3 past
    ``obj.args[1]``. ``x`` is indexed as a 2-D array and only its first column
    ends up on the diagonal of the returned matrix.
    """
    offset = obj.args[1]
    slope, power, intercept = (hps[offset + k] for k in (1, 2, 3))

    # Power law in x plus a constant floor.
    per_point = slope * x**power + intercept

    return np.diag(per_point[:,0])

# Kernel Function
def kernel_nn(x1,x2,hps,obj):
    """Deep kernel with a frozen network: only the kernel variance comes from ``hps``.

    ``obj.args[0]`` is the hidden-layer width and ``obj.args[1]`` the index in
    ``hps`` of the kernel variance. The network weights and biases are the
    fixed values listed below; they are loaded into the global network ``n``
    on every call. The length scale passed to the squared-exponential kernel
    is fixed at 200.
    """
    width = obj.args[0]
    variance_index = obj.args[1]

    # Layout: layer-1 weights, layer-2 weights, layer-3 weights,
    # layer-1 biases, layer-2 biases, layer-3 bias.
    frozen_nn_hps = np.array([-0.349,  0.461, -0.678,  0.604,  0.747,  0.827,  0.282,  0.126,  0.572, -0.313,
                   -0.463, -0.174, -0.619,  0.701, -0.109,  0.048, -0.318, -0.367, -0.499, -0.321,
                    0.083,  0.668, -0.209,  0.552, -0.355,  0.192,  0.772, -0.739,  0.959, -0.772,
                   -0.713, -0.817, -0.014,  0.077,  0.381, -196.313,  126.321,   45.903, -216.376,  
                    126.7, -86.184, -60.536, -108.548, 67.332,  162.204,  -36.093])

    # Cumulative end positions of each parameter group inside frozen_nn_hps.
    w1_end = width
    w2_end = width**2 + width
    w3_end = width**2 + 2*width
    b1_end = width**2 + 3*width
    b2_end = width**2 + 4*width

    # Load the frozen parameters into the shared network.
    n.set_weights(frozen_nn_hps[0:w1_end].reshape(width,1),
                  frozen_nn_hps[w1_end:w2_end].reshape(width,width),
                  frozen_nn_hps[w2_end:w3_end].reshape(1,width))
    n.set_biases(frozen_nn_hps[w3_end:b1_end].reshape(width),
                 frozen_nn_hps[b1_end:b2_end].reshape(width),
                 np.array([frozen_nn_hps[b2_end]]))

    # Distances are measured in the warped (network output) space.
    warped_1 = n.forward(x1).reshape(-1,1)
    warped_2 = n.forward(x2).reshape(-1,1)
    warped_distance = get_distance_matrix(warped_1,warped_2)

    return hps[variance_index] * obj.squared_exponential_kernel(warped_distance, 200) #100


################################################################################

# Mean function: Two-Element piecewise function
def mean2(x,hps,obj):
    """Two-segment piecewise-linear prior mean evaluated on the first column of ``x``.

    With ``k = obj.args[1]``, the hyperparameters read are ``hps[k+4]`` (the
    breakpoint), ``hps[k+5]`` (slope up to and including the breakpoint) and
    ``hps[k+6]`` (slope after it). The first segment has a fixed intercept of
    500; the second intercept is derived so that both segments give the same
    value at the breakpoint.
    """
    offset = obj.args[1]
    breakpoint_x = hps[offset+4]
    slope_before = hps[offset+5]
    slope_after = hps[offset+6]

    intercept_before = 500
    # Chosen so that both lines agree at breakpoint_x.
    intercept_after = (slope_before - slope_after) * breakpoint_x + intercept_before

    cycles = x[:,0]
    first_segment = slope_before*cycles + intercept_before
    second_segment = slope_after*cycles + intercept_after

    return np.where(cycles <= breakpoint_x, first_segment, second_segment)

In [49]:
nodes_num = n.nodes_num

# The network parameters are frozen inside kernel_nn in this stage, so the
# hyperparameter vector holds no NN entries and every offset below starts at 0.
total_num_of_NN_hps = 0 # nodes_num**2 + 4*nodes_num + 1     # Depends on the number of layers used
num_of_other_hps = 7                                     # Depends on Kernel, noise and mean functions

#NN_weights_initial_hps = np.random.uniform(-1, 1,nodes_num**2+2*nodes_num)
#NN_biases_initial_hps = np.random.uniform(-250, 250,total_num_of_NN_hps-(nodes_num**2+2*nodes_num))


# NOTE: init_hyperparameters and bounds1 are only needed when the commented-out
# train() call below is enabled; the GP is built from trained_hps.
init_hyperparameters = np.array([200,                   # Kernel
                           0.06, 2, 2,            # Noise  
                           250,-0.01,-0.015])     # Mean.

print("x data: ", x_data.shape)
print("y data: ", y_data.shape)


# Setting the Optimization Bounds for Hyperparameters
bounds1 = np.empty((total_num_of_NN_hps + num_of_other_hps,2))

# NN
#bounds[0:nodes_num**2+2*nodes_num] = np.array([-1.,1.])                      # Weights NN: Define spread and shift in output
#bounds[nodes_num**2+2*nodes_num:total_num_of_NN_hps] = np.array([-250.,250.])    # Biases of NN: Define shift in output

# Kernel Sq Exp 
bounds1[total_num_of_NN_hps] = np.array([100.,10000.])                             # Kernel Variance
#bounds[total_num_of_NN_hps+7] = np.array([10.,300.])                           # Kernel Lengthscale

# Noise
bounds1[total_num_of_NN_hps+1] = np.array([1e-5,1.])                           # Noise Slope
bounds1[total_num_of_NN_hps+2] = np.array([1.,5.])                            # Noise Power
bounds1[total_num_of_NN_hps+3] = np.array([0.,3.])                              # Noise Intercept
# Mean
bounds1[total_num_of_NN_hps+4] = np.array([200.,700.])                          # Mean Piecewise Intersection point
bounds1[total_num_of_NN_hps+5] = np.array([-1e-1,-1e-3])                        # Mean Slope 1
bounds1[total_num_of_NN_hps+6] = np.array([-5e-1,-1e-3])                        # Mean Slope 2


# Hyperparameters from an earlier training run, reused instead of retraining:
# [kernel variance, noise slope, noise power, noise intercept, mean breakpoint, mean slope 1, mean slope 2]
trained_hps = np.array([1.03696800e+02,  1.30611831e-05,  2.64659032e+00,  2.97615034e+00,
  5.24895797e+02, -5.01939901e-02, -3.56348077e-01])


my_gpNN1 = GPOptimizer(x_data,y_data,
            #init_hyperparameters = init_hyperparameters,  # we need enough of those for kernel, noise and prior mean functions
            init_hyperparameters = trained_hps,  # we need enough of those for kernel, noise and prior mean functions
            #noise_variances=np.ones(y_data.shape) * 0.01, #providing noise variances and a noise function will raise a warning 
            compute_device='cpu', 
            gp_kernel_function=kernel_nn, 
            gp_kernel_function_grad=None, 
            gp_mean_function=mean2, 
            gp_mean_function_grad=None,
            gp_noise_function=my_noise,
            normalize_y=False,
            sparse_mode=False,
            gp2Scale = False,
            store_inv=False, 
            ram_economy=False, 
            args= np.array([nodes_num,total_num_of_NN_hps]),  # read by kernel_nn, my_noise and mean2 via obj.args
            )

#my_gpNN1.train(bounds1, method='global')

# NOTE: printed even though train() is commented out above.
print("Training is Done!")

x data:  (1500,)
y data:  (1500,)
Training is Done!


In [50]:
# Posterior of the full-data deep-kernel GP. This rebinds meanNN / varNN, which
# previously held the results of the 300-point trial model.
meanNN = my_gpNN1.posterior_mean(x_pred.reshape(-1,1))["f(x)"]
varNN =  my_gpNN1.posterior_covariance(x_pred.reshape(-1,1), variance_only=False, add_noise=True)["v(x)"]

print(my_gpNN1.log_likelihood(my_gpNN1.hyperparameters))
print(my_gpNN1.hyperparameters)

-4829.516560099571
[ 1.03696800e+02  1.30611831e-05  2.64659032e+00  2.97615034e+00
  5.24895797e+02 -5.01939901e-02 -3.56348077e-01]


In [51]:

# Final figure for the fully modified GP; meanNN / varNN come from the previous cell.
plt.figure(figsize = (10,10))
plt.plot(x_pred,meanNN, color="red", label = "Posterior Mean", linewidth = 4)
# Grey band: mean +/- 2 * sqrt(v(x)).
plt.fill_between(np.squeeze(x_pred), meanNN - 2. * np.sqrt(varNN), meanNN + 2. * np.sqrt(varNN), alpha = 0.5, color = "grey", label = "Posterior Variance")
plt.scatter(x_data,y_data,s = 75, color='blue', label = "Training")
# my_color is defined in the standard-GP plotting cell.
plt.scatter(x_data_hidden,y_data_hidden,s = 75,color=[my_color], label = "Testing")

plt.legend(fontsize=label_size,frameon=False,loc='lower left',markerscale=2)
plt.tick_params(axis='both', which='major', labelsize=label_size) # Set the font size of the tick labels on the x and y axes
plt.xlim(0,1000)
plt.xticks([])

plt.ylim([250,520])
plt.yticks([])
plt.xlabel("Cycle Number",fontsize=label_size+4)
plt.ylabel("Energy",fontsize=label_size+4)
#plt.savefig('Modified GP All.pdf', bbox_inches='tight')
plt.savefig('/results/Introduction Figures/Modified GP All.pdf', bbox_inches='tight')


# Plotting the Warping of Input Space

In [52]:
# Evaluate the shared network on the prediction grid to visualise how it warps
# the input space. NOTE: `n` holds whatever weights the most recent kernel_nn
# call loaded into it, so run this after the deep-kernel GP has been evaluated.
x_pred_nn = n.forward(x_pred)

plt.figure(figsize = (10,10))
plt.scatter(x_pred,x_pred_nn, color="black", linewidth = 0.5)
#plt.xlabel(r"Original Space $\mathcal{X}$",fontsize=label_size)
#plt.ylabel(r"Transformed Space $\mathcal{X}^*$",fontsize=label_size)
# Set the font size of the tick labels on the x and y axes
# (the original cell issued this identical call twice; once is enough).
plt.tick_params(axis='both', which='major', labelsize=label_size)
plt.xlim(0,1000)
plt.xticks([])

plt.ylim(0,100)
plt.yticks([])
plt.savefig('/results/Introduction Figures/Modified GP Space Warping.pdf', bbox_inches='tight')