In [4]:
from gpcam import GPOptimizer


import numpy as np
from numpy.random import default_rng

import random

import matplotlib.pyplot as plt
import plotly.graph_objects as go

import os
import csv

import random

# Loading Data

In [5]:
# Synthetic dataset. Each row of `energy_data` is plotted against `cycle_number`
# below, so rows are indexed by battery and columns follow the cycle axis.
energy_data = np.load("/data/Synthetic Data Generation_1/my_synthetic_energy.npy")
cycle_number = np.load("/data/Synthetic Data Generation_1/my_synthetic_cycleNum.npy")

label_size = 30         # font size reused by the figures in this notebook
num_of_datasets = 50    # number of battery curves fed to the GP models

# Fixed selection of row indices into `energy_data` (one entry per battery curve).
considered_batteries = [
    5546, 9477, 2231, 4437, 7059, 5259, 8330, 1068, 8214, 5888,
    3275, 6845, 7671, 299, 5038, 3503, 8673, 2236, 3644, 4980,
    993, 7545, 654, 1418, 6090, 7936, 8792, 6910, 2933, 2382,
    9730, 8476, 1882, 7986, 7091, 4813, 3086, 3908, 1539, 8567,
    2152, 5738, 8646, 9692, 2661, 6766, 7230, 512, 758, 2881,
]

print(considered_batteries)

# Overlay the raw curve of every selected battery on a single figure.
fig, ax = plt.subplots(figsize=(20, 10))
for battery_id in considered_batteries:
    ax.scatter(cycle_number, energy_data[int(battery_id)])

ax.tick_params(axis='both', which='major', labelsize=label_size)  # tick-label font size on both axes
ax.set_xlabel("Cycle Number", fontsize=label_size)
ax.set_ylabel("Quantity of Interest", fontsize=label_size)
plt.show()

print("max y: ", np.max(energy_data))

# Initializing the data to fit the GP model
data_size = num_of_datasets

# All Data
# np.tile lays the cycle axis out once per battery (battery-major order:
# c0..cN, c0..cN, ...). The targets therefore have to be flattened battery by
# battery as well, i.e. a plain row-major reshape of the (battery, cycle) block.
# Transposing before flattening would emit the values cycle by cycle and pair
# every x with the energy of a different cycle.
x_data = np.tile(cycle_number, data_size).reshape(-1, 1)  # cycle axis repeated `data_size` times
y_data = energy_data[considered_batteries, :].reshape(-1, 1)
x_pred = np.linspace(0, 1000, 1001).reshape(-1, 1)  # prediction grid: 1001 evenly spaced points on [0, 1000]

# Guard against `data_size` drifting away from the number of selected batteries.
assert x_data.shape == y_data.shape, "x_data and y_data must have one row per observation"

print("x data: ", x_data.shape)
print("y data: ", y_data.shape)

plt.scatter(x_data,y_data)

[5546, 9477, 2231, 4437, 7059, 5259, 8330, 1068, 8214, 5888, 3275, 6845, 7671, 299, 5038, 3503, 8673, 2236, 3644, 4980, 993, 7545, 654, 1418, 6090, 7936, 8792, 6910, 2933, 2382, 9730, 8476, 1882, 7986, 7091, 4813, 3086, 3908, 1539, 8567, 2152, 5738, 8646, 9692, 2661, 6766, 7230, 512, 758, 2881]
max y:  529.5870098158235
x data:  (2500, 1)
y data:  (2500, 1)


<matplotlib.collections.PathCollection at 0x7fac7ff4c5e0>

# Creating the Subfolder in Results

In [1]:
# Folder that receives the figures written by the savefig calls in this notebook.
# Plain string literal: the path has no placeholders, so no f-string is needed.
new_folder_path = "/results/Methods Figures"

# exist_ok=True keeps a re-run of this cell from failing when the folder already exists.
os.makedirs(new_folder_path, exist_ok=True)

NameError: name 'os' is not defined

# 1 - Power-law Model

## GP Components 

In [6]:
def get_distance_matrix(x1,x2):
    """Return the pairwise Euclidean distances between the rows of x1 and x2.

    Both arguments are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape (len(x1), len(x2)).
    """
    sq_dist = np.zeros((len(x1), len(x2)))
    for dim in range(x1.shape[1]):
        # Column vector minus row vector broadcasts to every (i, j) pair.
        delta = x1[:, dim][:, np.newaxis] - x2[:, dim]
        sq_dist += delta * delta
    return np.sqrt(sq_dist)

# For the Noise
def s(x, my_slope, my_pow, my_intercept):
    """Evaluate my_slope * x**my_pow + my_intercept (used as the noise level)."""
    power_term = x ** my_pow
    return my_slope * power_term + my_intercept


def my_noise(x,hps,obj):
    """Build the noise matrix from the power-law level s(x, hps[2], hps[3], hps[4]).

    hps[2], hps[3] and hps[4] are the noise slope, power and intercept.
    `obj` is accepted for the callback signature but not used here.
    """
    slope, exponent, offset = hps[2], hps[3], hps[4]
    level = s(x, slope, exponent, offset)

    # The product relies on broadcasting: for a single-column x of shape (n, 1),
    # as built in this notebook, row i of the identity is scaled by level[i],
    # which leaves the noise levels on the diagonal.
    return np.identity(len(x)) * level

def kernel(x1,x2,hps,obj):
    """Covariance between x1 and x2: hps[0] * squared-exponential kernel.

    hps[0] scales the kernel (variance) and hps[1] is passed to
    obj.squared_exponential_kernel together with the pairwise distances.
    """
    pairwise = get_distance_matrix(x1, x2)
    return hps[0] * obj.squared_exponential_kernel(pairwise, hps[1])


def mean(x,hps,obj):
    """Prior mean of the first model: hps[6] - (hps[5] * x[:, 0])**2.

    Only the first column of x is read; `obj` is not used.
    """
    scaled = hps[5] * x[:, 0]
    return -(scaled ** 2) + hps[6]

## GP Modeling

In [11]:
# Initializing the GP Model
# Hyperparameter layout read by kernel / my_noise / mean:
#   [0] kernel variance     [1] kernel lengthscale
#   [2] noise slope         [3] noise power         [4] noise intercept
#   [5] mean slope          [6] mean intercept
init_hyperparameters = np.array([
    3.23032552e+01, 1.67736159e+02,                  # kernel
    1.93296950e-05, 2.59001100e+00, 2.88176470e+00,  # noise
    1.23187950e-02, 4.99961417e+02,                  # mean
])

my_gp = GPOptimizer(
    x_data, y_data,
    init_hyperparameters=init_hyperparameters,  # one entry per kernel, noise and prior-mean parameter
    compute_device='cpu',
    gp_kernel_function=kernel,
    gp_kernel_function_grad=None,
    gp_mean_function=mean,
    gp_mean_function_grad=None,
    gp_noise_function=my_noise,
    normalize_y=False,
    sparse_mode=False,
    gp2Scale=False,
    store_inv=False,
    ram_economy=False,
    args=None,
)

# Setting the Optimization Bounds for Hyperparameters (one [lower, upper] row each)
bounds = np.array([
    [1e-5, 6000.],    # Kernel Variance
    [100., 1000.],    # Kernel Lengthscale
    [1e-10, 1.],      # Noise Slope
    [2, 5.],          # Noise Power
    [0., 6.],         # Noise Intercept
    [1e-5, 100.],     # Mean slope
    [400., 600.],     # Mean intercept
])

# Training is switched off; the model is used with the initial hyperparameters above.
#my_gp.train(hyperparameter_bounds=bounds,max_iter=100)


In [12]:
# Posterior mean and variance of the first model on the prediction grid.
f = my_gp.posterior_mean(x_pred)["f(x)"]
v = my_gp.posterior_covariance(x_pred)["v(x)"]

cycles_pred = x_pred[:, 0]
two_sigma = 2. * np.sqrt(v)  # half-width of the shaded band

# Plotting the data
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(cycles_pred, f, color="blue", linewidth=3, label=f"{data_size} cells")
ax.scatter(x_data[:, 0], y_data, color="black")  # Training Data
ax.fill_between(cycles_pred, f - two_sigma, f + two_sigma, alpha=0.5, color="grey")
ax.set_xlabel("cycle number")
ax.set_ylabel("energy")
ax.legend()
plt.show()

## Plotting the Prior Mean

In [13]:
#print("hps: ", my_gp.hyperparameters)

# Hyperparameters used to draw the prior mean (same values the GP above was initialised with).
trained_hps = np.array([
    3.23032552e+01, 1.67736159e+02,                  # kernel
    1.93296950e-05, 2.59001100e+00, 2.88176470e+00,  # noise
    1.23187950e-02, 4.99961417e+02,                  # mean
])

#trained_prior_y = mean(x_pred,my_gp.hyperparameters,2)

# The third argument fills the `obj` slot, which `mean` does not read.
trained_prior_y_powerlaw = mean(x_pred, trained_hps, 2)

# Plotting the data
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(x_pred[:, 0], trained_prior_y_powerlaw, color="red", linewidth=6, label="Trained Prior Mean")
ax.scatter(x_data[:, 0], y_data, color="black", label="Synthetic Data")  # All Data
ax.tick_params(axis='both', which='major', labelsize=label_size)  # tick-label font size on both axes
ax.set_xlabel("Cycle Number", fontsize=label_size)
ax.set_ylabel("Quantity of Interest", fontsize=label_size)
ax.legend(fontsize=label_size, frameon=False)

fig.savefig("/results/Methods Figures/Power Law Model.png", dpi=600)


# 2 - Two-Element Piecewise Model

## GP Components

In [16]:
def get_distance_matrix(x1,x2):
    """Return the pairwise Euclidean distances between the rows of x1 and x2.

    Both arguments are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape (len(x1), len(x2)).
    """
    sq_dist = np.zeros((len(x1), len(x2)))
    for dim in range(x1.shape[1]):
        # Column vector minus row vector broadcasts to every (i, j) pair.
        delta = x1[:, dim][:, np.newaxis] - x2[:, dim]
        sq_dist += delta * delta
    return np.sqrt(sq_dist)


# For the Noise
def s(x, my_slope, my_pow, my_intercept):
    """Evaluate my_slope * x**my_pow + my_intercept (used as the noise level)."""
    power_term = x ** my_pow
    return my_slope * power_term + my_intercept


def my_noise(x,hps,obj):
    """Build the noise matrix from the power-law level s(x, hps[2], hps[3], hps[4]).

    hps[2], hps[3] and hps[4] are the noise slope, power and intercept.
    `obj` is accepted for the callback signature but not used here.
    """
    slope, exponent, offset = hps[2], hps[3], hps[4]
    level = s(x, slope, exponent, offset)

    # The product relies on broadcasting: for a single-column x of shape (n, 1),
    # as built in this notebook, row i of the identity is scaled by level[i],
    # which leaves the noise levels on the diagonal.
    return np.identity(len(x)) * level

def kernel(x1,x2,hps,obj):
    """Covariance between x1 and x2: hps[0] * squared-exponential kernel.

    hps[0] scales the kernel (variance) and hps[1] is passed to
    obj.squared_exponential_kernel together with the pairwise distances.
    """
    pairwise = get_distance_matrix(x1, x2)
    return hps[0] * obj.squared_exponential_kernel(pairwise, hps[1])

# here I am assuming that the mean function is a piecewise function
def mean2(x,hps,obj):
    """Two-segment piecewise-linear prior mean, continuous at the breakpoint.

    hps[5] is the breakpoint, hps[6] the slope up to and including it and
    hps[7] the slope beyond it. The first segment's intercept is fixed at 497.
    Only the first column of x is read; `obj` is not used.
    """
    break_x = hps[5]
    slope_left, slope_right = hps[6], hps[7]

    intercept_left = 497
    # Pick the second intercept so that both lines agree at break_x.
    intercept_right = (slope_left - slope_right) * break_x + intercept_left

    cycles = x[:, 0]
    left_line = slope_left * cycles + intercept_left
    right_line = slope_right * cycles + intercept_right
    return np.where(cycles <= break_x, left_line, right_line)

## GP Modeling

In [17]:
# Initializing the GP Model
# Hyperparameter layout read by kernel / my_noise / mean2:
#   [0] kernel variance     [1] kernel lengthscale
#   [2] noise slope         [3] noise power         [4] noise intercept
#   [5] breakpoint          [6] slope 1             [7] slope 2
init_hyperparameters = np.array([
    8.14679812e+00, 9.89985354e+02,                    # kernel
    7.12115271e-06, 2.75031358e+00, 3.06519009e+00,    # noise
    5.06802335e+02, -4.83379371e-02, -2.55178020e-01,  # mean
])

my_gp1 = GPOptimizer(
    x_data, y_data,
    init_hyperparameters=init_hyperparameters,  # one entry per kernel, noise and prior-mean parameter
    compute_device='cpu',
    gp_kernel_function=kernel,
    gp_kernel_function_grad=None,
    gp_mean_function=mean2,
    gp_mean_function_grad=None,
    gp_noise_function=my_noise,
    normalize_y=False,
    sparse_mode=False,
    gp2Scale=False,
    store_inv=False,
    ram_economy=False,
    args=None,
)

# Setting the Optimization Bounds for Hyperparameters (one [lower, upper] row each)
bounds = np.array([
    [1e-5, 6000.],     # Kernel Variance
    [100., 1000.],     # Kernel Lengthscale
    [1e-10, 1.],       # Noise Slope
    [2, 5.],           # Noise Power
    [0., 6.],          # Noise Intercept
    [350., 750.],      # Mean Elements Intersection
    [-1e-1, -1e-3],    # Mean Slope 1
    [-5e-1, -1e-3],    # Mean Slope 2
])

# Training is switched off; the model is used with the initial hyperparameters above.
#my_gp1.train(hyperparameter_bounds=bounds,max_iter=100)


In [18]:
# Posterior mean and variance of the two-element model on the prediction grid.
f1 = my_gp1.posterior_mean(x_pred)["f(x)"]
v1 = my_gp1.posterior_covariance(x_pred)["v(x)"]

cycles_pred = x_pred[:, 0]
two_sigma = 2. * np.sqrt(v1)  # half-width of the shaded band

# Plotting the data
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(cycles_pred, f1, color="blue", linewidth=3, label=f"{data_size} cells")
ax.scatter(x_data[:, 0], y_data, color="black")  # Training Data
ax.fill_between(cycles_pred, f1 - two_sigma, f1 + two_sigma, alpha=0.5, color="grey")
ax.set_xlabel("cycle number")
ax.set_ylabel("energy")
ax.legend()
plt.show()

## Plotting the Prior Mean

In [20]:
print("hps: ", my_gp1.hyperparameters)

# Earlier hyperparameter set, kept for reference:
#trained_hps = np.array( [ 6.50851484e+00,  8.99467861e+02,  9.65070823e-06,  2.69545130e+00,
#  2.97887478e+00,  4.95892427e+02, -5.14529993e-02, -2.45587221e-01])

# Hyperparameters used to draw the prior mean (kernel, noise, then mean2 entries).
trained_hps = np.array([
    8.14679812e+00, 9.89985354e+02,                    # kernel
    7.12115271e-06, 2.75031358e+00, 3.06519009e+00,    # noise
    5.06802335e+02, -4.83379371e-02, -2.55178020e-01,  # mean
])

#trained_prior_y1 = mean2(x_pred,my_gp1.hyperparameters,2)

# The third argument fills the `obj` slot, which `mean2` does not read.
trained_prior_y1 = mean2(x_pred, trained_hps, 2)

# Plotting the data
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(x_pred[:, 0], trained_prior_y1, color="red", linewidth=6, label="Trained Prior Mean")
ax.scatter(x_data[:, 0], y_data, color="black", label="Synthetic Data")  # All Data
ax.tick_params(axis='both', which='major', labelsize=label_size)  # tick-label font size on both axes
ax.set_xlabel("Cycle Number", fontsize=label_size)
ax.set_ylabel("Quantity of Interest", fontsize=label_size)
ax.legend(fontsize=label_size, frameon=False)

fig.savefig("/results/Methods Figures/Piecewise Model.png", dpi=600)


hps:  [ 8.14679812e+00  9.89985354e+02  7.12115271e-06  2.75031358e+00
  3.06519009e+00  5.06802335e+02 -4.83379371e-02 -2.55178020e-01]


# 3 - Three-Element Piecewise Model

## GP Components

In [21]:
def get_distance_matrix(x1,x2):
    """Return the pairwise Euclidean distances between the rows of x1 and x2.

    Both arguments are indexed as 2-D arrays (points along axis 0, coordinates
    along axis 1). The result has shape (len(x1), len(x2)).
    """
    sq_dist = np.zeros((len(x1), len(x2)))
    for dim in range(x1.shape[1]):
        # Column vector minus row vector broadcasts to every (i, j) pair.
        delta = x1[:, dim][:, np.newaxis] - x2[:, dim]
        sq_dist += delta * delta
    return np.sqrt(sq_dist)

# For the Noise
def s(x, my_slope, my_pow, my_intercept):
    """Evaluate my_slope * x**my_pow + my_intercept (used as the noise level)."""
    power_term = x ** my_pow
    return my_slope * power_term + my_intercept

def my_noise(x,hps,obj):
    """Build the noise matrix from the power-law level s(x, hps[2], hps[3], hps[4]).

    hps[2], hps[3] and hps[4] are the noise slope, power and intercept.
    `obj` is accepted for the callback signature but not used here.
    """
    slope, exponent, offset = hps[2], hps[3], hps[4]
    level = s(x, slope, exponent, offset)

    # The product relies on broadcasting: for a single-column x of shape (n, 1),
    # as built in this notebook, row i of the identity is scaled by level[i],
    # which leaves the noise levels on the diagonal.
    return np.identity(len(x)) * level

def kernel(x1,x2,hps,obj):
    """Covariance between x1 and x2: hps[0] * squared-exponential kernel.

    hps[0] scales the kernel (variance) and hps[1] is passed to
    obj.squared_exponential_kernel together with the pairwise distances.
    """
    pairwise = get_distance_matrix(x1, x2)
    return hps[0] * obj.squared_exponential_kernel(pairwise, hps[1])


def mean3(x,hps,obj):
    """Three-segment piecewise-linear prior mean, continuous at both breakpoints.

    hps[5] is the first breakpoint and hps[6] the distance from it to the
    second one. hps[7], hps[8] and hps[9] are the slopes of the three segments.
    The first segment's intercept is fixed at 497.
    Only the first column of x is read; `obj` is not used.
    """
    first_break = hps[5]
    second_break = first_break + hps[6]
    slope_a, slope_b, slope_c = hps[7], hps[8], hps[9]

    intercept_a = 497
    # Each following intercept is chosen so neighbouring lines meet at their breakpoint.
    intercept_b = (slope_a - slope_b) * first_break + intercept_a
    intercept_c = (slope_b - slope_c) * second_break + intercept_b

    cycles = x[:, 0]
    segment_a = slope_a * cycles + intercept_a
    segment_b = slope_b * cycles + intercept_b
    segment_c = slope_c * cycles + intercept_c

    # Resolve the two later segments first, then let the first segment take
    # precedence wherever cycles <= first_break.
    beyond_first = np.where(cycles <= second_break, segment_b, segment_c)
    return np.where(cycles <= first_break, segment_a, beyond_first)

## GP Modeling

In [22]:
# Initializing the GP Model
# Hyperparameter layout read by kernel / my_noise / mean3:
#   [0] kernel variance     [1] kernel lengthscale
#   [2] noise slope         [3] noise power         [4] noise intercept
#   [5] first breakpoint    [6] gap to the second breakpoint
#   [7] slope 1             [8] slope 2             [9] slope 3
init_hyperparameters = np.array([
    4.23010310e+00, 8.62740563e+02,                     # kernel
    7.04241178e-06, 2.75391706e+00, 3.12227269e+00,     # noise
    5.03318638e+02, 1.74737412e+02,                     # mean breakpoints
    -4.85811742e-02, -2.16713998e-01, -3.71356810e-01,  # mean slopes
])

my_gp2 = GPOptimizer(
    x_data, y_data,
    init_hyperparameters=init_hyperparameters,  # one entry per kernel, noise and prior-mean parameter
    compute_device='cpu',
    gp_kernel_function=kernel,
    gp_kernel_function_grad=None,
    gp_mean_function=mean3,
    gp_mean_function_grad=None,
    gp_noise_function=my_noise,
    normalize_y=False,
    sparse_mode=False,
    gp2Scale=False,
    store_inv=False,
    ram_economy=False,
    args=None,
)

# Setting the Optimization Bounds for Hyperparameters (one [lower, upper] row each)
bounds = np.array([
    [1e-5, 6000.],     # Kernel Variance
    [100., 1000.],     # Kernel Lengthscale
    [1e-10, 1.],       # Noise Slope
    [2, 5.],           # Noise Power
    [0., 6.],          # Noise Intercept
    [350., 750.],      # Mean: first breakpoint
    [50., 400.],       # Mean: gap added to the first breakpoint to get the second
    [-1e-1, -1e-3],    # Mean Slope 1
    [-4e-1, -1e-3],    # Mean Slope 2
    [-4e-1, -1e-3],    # Mean Slope 3
])

# Training is switched off; the model is used with the initial hyperparameters above.
#my_gp2.train(hyperparameter_bounds=bounds,max_iter=100)

In [23]:
# Posterior mean and variance of the three-element model on the prediction grid.
f2 = my_gp2.posterior_mean(x_pred)["f(x)"]
v2 = my_gp2.posterior_covariance(x_pred)["v(x)"]

cycles_pred = x_pred[:, 0]
two_sigma = 2. * np.sqrt(v2)  # half-width of the shaded band

# Plotting the data
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(cycles_pred, f2, color="blue", linewidth=3, label=f"{data_size} cells")
ax.scatter(x_data[:, 0], y_data, color="black")  # Training Data
ax.fill_between(cycles_pred, f2 - two_sigma, f2 + two_sigma, alpha=0.5, color="grey")
ax.set_xlabel("cycle number")
ax.set_ylabel("energy")
ax.legend()
plt.show()

## Plotting the Prior Mean

In [24]:
print("hps: ", my_gp2.hyperparameters)

# Hyperparameters used to draw the prior mean (kernel, noise, then mean3 entries).
trained_hps = np.array([
    4.23010310e+00, 8.62740563e+02,                     # kernel
    7.04241178e-06, 2.75391706e+00, 3.12227269e+00,     # noise
    5.03318638e+02, 1.74737412e+02,                     # mean breakpoints
    -4.85811742e-02, -2.16713998e-01, -3.71356810e-01,  # mean slopes
])

# The third argument fills the `obj` slot, which `mean3` does not read.
trained_prior_y2 = mean3(x_pred, trained_hps, 2)

# Plotting the data
fig, ax = plt.subplots(figsize=(15, 10))
ax.plot(x_pred[:, 0], trained_prior_y2, color="red", linewidth=6, label="Trained Prior Mean")
ax.scatter(x_data[:, 0], y_data, color="black", label="Synthetic Data")  # All Data
ax.tick_params(axis='both', which='major', labelsize=label_size)  # tick-label font size on both axes
ax.set_xlabel("Cycle Number", fontsize=label_size)
ax.set_ylabel("Quantity of Interest", fontsize=label_size)
ax.legend(fontsize=label_size, frameon=False)

fig.savefig("/results/Methods Figures/Three-Elements Piecewise Model.png", dpi=600)


hps:  [ 4.23010310e+00  8.62740563e+02  7.04241178e-06  2.75391706e+00
  3.12227269e+00  5.03318638e+02  1.74737412e+02 -4.85811742e-02
 -2.16713998e-01 -3.71356810e-01]


# Plotting all models together

In [25]:
# Ground-truth parameters in the mean2 layout. mean2 only reads entries 5-7,
# so the two kernel slots are filled with zeros.
GT_hps = np.array([
    0, 0,                # kernel - Not Needed here
    1.002e-05, 2.69, 3,  # Noise
    500, -0.05, -0.2,    # Mean
])

GT_mean_values = mean2(x_pred, GT_hps, 2)

my_color = np.array([102, 178, 255]) / 255  # RGB triplet scaled to the 0-1 range

# Plotting the data
cycles_pred = x_pred[:, 0]
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(x_data[:, 0], y_data, color=[my_color], label='Data')  # Training Data
ax.plot(cycles_pred, GT_mean_values, color="blue", linewidth=7, linestyle='--', label="Ground Truth Mean")
ax.plot(cycles_pred, trained_prior_y_powerlaw, color="green", linewidth=7, label="Power Law")
ax.plot(cycles_pred, trained_prior_y1, color="red", linewidth=7, label="2-Element Piecewise")
ax.plot(cycles_pred, trained_prior_y2, color="black", linewidth=7, label="3-Element Piecewise")
ax.tick_params(axis='both', which='major', labelsize=label_size)  # tick-label font size on both axes
ax.set_xlabel("Cycle Number", fontsize=label_size)
ax.set_ylabel("QoI", fontsize=label_size)
ax.legend(fontsize=label_size, frameon=False)
ax.set_xlim([0, 1000])
ax.set_ylim([250, 520])
ax.set_xticks([0, 250, 500, 750, 1000])

fig.savefig('/results/Methods Figures/All Mean Models.pdf', bbox_inches='tight')
