# Extract PCA features and training labels
Loop through the cells one at a time so that each cell's inputs (features) and outputs (labels) stay paired row by row.

In [1]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler,MinMaxScaler

Load the cell lists, the interpolated capacity data, the empirical parameters fitted on the training set, and the feature table

In [2]:
training_cells = pd.read_csv("training_V2.csv",header=None).to_numpy(dtype=str).reshape(-1,).tolist()
test_in_cells = pd.read_csv("test_in_V2.csv",header=None).to_numpy(dtype=str).reshape(-1,).tolist()
test_out_cells = pd.read_csv("test_out_V2.csv",header=None).to_numpy(dtype=str).reshape(-1,).tolist()

# Import interpolated data for knot-point approach and end-to-end
Q_data = pd.read_csv("NMC_data_V2_interp_clean.csv")
# Import fitted parameters for sequential optimization
# b_data = pd.read_csv("empirical_parameters_train_final.csv")
b_data = pd.read_csv("Empirical_parameters_train_py.csv",index_col=0)

# Import feature table
feature_table = pd.read_csv("feature_all.csv")

Create numpy array holders for all needed variables

In [3]:
num_training_cells = len(training_cells)
num_test_in_cells = len(test_in_cells)
num_test_out_cells = len(test_out_cells)

feature_list = feature_table.columns.to_list()
for element in ['Group', 'Cell','Lifetime']:
    feature_list.remove(element)

num_parameters = 3
num_knot = 5
num_Q = 21
num_features = len(feature_list)

# Create variables for training input as well as training labels for different approaches
b_train = np.ndarray((num_training_cells,num_parameters))
N_train = np.ndarray((num_training_cells,num_Q))

knot_train = np.ndarray((num_training_cells,num_knot))
X_train = np.ndarray((num_training_cells,num_features))

# Create variables for test inputs
X_test_in = np.ndarray((num_test_in_cells,num_features))
N_test_in = np.ndarray((num_test_in_cells,num_Q))

X_test_out = np.ndarray((num_test_out_cells,num_features))
N_test_out = np.ndarray((num_test_out_cells,num_Q))

Loop through training data

In [4]:
for iii,cell in enumerate(training_cells):
    feature_cell = feature_table[feature_table['Cell']==cell][feature_list].values
    b_cell = b_data[b_data['Cell']==cell][['b1','b2','b3']].values
    N_cell = Q_data[Q_data['cellID']==cell]['Ah_throughput'].values
    X_train[iii] = feature_cell
    b_train[iii] = b_cell
    N_train[iii] = np.abs(N_cell) # abs() to ensure the first point is nonnegative (very small negative numbers due to interpolation)
    
    knot_position = Q_data[Q_data['cellID']==cell]['Ah_throughput'].to_numpy()[0:21:4]
    knot_pos_diff = np.diff(knot_position)
    knot_train[iii] = knot_pos_diff

Loop through high-DoD test data (test_in)

In [5]:
for iii,cell in enumerate(test_in_cells):
    feature_cell = feature_table[feature_table['Cell']==cell][feature_list].values
    N_cell = Q_data[Q_data['cellID']==cell]['Ah_throughput'].values

    X_test_in[iii] = feature_cell
    N_test_in[iii] = np.abs(N_cell)

Loop through low-DoD test data (test_out)

In [6]:
for iii,cell in enumerate(test_out_cells):
    feature_cell = feature_table[feature_table['Cell']==cell][feature_list].values
    N_cell = Q_data[Q_data['cellID']==cell]['Ah_throughput'].values

    X_test_out[iii] = feature_cell
    N_test_out[iii] = np.abs(N_cell)

Fit the scaler and PCA on the training-set features, then apply the same fitted transforms to the features of both test sets

In [7]:
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
PCA_model = PCA(n_components=10)
X_train_PCA = PCA_model.fit_transform(X_train_scaled)

X_test_in_scaled = X_scaler.transform(X_test_in)
X_test_in_PCA = PCA_model.transform(X_test_in_scaled)

X_test_out_scaled = X_scaler.transform(X_test_out)
X_test_out_PCA = PCA_model.transform(X_test_out_scaled)

Save all variables into csv files

In [8]:
# Processed PCA features
np.savetxt('Processed input output/X_train_PCA.csv',X_train_PCA,delimiter=",")
np.savetxt('Processed input output/X_test_in_PCA.csv',X_test_in_PCA,delimiter=",")
np.savetxt('Processed input output/X_test_out_PCA.csv',X_test_out_PCA,delimiter=",")

# Processed label for training
np.savetxt('Processed input output/b_train.csv',b_train,delimiter=",")
np.savetxt('Processed input output/knot_train.csv',knot_train,delimiter=",")

# Processed ah-throughput at equidistant SOH
np.savetxt('Processed input output/N_train.csv',N_train,delimiter=",")
np.savetxt('Processed input output/N_test_in.csv',N_test_in,delimiter=",")
np.savetxt('Processed input output/N_test_out.csv',N_test_out,delimiter=",")

