# Imports

In [1]:
from sklearn.preprocessing import RobustScaler, OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import numpy as np
import re
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import itertools
import matplotlib.pyplot as plt
from torch.nn import MSELoss
import shap

# Data preprocessing

In [2]:
# Read the pre-extracted battery feature table (path is relative to the
# notebook's working directory).
dataset_path = 'battery_feature_extracted.csv'
dataset = pd.read_csv(dataset_path)

In [3]:
# Split the table into the regression target and the feature matrix.
# Every column other than the target is used as an input feature.
target_column = 'average_voltage'
y = dataset[target_column]
X = dataset.drop(columns=[target_column])

In [4]:
# First split: hold out 10% of the data as the final test set. The remaining
# 90% is kept under its own name so that the second split never overwrites its
# own input -- re-running this cell always reproduces the same partitions
# (previously, re-running the validation split shrank X_train each time).
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# Second split: carve the validation set out of the remaining data.
# 20% of the 90% (18% of all rows) goes to validation, leaving 72% for training.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42
)


In [6]:
# Scale the features with RobustScaler. The scaler is fitted on the training
# split only and then applied unchanged to the validation and test splits, so
# no statistics from held-out data leak into preprocessing.
# (StandardScaler and MinMaxScaler are imported as possible alternatives.
#  NOTE(review): the saved weights loaded below presumably expect the scaler
#  they were trained with -- confirm before swapping.)
scaler = RobustScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [7]:
# Convert every split to float32 tensors.
# torch.tensor(..., dtype=torch.float32) is the documented way to build a
# tensor from existing data; it replaces the legacy torch.FloatTensor(...)
# constructor and states the target dtype explicitly.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Targets get a trailing dimension, (n,) -> (n, 1), so they line up with the
# model's single-column output.
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).unsqueeze(1)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).unsqueeze(1)

In [8]:
# Model dimensions
# Regression head emits one continuous value per sample.
output_size = 1
# Input width equals the number of columns in the scaled feature matrix.
num_features = X_train_scaled.shape[-1]

# Define and Load the Saved Model

In [9]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [10]:
class FeedForwardNN(nn.Module):
    """Two-layer MLP head blended with a linear shortcut from its input.

    The output is ``alpha * mlp(x) + (1 - alpha) * residual(x)``, where
    ``mlp`` is Linear -> ReLU -> Linear, ``residual`` is a single Linear
    layer, and ``alpha`` is a learnable scalar initialised to 0.5.

    Args:
        input_size: Width of the incoming feature vector.
        hidden_size: Width of the hidden layer of the MLP branch.
        output_size: Width of the output (shared by both branches).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Non-linear branch: input -> hidden -> ReLU -> output
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.activation = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, output_size)
        # Linear shortcut mapping the input directly to the output width
        self.residual = nn.Linear(input_size, output_size)
        # Learnable mixing weight between the two branches (starts as an even blend)
        self.alpha = nn.Parameter(torch.tensor(0.5))

    def forward(self, x):
        """Return the alpha-weighted mix of the MLP branch and the shortcut."""
        shortcut = self.residual(x)
        hidden = self.activation(self.layer1(x))
        mlp_out = self.layer2(hidden)
        return self.alpha * mlp_out + (1 - self.alpha) * shortcut

In [11]:
# Define the TabTransformer model with FFNN
class TabTransformer(nn.Module):
    """Transformer encoder over an embedded feature vector with an FFNN regression head.

    The full feature vector of a sample is projected to ``dim_embedding`` by a
    single linear layer and fed to the encoder as a sequence of length 1
    (one token per sample). The encoded token is then passed to
    ``FeedForwardNN`` to produce the prediction.

    Args:
        num_features: Number of input features per sample.
        output_size: Number of values predicted per sample.
        dim_embedding: Width of the embedding / transformer model dimension.
        num_heads: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        ffnn_hidden_size: Hidden width of the feed-forward head.
        dropout: Dropout probability used inside each encoder layer. Defaults
            to 0.70, the value that was previously hard-coded, so existing
            callers and saved weights are unaffected (dropout has no parameters).
    """

    def __init__(self, num_features, output_size=1, dim_embedding=64, num_heads=4, num_layers=4,
                 ffnn_hidden_size=128, dropout=0.70):
        super().__init__()
        self.embedding = nn.Linear(num_features, dim_embedding)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim_embedding,
            nhead=num_heads,
            batch_first=True,
            dropout=dropout,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Regression head: custom feed-forward network instead of a single linear layer
        self.ffnn = FeedForwardNN(dim_embedding, ffnn_hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, num_features) tensor to (batch, output_size) predictions."""
        x = self.embedding(x)
        x = x.unsqueeze(1)  # Add a sequence dimension: (batch, 1, dim_embedding)
        x = self.transformer(x)
        x = x[:, 0, :]  # Take the first (and here only) token of the sequence
        x = self.ffnn(x)  # Pass through the feed-forward network
        return x

In [12]:
# Build the model with the architecture hyper-parameters given here and
# move it to the selected device.
model_config = {
    'num_features': num_features,  # input feature size
    'output_size': output_size,    # 1 for regression
    'dim_embedding': 128,          # embedding dimension
    'num_heads': 2,                # attention heads per encoder layer
    'num_layers': 2,               # stacked encoder layers
    'ffnn_hidden_size': 128,       # hidden width of the feed-forward head
}
model = TabTransformer(**model_config)
model = model.to(device)


In [13]:
# Load the trained weights.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code; it also removes the
# FutureWarning that torch.load emits when the argument is omitted.
# map_location places the tensors on the active device.
checkpoint_path = 'entire_model_transformer_fnn_mae2886_mse2855_r28910.pth'
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)


  model.load_state_dict(torch.load('entire_model_transformer_fnn_mae2886_mse2855_r28910.pth', map_location=device))


<All keys matched successfully>

In [14]:
# Set the model to evaluation mode so the Dropout layers in the encoder
# (p=0.7) are disabled and predictions are deterministic.
model.eval()

TabTransformer(
  (embedding): Linear(in_features=3226, out_features=128, bias=True)
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=2048, bias=True)
        (dropout): Dropout(p=0.7, inplace=False)
        (linear2): Linear(in_features=2048, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.7, inplace=False)
        (dropout2): Dropout(p=0.7, inplace=False)
      )
    )
  )
  (ffnn): FeedForwardNN(
    (layer1): Linear(in_features=128, out_features=128, bias=True)
    (activation): ReLU()
    (layer2): Linear(in_features=128, out_features=1, bias=True)
    (residual): Linear

# Generate Predictions

In [15]:
# Generate predictions for the whole test set in one forward pass; gradients
# are not needed for inference, so tracking is disabled.
with torch.no_grad():
    predictions = model(X_test_tensor.to(device))

# Convert predictions and targets to flat NumPy arrays
# (both are (n, 1) columns; predictions may live on the GPU).
y_pred = predictions.cpu().numpy().flatten()
y_true = y_test_tensor.numpy().flatten()

# Compute metrics
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# Print results (the R² label was previously mis-encoded as "R¬≤")
print(f"Test MSE: {mse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test R²:  {r2:.4f}")

Test MSE: 0.2856
Test MAE: 0.2887
Test R²:  0.8910


# Per-ion metrics

In [16]:
# 1. Identify the one-hot encoded ion columns (adjust prefix if needed)
ion_columns = [col for col in X_test.columns if col.startswith('working_ion_')]

# 2. Create a DataFrame with true/pred values.
#    The index is reset so rows line up positionally with y_true / y_pred,
#    which are in the same row order as X_test.
X_test_df = X_test.reset_index(drop=True).copy()
X_test_df['true'] = y_true
X_test_df['pred'] = y_pred

# 3. Compute per-ion metrics (unscaled X_test is used, so a flag value of 1
#    marks membership in that ion's subset).
print("\n🔍 Per-ion metrics on test set:")
for ion in ion_columns:
    subset = X_test_df[X_test_df[ion] == 1]
    if not subset.empty:
        y_true_ion = subset['true'].values
        y_pred_ion = subset['pred'].values
        mae_ion = mean_absolute_error(y_true_ion, y_pred_ion)
        mse_ion = mean_squared_error(y_true_ion, y_pred_ion)
        # R² is not defined for fewer than two samples; report nan explicitly
        # instead of triggering sklearn's undefined-metric warning.
        if len(subset) >= 2:
            r2_ion = r2_score(y_true_ion, y_pred_ion)
        else:
            r2_ion = float('nan')
        print(f"{ion.replace('working_ion_', '').upper():<8}: MAE = {mae_ion:.4f}, MSE = {mse_ion:.4f}, R² = {r2_ion:.4f}")



🔍 Per-ion metrics on test set:
AL      : MAE = 0.2139, MSE = 0.1560, R² = 0.8643
CA      : MAE = 0.2162, MSE = 0.1163, R² = 0.9059
CS      : MAE = 0.6300, MSE = 0.6243, R² = -0.6476
K       : MAE = 0.1505, MSE = 0.0417, R² = 0.9876
LI      : MAE = 0.2975, MSE = 0.2826, R² = 0.8669
MG      : MAE = 0.3996, MSE = 0.7394, R² = 0.7902
NA      : MAE = 0.1735, MSE = 0.0655, R² = 0.9673
RB      : MAE = 0.3502, MSE = 0.3169, R² = 0.8036
Y       : MAE = 0.2433, MSE = 0.1224, R² = 0.7393
ZN      : MAE = 0.3107, MSE = 0.2320, R² = 0.7404
