In [1]:
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, KFold
import pandas as pd
import numpy as np
import re
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import itertools
import matplotlib.pyplot as plt
from torch.nn import MSELoss
import shap

# Data preprocessing

In [2]:
# Read the extracted-feature table and separate the regression target from the inputs.
dataset = pd.read_csv('battery_feature_extracted.csv')
y = dataset['average_voltage']                       # target column
X = dataset.drop(['average_voltage'], axis='columns')  # every remaining column is a feature

In [3]:
# Two-stage split with a fixed seed:
#   1) hold out 10% of all rows as the test set;
#   2) hold out 20% of the remaining rows as the validation set
#      (i.e. 72% train / 18% val / 10% test overall).
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42
)


In [4]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to every split so no validation/test statistics leak into training.
scaler = RobustScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)


In [5]:
# Convert the scaled feature arrays and the target Series to float32 tensors.
# Features are used as-is; targets get a trailing axis so they are column vectors.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.FloatTensor(features)
    for features in (X_train_scaled, X_val_scaled, X_test_scaled)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.FloatTensor(target.values).unsqueeze(1)
    for target in (y_train, y_val, y_test)
)

# Data imbalance

In [6]:
# Compute per-sample weights: inverse-frequency weighting over the one-hot
# "working_ion_*" columns, so rows of rarer ions contribute more to the loss.
ion_columns = [col for col in X_train.columns if col.startswith("working_ion_")]
if not ion_columns:
    # Without ion columns every weight would be 0 and the weighted loss would vanish.
    raise ValueError("No 'working_ion_*' columns found in X_train; cannot compute ion weights.")

ion_counts = X_train[ion_columns].sum()  # number of training rows per ion

# An ion with zero rows in the training split would give 1/0 = inf, and the
# normalisation below would then turn every weight into NaN or 0. Mask zero
# counts first and assign those ions weight 0 instead.
ion_weights = (1.0 / ion_counts.where(ion_counts > 0)).fillna(0.0)
ion_weights /= ion_weights.sum()  # weights sum to 1 across ions

# Each row picks up the weight of the ion column(s) set in that row.
# NOTE(review): because the ion weights sum to 1, the per-row weights are well
# below 1, so a weighted training loss is not on the same scale as an
# unweighted validation loss.
train_weights = X_train[ion_columns].dot(ion_weights.astype(np.float32))
train_weights_tensor = torch.tensor(train_weights.values.astype(np.float32)).unsqueeze(1)


In [7]:
# Define FeedForwardNN used within TabTransformer
class FeedForwardNN(nn.Module):
    """Two-layer MLP blended with a linear shortcut of the input.

    The output is ``alpha * mlp(x) + (1 - alpha) * shortcut(x)`` where ``mlp`` is
    Linear -> ReLU -> Linear, ``shortcut`` is a single Linear projection of the
    input to the output size, and ``alpha`` is a learnable scalar initialised
    to 0.5.

    Args:
        input_size: Width of the last dimension of the input.
        hidden_size: Width of the hidden layer of the MLP branch.
        output_size: Width of the last dimension of the output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # MLP branch.
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.activation = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, output_size)
        # Shortcut branch: projects the input straight to the output width.
        self.residual = nn.Linear(input_size, output_size)
        # Learnable mixing coefficient between the two branches.
        self.alpha = nn.Parameter(torch.tensor(0.5))

    def forward(self, x):
        """Return the alpha-weighted blend of the MLP and shortcut branches."""
        shortcut = self.residual(x)
        hidden = self.activation(self.layer1(x))
        mlp_out = self.layer2(hidden)
        return self.alpha * mlp_out + (1 - self.alpha) * shortcut


In [8]:
# Define TabTransformer with FFNN
class TabTransformer(nn.Module):
    """Linear embedding -> Transformer encoder -> FeedForwardNN regression head.

    The whole feature vector of a row is embedded as ONE token: ``forward``
    inserts a length-1 sequence axis before the encoder and strips it again
    afterwards. With a single token per row, self-attention has nothing but
    that token to attend to.

    Args:
        num_features: Number of input features per row.
        output_size: Width of the model output (1 for scalar regression).
        dim_embedding: Embedding width, used as the encoder ``d_model``.
        num_heads: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        ffnn_hidden_size: Hidden width of the ``FeedForwardNN`` head.
        dropout: Dropout probability inside each encoder layer. Defaults to
            0.7, the value that was previously hard-coded, so existing callers
            get the same model.
    """

    def __init__(self, num_features, output_size=1, dim_embedding=128, num_heads=2,
                 num_layers=2, ffnn_hidden_size=128, dropout=0.7):
        super(TabTransformer, self).__init__()
        self.embedding = nn.Linear(num_features, dim_embedding)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=dim_embedding,
            nhead=num_heads,
            batch_first=True,  # tensors are (batch, seq, feature)
            dropout=dropout,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.ffnn = FeedForwardNN(dim_embedding, ffnn_hidden_size, output_size)

    def forward(self, x):
        """Map a (batch, num_features) tensor to a (batch, output_size) tensor."""
        x = self.embedding(x)    # (batch, dim_embedding)
        x = x.unsqueeze(1)       # (batch, 1, dim_embedding): one token per row
        x = self.transformer(x)
        x = x[:, 0, :]           # drop the length-1 sequence axis again
        return self.ffnn(x)


In [9]:
# Weighted composite loss
class WeightedCompositeLoss(nn.Module):
    """Per-element weighted ``MSE + mae_weight * MAE`` loss.

    Both terms are the mean over all elements of ``weights * error``, so the
    magnitude of the loss scales with the magnitude of the weights.

    Args:
        mae_weight: Multiplier of the MAE term. Defaults to 0.5, the factor
            that was previously hard-coded.
    """

    def __init__(self, mae_weight=0.5):
        super().__init__()
        self.mae_weight = mae_weight

    def forward(self, outputs, targets, weights=None):
        """Compute the loss.

        Args:
            outputs: Model predictions.
            targets: Ground-truth values; must have exactly the shape of ``outputs``.
            weights: Per-element weights broadcastable to ``outputs``. ``None``
                means unweighted (equivalent to passing all ones).

        Returns:
            Scalar tensor ``mean(w * err**2) + mae_weight * mean(w * |err|)``.

        Raises:
            ValueError: If ``outputs`` and ``targets`` differ in shape. Without
                this check an (N, 1) vs (N,) pair would silently broadcast to
                an (N, N) error matrix and yield a meaningless loss.
        """
        if outputs.shape != targets.shape:
            raise ValueError(
                f"outputs shape {tuple(outputs.shape)} does not match "
                f"targets shape {tuple(targets.shape)}"
            )
        error = outputs - targets
        if weights is None:
            weights = torch.ones_like(error)
        mse = (weights * error ** 2).mean()
        mae = (weights * torch.abs(error)).mean()
        return mse + self.mae_weight * mae


In [10]:
# Setup: seed, device, model, loss and optimiser.
# The data splits are seeded (random_state=42) but torch was not, so weight
# initialisation and dropout masks differed on every run. Seed torch before the
# model is constructed so a Restart & Run All is repeatable.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TabTransformer(num_features=X_train_tensor.shape[1]).to(device)
criterion = WeightedCompositeLoss()
optimizer = optim.Adam(model.parameters(), lr=0.00075)


In [11]:
# Place every tensor used for training and evaluation on the selected device.
(
    X_train_tensor, y_train_tensor, train_weights_tensor,
    X_val_tensor, y_val_tensor,
    X_test_tensor, y_test_tensor,
) = (
    tensor.to(device)
    for tensor in (
        X_train_tensor, y_train_tensor, train_weights_tensor,
        X_val_tensor, y_val_tensor,
        X_test_tensor, y_test_tensor,
    )
)


In [12]:
# Training: full-batch optimisation — each epoch is a single optimiser step on
# the whole training set, followed by an unweighted validation pass.
NUM_EPOCHS = 2000
LOG_EVERY = 10  # print progress every LOG_EVERY epochs

training_losses, validation_losses = [], []

# Validation uses all-ones weights; the tensor never changes, so build it once
# instead of allocating a new one in every epoch.
val_weights = torch.ones_like(y_val_tensor)

for epoch in range(NUM_EPOCHS):
    # --- optimisation step (dropout active) ---
    model.train()
    optimizer.zero_grad()
    output = model(X_train_tensor)
    train_loss = criterion(output, y_train_tensor, train_weights_tensor)
    train_loss.backward()
    optimizer.step()

    # --- validation pass (dropout disabled, no gradients) ---
    model.eval()
    with torch.no_grad():
        val_output = model(X_val_tensor)
        val_loss = criterion(val_output, y_val_tensor, val_weights)

    training_losses.append(train_loss.item())
    validation_losses.append(val_loss.item())

    if epoch % LOG_EVERY == 0:
        print(f'Epoch {epoch}, Train Loss: {train_loss.item():.4f}, Val Loss: {val_loss.item():.4f}')


Epoch 0, Train Loss: 0.2693, Val Loss: 4.4267
Epoch 10, Train Loss: 0.0917, Val Loss: 4.4085
Epoch 20, Train Loss: 0.0907, Val Loss: 3.5020
Epoch 30, Train Loss: 0.0890, Val Loss: 3.4655
Epoch 40, Train Loss: 0.0794, Val Loss: 3.6511
Epoch 50, Train Loss: 0.0572, Val Loss: 4.4903
Epoch 60, Train Loss: 0.0491, Val Loss: 4.9404
Epoch 70, Train Loss: 0.0437, Val Loss: 4.5306
Epoch 80, Train Loss: 0.0394, Val Loss: 4.7564
Epoch 90, Train Loss: 0.0329, Val Loss: 4.3306
Epoch 100, Train Loss: 0.0301, Val Loss: 4.2787
Epoch 110, Train Loss: 0.0267, Val Loss: 3.6967
Epoch 120, Train Loss: 0.0266, Val Loss: 3.4371
Epoch 130, Train Loss: 0.0232, Val Loss: 3.2921
Epoch 140, Train Loss: 0.0209, Val Loss: 3.0072
Epoch 150, Train Loss: 0.0207, Val Loss: 2.9618
Epoch 160, Train Loss: 0.0195, Val Loss: 2.9172
Epoch 170, Train Loss: 0.0179, Val Loss: 2.5621
Epoch 180, Train Loss: 0.0175, Val Loss: 2.6360
Epoch 190, Train Loss: 0.0169, Val Loss: 2.2799
Epoch 200, Train Loss: 0.0159, Val Loss: 2.0698
Epo

In [13]:
# Score the trained model on the held-out test set (no dropout, no gradients).
model.eval()
with torch.no_grad():
    predictions = model(X_test_tensor)

    # Plain (unweighted) error metrics as Python floats.
    mse = nn.MSELoss()(predictions, y_test_tensor).item()
    mae = nn.L1Loss()(predictions, y_test_tensor).item()

    # Coefficient of determination: 1 - SS_res / SS_tot.
    # `r2_score` stays a 0-d tensor here; callers use `.item()` to read it.
    ss_total = (y_test_tensor - y_test_tensor.mean()).pow(2).sum()
    ss_residual = (y_test_tensor - predictions).pow(2).sum()
    r2_score = 1 - (ss_residual / ss_total)


In [14]:
# Report the overall test-set metrics, one per line.
print(
    f'\nTest MSE: {mse:.4f}',
    f'Test MAE: {mae:.4f}',
    f'Test R²: {r2_score.item():.4f}',
    sep='\n',
)



Test MSE: 0.3485
Test MAE: 0.3154
Test R²: 0.8670


In [17]:
# Per-ion MAE / MSE / R² on the test set.
from sklearn.metrics import mean_absolute_error, mean_squared_error
# Imported under an alias on purpose: the evaluation cell stores the overall
# test R² in a variable named `r2_score`, and importing the sklearn function
# under that same name would overwrite it (re-running the metrics print cell
# would then fail on `r2_score.item()`).
from sklearn.metrics import r2_score as sk_r2_score

# Re-attach ground truth and predictions to the (unscaled) test features so
# rows can be filtered by their one-hot ion columns.
X_test_df = X_test.reset_index(drop=True).copy()
X_test_df['true'] = y_test.values
X_test_df['pred'] = predictions.cpu().numpy().flatten()

print("\nPer-ion metrics on test set:")
for ion in ion_columns:
    subset = X_test_df[X_test_df[ion] == 1]
    if subset.empty:
        continue  # this ion has no rows in the test split

    y_true = subset['true'].values
    y_pred = subset['pred'].values

    # Loop-local names so the overall test `mae` / `mse` computed in the
    # evaluation cell are not overwritten by the last ion's values.
    ion_mae = mean_absolute_error(y_true, y_pred)
    ion_mse = mean_squared_error(y_true, y_pred)
    ion_r2 = sk_r2_score(y_true, y_pred)

    print(f"{ion.replace('working_ion_', '')}: MAE = {ion_mae:.4f}, MSE = {ion_mse:.4f}, R² = {ion_r2:.4f}")



Per-ion metrics on test set:
Al: MAE = 0.3064, MSE = 0.1806, R² = 0.8430
Ca: MAE = 0.2114, MSE = 0.0864, R² = 0.9300
Cs: MAE = 0.4337, MSE = 0.2710, R² = 0.2849
K: MAE = 0.2114, MSE = 0.0642, R² = 0.9809
Li: MAE = 0.3490, MSE = 0.3923, R² = 0.8152
Mg: MAE = 0.4786, MSE = 0.9721, R² = 0.7242
Na: MAE = 0.1529, MSE = 0.0414, R² = 0.9793
Rb: MAE = 0.1458, MSE = 0.0272, R² = 0.9832
Y: MAE = 0.3089, MSE = 0.1129, R² = 0.7595
Zn: MAE = 0.2473, MSE = 0.1437, R² = 0.8392


In [16]:
# Reattach ion info to test set: combine the unscaled test features with the
# ground truth and the model predictions so rows can be grouped by ion.
X_test_df = X_test.reset_index(drop=True).copy()
X_test_df['true'] = y_test.values
X_test_df['pred'] = predictions.cpu().numpy().flatten()

print("\nPer-ion MAE on test set:")
for ion in ion_columns:
    subset = X_test_df[X_test_df[ion] == 1]
    if subset.empty:
        continue  # this ion has no rows in the test split
    # Loop-local name so the overall test `mae` from the evaluation cell is
    # not overwritten by the last ion's value.
    ion_mae = np.mean(np.abs(subset['true'] - subset['pred']))
    print(f"{ion.replace('working_ion_', '')}: MAE = {ion_mae:.4f}")



Per-ion MAE on test set:
Al: MAE = 0.3064
Ca: MAE = 0.2114
Cs: MAE = 0.4337
K: MAE = 0.2114
Li: MAE = 0.3490
Mg: MAE = 0.4786
Na: MAE = 0.1529
Rb: MAE = 0.1458
Y: MAE = 0.3089
Zn: MAE = 0.2473
