In [1]:
!unzip Dataset_Li-ion.zip

Archive:  Dataset_Li-ion.zip
   creating: Dataset_Li-ion/
   creating: Dataset_Li-ion/0degC/
  inflating: Dataset_Li-ion/0degC/585_C20DisCh.csv  
  inflating: Dataset_Li-ion/0degC/585_Dis_0p5C.csv  
  inflating: Dataset_Li-ion/0degC/585_Dis_2C.csv  
  inflating: Dataset_Li-ion/0degC/585_HPPC.csv  
  inflating: Dataset_Li-ion/0degC/589_Cap_1C.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge1.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge2.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge3.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge4.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge5.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge6.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge7.csv  
  inflating: Dataset_Li-ion/0degC/589_Charge8.csv  
  inflating: Dataset_Li-ion/0degC/589_HWFET.csv  
  inflating: Dataset_Li-ion/0degC/589_LA92.csv  
  inflating: Dataset_Li-ion/0degC/589_Mixed1.csv  
  inflating: Dataset_Li-ion/0degC/589_Mixed2.csv  
  inflating: Dataset_Li-ion/0degC

In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.4-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.4-py3-none-any.whl (247 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m247.0/247.0 kB[0m [31m23.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.4 colorlog-6.9.0 optuna-4.4.0


In [3]:
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# BiLSTM

In [4]:
################################################################################
# Part 3: BiLSTM Model - Complete Workflow
# This script loads data, finds the best hyperparameters for a BiLSTM model
# using Bayesian Optimization, and then trains and evaluates the final model.
################################################################################

import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError
import optuna

# --- 1. Data Preprocessing ---
print("--- Starting: 1. Data Preprocessing ---")

# dataset_base_path = 'C:/Users/sahar/Documents/sut/lpd/project/Dataset_Li-ion'
dataset_base_path = 'Dataset_Li-ion'

# Define the lists of train and test files
train_files = [
    '0degC/589_Mixed1', '0degC/589_Mixed2', '0degC/590_Mixed4',
    '0degC/590_Mixed5', '0degC/590_Mixed6', '0degC/590_Mixed8',
    '10degC/567_Mixed1', '10degC/567_Mixed2', '10degC/571_Mixed4',
    '10degC/571_Mixed5', '10degC/571_Mixed6', '10degC/571_Mixed8',
    '25degC/551_Mixed1', '25degC/551_Mixed2',
    '25degC/552_Mixed4', '25degC/552_Mixed5', '25degC/552_Mixed6',
    '25degC/552_Mixed8',
]
test_files = [
    '0degC/589_LA92', '0degC/589_UDDS', '0degC/589_US06', '0degC/590_Mixed7',
    '10degC/576_UDDS', '10degC/567_US06', '10degC/571_Mixed7',
    '25degC/551_LA92', '25degC/551_UDDS', '25degC/551_US06', '25degC/552_Mixed7',
]

column_names = [
    'Time Stamp','Step','Status','Prog Time','Step Time','Cycle',
    'Cycle Level','Procedure','Voltage','Current','Temperature','Capacity','WhAccu','Cnt','Empty'
]

def load_and_preprocess_file(file_path):
    try:
        df = pd.read_csv(file_path, skiprows=30)
        df.columns = column_names
        if 'Empty' in df.columns:
            df = df.drop(columns=['Empty'])
        df = df[(df["Status"] == "TABLE") | (df["Status"] == "DCH")]
        df['Capacity'] = pd.to_numeric(df['Capacity'], errors='coerce')
        df.dropna(subset=['Capacity'], inplace=True)
        if not df.empty:
            max_discharge = abs(df["Capacity"].min())
            df["SoC Capacity"] = max_discharge + df["Capacity"]
            df["SoC Percentage"] = df["SoC Capacity"] / df["SoC Capacity"].max() if df["SoC Capacity"].max() != 0 else 0
            df["SoC Percentage"] = df["SoC Percentage"].clip(lower=0, upper=1)
        else:
            df["SoC Capacity"] = 0
            df["SoC Percentage"] = 0
        return df
    except Exception as e:
        print(f"An error occurred while processing {file_path}: {e}")
        return pd.DataFrame()

print("Loading and preprocessing training data...")
train_dataframes = []
for file_name in train_files:
    full_path = os.path.join(dataset_base_path, file_name + '.csv')
    df = load_and_preprocess_file(full_path)
    if not df.empty:
        df['Original_File'] = file_name
        train_dataframes.append(df)
train_df = pd.concat(train_dataframes, ignore_index=True)

print("Loading and preprocessing testing data...")
test_dataframes = []
for file_name in test_files:
    full_path = os.path.join(dataset_base_path, file_name + '.csv')
    df = load_and_preprocess_file(full_path)
    if not df.empty:
        df['Original_File'] = file_name
        test_dataframes.append(df)
test_df = pd.concat(test_dataframes, ignore_index=True)

print(f"Total training data shape: {train_df.shape}")
print(f"Total testing data shape: {test_df.shape}")
print("--- Finished: 1. Data Preprocessing ---\n")


# --- 2. Feature Engineering & Normalization ---
print("--- Starting: 2. Feature Engineering & Normalization ---")
rolling_window_size = 30

def apply_feature_engineering(df):
    df['Voltage'] = pd.to_numeric(df['Voltage'], errors='coerce')
    df['Current'] = pd.to_numeric(df['Current'], errors='coerce')
    df.dropna(subset=['Voltage', 'Current'], inplace=True)
    if df.empty: return df
    df['V_avg'] = df.groupby('Original_File')['Voltage'].transform(lambda x: x.rolling(window=rolling_window_size, min_periods=1).mean())
    df['I_avg'] = df.groupby('Original_File')['Current'].transform(lambda x: x.rolling(window=rolling_window_size, min_periods=1).mean())
    df['Power'] = df['Voltage'] * df['Current']
    return df

train_df = apply_feature_engineering(train_df.copy())
test_df = apply_feature_engineering(test_df.copy())

features = ['Current', 'Voltage', 'Temperature', 'V_avg', 'I_avg', 'Power']
target = 'SoC Percentage'

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(train_df[features].values)
y_train = train_df[target].values
X_test_scaled = scaler.transform(test_df[features].values)
y_test = test_df[target].values

print(f"Training features scaled. Shape: {X_train_scaled.shape}")
print(f"Testing features scaled. Shape: {X_test_scaled.shape}")
print("--- Finished: 2. Feature Engineering & Normalization ---\n")


# --- 3. Sequence Creation ---
print("--- Starting: 3. Sequence Creation ---")
sequence_length = 5

def create_sequences(X, y, seq_length):
    xs, ys = [], []
    for i in range(len(X) - seq_length):
        x_seq = X[i:(i + seq_length)]
        y_val = y[i + seq_length]
        xs.append(x_seq)
        ys.append(y_val)
    return np.array(xs), np.array(ys)

X_train_sequences, y_train_sequences = create_sequences(X_train_scaled, y_train, sequence_length)
X_test_sequences, y_test_sequences = create_sequences(X_test_scaled, y_test, sequence_length)

num_features = X_train_sequences.shape[2]

print(f"Training sequences created. Shape: {X_train_sequences.shape}")
print(f"Testing sequences created. Shape: {X_test_sequences.shape}")
print("--- Finished: 3. Sequence Creation ---\n")


# --- 4. Bayesian Optimization for BiLSTM Model ---
print("--- Starting: 4. Bayesian Optimization for BiLSTM ---")
X_train_tune, X_val_tune, y_train_tune, y_val_tune = train_test_split(
    X_train_sequences, y_train_sequences, test_size=0.2, random_state=42
)

def objective_bilstm(trial):
    bilstm_units = trial.suggest_categorical('bilstm_units', [30, 50, 70, 100])
    dense_units = trial.suggest_categorical('dense_units', [30, 50, 70, 100])
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])
    epochs = trial.suggest_int('epochs', 20, 100)

    model = Sequential([
        Bidirectional(LSTM(units=bilstm_units, activation='relu'), input_shape=(sequence_length, num_features)),
        Dropout(dropout_rate),
        Dense(units=dense_units, activation='relu'),
        Dense(units=1)
    ])
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=[RootMeanSquaredError(name='rmse')])
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        X_train_tune, y_train_tune,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val_tune, y_val_tune),
        callbacks=[early_stopping],
        verbose=0
    )
    val_rmse = history.history['val_rmse'][-1]
    trial.report(val_rmse, trial.number)
    if trial.should_prune():
        raise optuna.exceptions.TrialPruned()
    return val_rmse

study_bilstm = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study_bilstm.optimize(objective_bilstm, n_trials=20, show_progress_bar=True)

best_bilstm_params = study_bilstm.best_params
print("Best hyperparameters found for BiLSTM:")
print(best_bilstm_params)
print("--- Finished: 4. Bayesian Optimization for BiLSTM ---\n")


# --- 5. Final BiLSTM Model Training and Evaluation ---
print("--- Starting: 5. Final BiLSTM Model Training and Evaluation ---")
final_bilstm_params = best_bilstm_params.copy()
final_bilstm_params.setdefault('epochs', 50)
final_bilstm_params.setdefault('batch_size', best_bilstm_params.get('batch_size', 128))

model = Sequential([
    Bidirectional(LSTM(units=final_bilstm_params['bilstm_units'], activation='relu'), input_shape=(sequence_length, num_features)),
    Dropout(final_bilstm_params['dropout_rate']),
    Dense(units=final_bilstm_params['dense_units'], activation='relu'),
    Dense(units=1)
])
optimizer = Adam(learning_rate=final_bilstm_params['learning_rate'])
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=[RootMeanSquaredError(name='rmse')])

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7, min_lr=1e-6)

print("Training final BiLSTM model...")
model.fit(
    X_train_sequences, y_train_sequences,
    epochs=final_bilstm_params['epochs'],
    batch_size=final_bilstm_params['batch_size'],
    validation_split=0.1,
    callbacks=[early_stopping, reduce_lr_on_plateau],
    verbose=1
)

print("\nEvaluating final BiLSTM model on test set...")
test_loss, test_rmse = model.evaluate(X_test_sequences, y_test_sequences, verbose=0)
y_pred = model.predict(X_test_sequences)
max_error = np.max(np.abs(y_test_sequences - y_pred.flatten()))

print("\n--- BiLSTM Final Performance ---")
print(f"Test RMSE: {test_rmse:.4f}")
print(f"Test Max Error: {max_error:.4f}")
print("--- Finished: 5. Final BiLSTM Model Training and Evaluation ---")

--- Starting: 1. Data Preprocessing ---
Loading and preprocessing training data...
Loading and preprocessing testing data...
Total training data shape: (1222903, 17)
Total testing data shape: (882077, 17)
--- Finished: 1. Data Preprocessing ---

--- Starting: 2. Feature Engineering & Normalization ---
Training features scaled. Shape: (1222903, 6)
Testing features scaled. Shape: (882077, 6)
--- Finished: 2. Feature Engineering & Normalization ---

--- Starting: 3. Sequence Creation ---
Training sequences created. Shape: (1222898, 5, 6)
Testing sequences created. Shape: (882072, 5, 6)
--- Finished: 3. Sequence Creation ---

--- Starting: 4. Bayesian Optimization for BiLSTM ---


[I 2025-08-05 16:42:54,197] A new study created in memory with name: no-name-77542168-567c-435a-9edc-06bb95dc0bef


  0%|          | 0/20 [00:00<?, ?it/s]

  super().__init__(**kwargs)


[I 2025-08-05 16:47:34,822] Trial 0 finished with value: 0.14437641203403473 and parameters: {'bilstm_units': 50, 'dense_units': 100, 'dropout_rate': 0.34044600469728353, 'learning_rate': 0.001331121608073689, 'batch_size': 128, 'epochs': 37}. Best is trial 0 with value: 0.14437641203403473.
[I 2025-08-05 16:51:51,519] Trial 1 finished with value: 0.043783873319625854 and parameters: {'bilstm_units': 100, 'dense_units': 70, 'dropout_rate': 0.21685785941408728, 'learning_rate': 0.00012562773503807024, 'batch_size': 128, 'epochs': 61}. Best is trial 1 with value: 0.043783873319625854.
[I 2025-08-05 16:59:34,480] Trial 2 finished with value: 0.06683692336082458 and parameters: {'bilstm_units': 70, 'dense_units': 70, 'dropout_rate': 0.2218455076693483, 'learning_rate': 1.9634341572933304e-05, 'batch_size': 64, 'epochs': 60}. Best is trial 1 with value: 0.043783873319625854.
[I 2025-08-05 17:11:57,119] Trial 3 finished with value: 0.17517346143722534 and parameters: {'bilstm_units': 50, 'de