In [13]:
pip install seaborn

Note: you may need to restart the kernel to use updated packages.


 Step 1: Baseline Comparison (Linear Regression)

In [4]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import time

# Load data
data = pd.read_csv('B0005_Kalman_GA_Optimized.csv')

# Prepare features and target
features = ['Voltage_Min', 'Voltage_Max', 'Temperature_Avg', 'Capacity']
X = data[features]
y_true = data['SOH (%)']

# Measure time for training + prediction
start_time = time.time()

# Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X, y_true)
lr_predictions = lr_model.predict(X)

end_time = time.time()
latency_lr = end_time - start_time

# Evaluate metrics
mse_lr = mean_squared_error(y_true, lr_predictions)
r2_lr = r2_score(y_true, lr_predictions)

print(f'Linear Regression MSE: {mse_lr:.4f}')
print(f'Linear Regression R²: {r2_lr:.4f}')
print(f'Linear Regression Latency: {latency_lr:.4f} seconds')

# Save results for later use
data['LinearRegression_SOH_Pred'] = lr_predictions
data.to_csv('B0005_Baseline_LinearRegression.csv', index=False)

Linear Regression MSE: 0.0000
Linear Regression R²: 1.0000
Linear Regression Latency: 0.0289 seconds


Step 2: % Improvement Calculation

In [7]:
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

# Load data
data = pd.read_csv('B0005_Kalman_GA_Optimized.csv')
baseline = pd.read_csv('B0005_Baseline_LinearRegression.csv')

# True values
y_true = data['SOH (%)']

# Predictions from different models
models = {
    'Linear Regression': baseline['LinearRegression_SOH_Pred'],
    'Random Forest': data['RandomForest_SOH_Pred'],
    'Gradient Boosting': data['GradientBoosting_SOH_Pred'],
    'Neural Network': data['NeuralNetwork_SOH_Pred'],
    'Hybrid Model': data['SOH_Kalman_Optimized']
}

# Latency values (in seconds) — add your actual recorded latencies here
latencies = {
    'Linear Regression': 0.012,  # from your previous output
    'Random Forest': 0.50,       # example values
    'Gradient Boosting': 0.65,
    'Neural Network': 1.20,
    'Hybrid Model': 2.80
}

# Prepare results list
results = []

# Calculate metrics
for name, preds in models.items():
    mse = mean_squared_error(y_true, preds)
    r2 = r2_score(y_true, preds)
    latency = latencies.get(name, None)
    results.append({'Model': name, 'MSE': mse, 'R²': r2, 'Latency (s)': latency})

# Create DataFrame
results_df = pd.DataFrame(results)

# Calculate % Improvement over Baseline
baseline_mse = results_df.loc[results_df['Model'] == 'Linear Regression', 'MSE'].values[0]
baseline_r2 = results_df.loc[results_df['Model'] == 'Linear Regression', 'R²'].values[0]
baseline_latency = results_df.loc[results_df['Model'] == 'Linear Regression', 'Latency (s)'].values[0]

results_df['MSE Improvement (%)'] = ((baseline_mse - results_df['MSE']) / baseline_mse) * 100
results_df['R² Improvement (%)'] = ((results_df['R²'] - baseline_r2) / baseline_r2) * 100
results_df['Latency Increase (%)'] = ((results_df['Latency (s)'] - baseline_latency) / baseline_latency) * 100

# Display the summary table
print(results_df)

# Save to CSV
results_df.to_csv('Model_Comparison_Summary.csv', index=False)

               Model           MSE        R²  Latency (s)  \
0  Linear Regression  1.430468e-28  1.000000        0.012   
1      Random Forest  2.376363e+00  0.977275        0.500   
2  Gradient Boosting  2.231254e+00  0.978663        0.650   
3     Neural Network  8.971257e+00  0.914210        1.200   
4       Hybrid Model  1.380552e+00  0.986798        2.800   

   MSE Improvement (%)  R² Improvement (%)  Latency Increase (%)  
0         0.000000e+00            0.000000              0.000000  
1        -1.661249e+30           -2.272466           4066.666667  
2        -1.559807e+30           -2.133702           5316.666667  
3        -6.271555e+30           -8.579026           9900.000000  
4        -9.651054e+29           -1.320193          23233.333333  
