# State-of-charge and Health Estimation for Lithium-ion EV Batteries Using Hybrid machine learning (LinearRegression)


In [12]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


In [2]:
# Load the dataset from CSV file
dataset = pd.read_csv(r"C:\Users\DELL\Desktop\B0006.csv")
dataset.head(10)

Unnamed: 0,cycle,ambient_temperature,datetime,capacity,voltage_measured,current_measured,temperature_measured,current_load,voltage_load,time
0,1,24,2008-04-02 15:25:41,2.035338,4.1798,-0.002366,24.277568,-0.0006,0.0,0.0
1,1,24,2008-04-02 15:25:41,2.035338,4.179823,0.000434,24.277073,-0.0006,4.195,16.781
2,1,24,2008-04-02 15:25:41,2.035338,3.966528,-2.014242,24.366226,-1.999,3.07,35.703
3,1,24,2008-04-02 15:25:41,2.035338,3.945886,-2.00873,24.515123,-1.999,3.045,53.781
4,1,24,2008-04-02 15:25:41,2.035338,3.930354,-2.013381,24.676053,-1.999,3.026,71.922
5,1,24,2008-04-02 15:25:41,2.035338,3.91744,-2.011192,24.853025,-1.999,3.011,90.094
6,1,24,2008-04-02 15:25:41,2.035338,3.906451,-2.010007,25.018929,-1.999,3.002,108.281
7,1,24,2008-04-02 15:25:41,2.035338,3.896511,-2.010279,25.204449,-1.999,2.993,126.453
8,1,24,2008-04-02 15:25:41,2.035338,3.887994,-2.007998,25.380117,-1.999,2.985,144.641
9,1,24,2008-04-02 15:25:41,2.035338,3.8802,-2.012029,25.554437,-1.999,2.977,162.844


In [3]:
# Assuming your dataset has columns for features (X) and target variables (y_soc and y_health)
X = dataset[['voltage_measured', 'current_measured','voltage_load','temperature_measured']]  # Replace feature1, feature2, feature3 with actual column names
y_soc = dataset['capacity']  # State-of-charge (SoC)
y_health = dataset['current_load']  # Battery health


In [4]:
# Split the data into training and testing sets
X_train, X_test, y_soc_train, y_soc_test, y_health_train, y_health_test = train_test_split(X, y_soc, y_health, test_size=0.2, random_state=42)
print(X_train)
print(X_test)
print(y_soc_train)
print(y_health_train)
print(y_soc_test)
print(y_health_test)

       voltage_measured  current_measured  voltage_load  temperature_measured
30430          3.498114         -2.008795         2.600             31.883454
18736          3.250992         -2.010925         2.344             36.238312
5579           3.487529         -2.010572         2.590             32.344841
21114          2.899828         -2.010298         2.003             38.743858
8891           3.040773         -0.002555         0.000             36.810168
...                 ...               ...           ...                   ...
11284          3.662172         -2.009209         2.757             28.395287
44732          3.313321         -2.010095         2.406             33.732525
38158          3.574697         -0.002585         0.000             37.011558
860            3.608433         -2.010376         2.715             31.319941
15795          3.736137         -2.009985         2.831             28.711407

[40228 rows x 4 columns]
       voltage_measured  current_measu

In [5]:
# Train a machine learning model for SoC estimation
soc_model = LinearRegression()
soc_model.fit(X_train, y_soc_train)

In [6]:
# Train  machine learning model for health estimation
health_model = LinearRegression()
health_model.fit(X_train, y_health_train)

In [7]:

# Making predictions
y_pred = soc_model.predict(X_test)


predictions_df = pd.DataFrame(y_pred, index=X_test.index, columns=['Predicted_capacity'])

# Print the DataFrame
print(predictions_df)


       Predicted_capacity
40407            1.505030
6890             1.553824
23770            1.200793
40914            1.448445
44860            1.446257
...                   ...
47150            1.500133
9097             1.543815
28713            1.359331
49232            1.497686
47181            1.512741

[10057 rows x 1 columns]


In [8]:
predictions_df = pd.DataFrame({
    'Actual capacity': (y_soc_test),

    'Predicted soc': abs(y_pred)
})

# Display the DataFrame
print(predictions_df)

       Actual capacity  Predicted soc
40407         1.310119       1.505030
6890          1.839251       1.553824
23770         1.473215       1.200793
40914         1.305216       1.448445
44860         1.253435       1.446257
...                ...            ...
47150         1.205616       1.500133
9097          1.760471       1.543815
28713         1.446867       1.359331
49232         1.164401       1.497686
47181         1.205616       1.512741

[10057 rows x 2 columns]


In [9]:
# Making predictions
y_pred = health_model.predict(X_test)


predictions_df = pd.DataFrame(y_pred, index=X_test.index, columns=['Predicted_current_load'])#current _load is  health of battery

# Print the DataFrame
print(predictions_df)


       Predicted_current_load
40407                1.733731
6890                 1.469382
23770                1.134932
40914               -0.388072
44860               -0.375125
...                       ...
47150                1.653020
9097                 1.482261
28713                0.157750
49232                1.667443
47181                1.540746

[10057 rows x 1 columns]


In [10]:
predictions_df = pd.DataFrame({
    'Actual current_load': (y_health_test),

    'Predicted soh': abs(y_pred)
})

# Display the DataFrame
print(predictions_df)

       Actual current_load  Predicted soh
40407               1.9988       1.733731
6890                1.9990       1.469382
23770               0.0006       1.134932
40914               0.0006       0.388072
44860               0.0006       0.375125
...                    ...            ...
47150               1.9988       1.653020
9097                1.9990       1.482261
28713               0.0008       0.157750
49232               1.9990       1.667443
47181               1.9990       1.540746

[10057 rows x 2 columns]


In [11]:
# Evaluate the models
soc_predictions = soc_model.predict(X_test)
soc_rmse = np.sqrt(mean_squared_error(y_soc_test, soc_predictions))

health_predictions = health_model.predict(X_test)
health_rmse = np.sqrt(mean_squared_error(y_health_test, health_predictions))

print("SoC RMSE:", soc_rmse)
print("Health RMSE:", health_rmse)

SoC RMSE: 0.21782532422605283
Health RMSE: 1.2162679649112798
