<a href="https://colab.research.google.com/github/punjabinuclei/RealTimeBatteryMonitoringSystem/blob/main/ScriptWithFeatures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Important Libraries**

In [None]:
import pandas as pd # data processing
import numpy as np # working with arrays

# Visualisation---------------------------------------------------------------------------------
from matplotlib import pyplot as plt
from termcolor import colored as cl # text customization

# preprocessData------------------------------------------------------------------------------------
from sklearn import preprocessing 
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# evaluation metric-----------------------------------------------------------------------------------
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score as r2_score 


# Models
# ---------------------------------------------------------------------------------------------------

# 1.OLS----------
from sklearn.linear_model import LinearRegression # OLS algorithm


# 2. Decision Tree--------
from sklearn.tree import DecisionTreeRegressor

# 3. Random Forest-------
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

# 4.Gradient Boosting-------
from sklearn.ensemble import GradientBoostingRegressor

#5. Extreme Gradient Boosting--------
import xgboost as xgb

# 6 and 7 For NN and DNN-------------
import keras
from keras.models import Sequential
from keras.layers import Dense

# 8 and 9. SVM
from sklearn.svm import SVR



# **Loading DataSet**

In [None]:
# Read the raw measurement log; the file is looked up relative to the working directory.
dataset_path = "VAH07.csv"
df = pd.read_csv(dataset_path)


# **Data PreProcessing**

Cleaned the data and removed unnecessary columns.
Added features like 

Mean, Median, Standard Deviation, Variance of Voltage-Temperature-Current

Power

Resistance

Conductance

Temp Change

Normalized the dataset

Removed columns with null values

In [None]:
# -----------------------------------------------------Removing Unwanted Columns--------------------------------------------------------
# None of these columns is read by the later steps of this notebook.
unused_columns = ['time_s', 'EnergyCharge_W_h', 'QCharge_mA_h', 'EnergyDischarge_W_h', 'cycleNumber', 'Ns']
df = df.drop(columns=unused_columns)

# ----------------------------------------------------------SOC column-----------------------------------------------------
# SOC% is the share of a 3000 mAh reference that has not been discharged yet.
# NOTE(review): 3000 is presumably the cell's rated capacity in mAh -- confirm.
reference_capacity = 3000
df['SOC%'] = ((reference_capacity - df['QDischarge_mA_h']) / reference_capacity) * 100

# -----------------------------------------------------------Mean Calculation----------------------------------------------
def sliding_window_mean(values, window_size):
    """Return the mean of every full window of ``window_size`` consecutive values.

    Element ``i`` of the result is the mean of the samples at positions
    ``i .. i + window_size - 1``, so the output holds
    ``len(values) - window_size + 1`` entries. A NaN inside a window makes
    that window's result NaN.

    Args:
        values: One-dimensional sequence of numbers (list, NumPy array or
            pandas Series); it is read by position, not by index label.
        window_size: Number of consecutive samples per window; must be >= 1.

    Returns:
        A 1-D float ``numpy.ndarray``. It is empty when ``values`` holds
        fewer than ``window_size`` samples.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    samples = np.asarray(values, dtype=float)
    if samples.size < window_size:
        # No complete window fits; callers pad the result with NaN anyway.
        return np.empty(0, dtype=float)

    # A strided view exposes all windows at once, so the reduction runs in
    # NumPy instead of a Python loop over hundreds of thousands of slices.
    windows = np.lib.stride_tricks.sliding_window_view(samples, window_size)
    return windows.mean(axis=1)

# ---------------------------------------------------------------------
# Rolling mean of voltage, current and temperature.
# Row i of each new column is the mean of rows i .. i+window_size-1 of its
# source column; the trailing rows that have no full window are set to NaN.
# NOTE(review): the window looks forward in time -- confirm this is intended.
window_size = 10
mean_targets = {
    "Ecell_V": "EcellMean",
    "I_mA": "I_Mean",
    "Temperature__C": "TemperatureMean",
}

for source_name, target_name in mean_targets.items():
    column = df[source_name]
    mean_values = sliding_window_mean(column, window_size)

    # Right-pad with NaN so the array is as long as the DataFrame's index.
    padding = np.full(len(df.index) - len(mean_values), np.nan)
    mean_values = np.concatenate((mean_values, padding))

    df[target_name] = mean_values

In [None]:
# ----------------------------------------------------------------------Median------------------------------------------------

def sliding_window_median(values, window_size):
    """Return the median of every full window of ``window_size`` consecutive values.

    Element ``i`` of the result is the median of the samples at positions
    ``i .. i + window_size - 1``, so the output holds
    ``len(values) - window_size + 1`` entries. A NaN inside a window makes
    that window's result NaN.

    Args:
        values: One-dimensional sequence of numbers (list, NumPy array or
            pandas Series); it is read by position, not by index label.
        window_size: Number of consecutive samples per window; must be >= 1.

    Returns:
        A 1-D float ``numpy.ndarray``. It is empty when ``values`` holds
        fewer than ``window_size`` samples.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    samples = np.asarray(values, dtype=float)
    if samples.size < window_size:
        # No complete window fits; callers pad the result with NaN anyway.
        return np.empty(0, dtype=float)

    # Reduce all windows in one NumPy call instead of looping in Python.
    windows = np.lib.stride_tricks.sliding_window_view(samples, window_size)
    return np.median(windows, axis=1)

# --------------------------------------------------------------
# Rolling median of voltage, current and temperature.
# Row i of each new column is the median of rows i .. i+window_size-1 of its
# source column; the trailing rows that have no full window are set to NaN.
window_size = 10
median_targets = {
    "Ecell_V": "EcellMedian",
    "I_mA": "ImaMedian",
    "Temperature__C": "TempMedian",
}

for source_name, target_name in median_targets.items():
    column = df[source_name]
    median_values = sliding_window_median(column, window_size)

    # Right-pad with NaN so the array is as long as the DataFrame's index.
    padding = np.full(len(df.index) - len(median_values), np.nan)
    median_values = np.concatenate((median_values, padding))

    df[target_name] = median_values

# Show the frame as the cell output.
df


Unnamed: 0,Ecell_V,I_mA,QDischarge_mA_h,Temperature__C,SOC%,EcellMean,I_Mean,TemperatureMean,EcellMedian,ImaMedian,TempMedian
0,3.533041,1503.4052,0.0000,22.539068,100.00000,3.549227,1503.12920,23.030954,3.551082,1503.30655,23.008847
1,3.540132,1503.9963,0.0000,22.546963,100.00000,3.551914,1503.07008,23.138331,3.553032,1503.10945,23.060167
2,3.544544,1503.6022,0.0000,22.925943,100.00000,3.554021,1502.99125,23.255973,3.554706,1503.10945,23.225971
3,3.547577,1503.0110,0.0000,22.831200,100.00000,3.555829,1502.95182,23.364141,3.556243,1503.10945,23.391775
4,3.550019,1503.2079,0.0000,22.981213,100.00000,3.557503,1502.99124,23.472308,3.557700,1503.20790,23.498363
...,...,...,...,...,...,...,...,...,...,...,...
205482,3.270109,0.0000,1877.7213,21.228424,37.40929,,,,,,
205483,3.270227,0.0000,1877.7213,21.544243,37.40929,,,,,,
205484,3.270187,0.0000,1877.7213,21.378439,37.40929,,,,,,
205485,3.270187,0.0000,1877.7213,21.560034,37.40929,,,,,,


In [None]:
# ------------------------------------------------------Standard Deviation------------------------------------

def sliding_window_stddev(values, window_size):
    """Return the standard deviation of every full window of ``window_size`` values.

    Element ``i`` of the result is the population standard deviation
    (``ddof=0``, NumPy's default) of the samples at positions
    ``i .. i + window_size - 1``, so the output holds
    ``len(values) - window_size + 1`` entries. A NaN inside a window makes
    that window's result NaN.

    Args:
        values: One-dimensional sequence of numbers (list, NumPy array or
            pandas Series); it is read by position, not by index label.
        window_size: Number of consecutive samples per window; must be >= 1.

    Returns:
        A 1-D float ``numpy.ndarray``. It is empty when ``values`` holds
        fewer than ``window_size`` samples.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    samples = np.asarray(values, dtype=float)
    if samples.size < window_size:
        # No complete window fits; callers pad the result with NaN anyway.
        return np.empty(0, dtype=float)

    # Reduce all windows in one NumPy call instead of looping in Python.
    windows = np.lib.stride_tricks.sliding_window_view(samples, window_size)
    return windows.std(axis=1)
# --------------------------------------------------

# Rolling standard deviation of voltage, current and temperature.
# Row i of each new column is the standard deviation of rows
# i .. i+window_size-1 of its source column; the trailing rows that have no
# full window are set to NaN.
window_size = 10
stddev_targets = {
    "Ecell_V": "Ecell_Vstd",
    "I_mA": "I_mA_std",
    "Temperature__C": "TempStd",
}

for source_name, target_name in stddev_targets.items():
    column = df[source_name]
    stddev_values = sliding_window_stddev(column, window_size)

    # Right-pad with NaN so the array is as long as the DataFrame's index.
    padding = np.full(len(df.index) - len(stddev_values), np.nan)
    stddev_values = np.concatenate((stddev_values, padding))

    df[target_name] = stddev_values

# Show the frame as the cell output.
df

In [None]:
# -------------------------------------------------------------Variance-----------------------------------------------------


def sliding_window_variance(values, window_size):
    """Return the variance of every full window of ``window_size`` consecutive values.

    Element ``i`` of the result is the population variance (``ddof=0``,
    NumPy's default) of the samples at positions ``i .. i + window_size - 1``,
    so the output holds ``len(values) - window_size + 1`` entries. A NaN
    inside a window makes that window's result NaN.

    Args:
        values: One-dimensional sequence of numbers (list, NumPy array or
            pandas Series); it is read by position, not by index label.
        window_size: Number of consecutive samples per window; must be >= 1.

    Returns:
        A 1-D float ``numpy.ndarray``. It is empty when ``values`` holds
        fewer than ``window_size`` samples.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    samples = np.asarray(values, dtype=float)
    if samples.size < window_size:
        # No complete window fits; callers pad the result with NaN anyway.
        return np.empty(0, dtype=float)

    # Reduce all windows in one NumPy call instead of looping in Python.
    windows = np.lib.stride_tricks.sliding_window_view(samples, window_size)
    return windows.var(axis=1)


# -----------------------------------------------------------------
# Rolling variance of voltage, current and temperature.
# Row i of each new column is the variance of rows i .. i+window_size-1 of
# its source column; the trailing rows that have no full window are set to NaN.
window_size = 10
variance_targets = {
    "Ecell_V": "Ecell_Variance",
    "I_mA": "I_mA_Variance",
    "Temperature__C": "Temperature__C_Variance",
}

for source_name, target_name in variance_targets.items():
    column = df[source_name]
    variance_values = sliding_window_variance(column, window_size)

    # Right-pad with NaN so the array is as long as the DataFrame's index.
    padding = np.full(len(df.index) - len(variance_values), np.nan)
    variance_values = np.concatenate((variance_values, padding))

    df[target_name] = variance_values



# --------------------------------------------------------Power--------------------------------------------------------
cell_voltage = df["Ecell_V"]
cell_current = df["I_mA"]

# Voltage times current, divided by 1000.
# Going by the column names (volts, milliamps) this is presumably watts -- confirm.
power_column = (cell_voltage * cell_current) / 1000
df["Power"] = power_column


# ---------------------------------------------------------Resistance--------------------------------------------------
# Voltage divided by current, multiplied by 1000. Rows whose current is zero
# divide by zero and end up as inf here.
resistance_column = (cell_voltage / cell_current) * 1000
df["Resistance"] = resistance_column


# ----------------------------------------------------------Conductance------------------------------------------------
# Reciprocal of the resistance computed just above.
conductance_column = 1 / resistance_column
df["Conductance"] = conductance_column


# ---------------------------------------------------------Temperature Difference---------------------------------------------
# Row-to-row change in temperature; the first row has no predecessor, so its
# change is set to 0 rather than left as NaN.
temp_change = df["Temperature__C"].diff()
temp_change.iloc[0] = 0
df["temp_change"] = temp_change

# Show the frame as the cell output.
df

In [None]:

# Turn +/-inf into NaN first so that one dropna() removes every row with a
# missing or infinite value.
df = df.replace(to_replace=[np.inf, -np.inf], value=np.nan).dropna()

# Rescale every column (features and the SOC% target alike) to the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into the scaling -- confirm this is acceptable.
column_names = df.columns
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

# fit_transform returns a bare array; wrap it again. This also resets the
# index to 0..n-1.
df = pd.DataFrame(data=df_scaled, columns=column_names)

# Hard-coded row positions in the cleaned frame.
# NOTE(review): presumably boundaries between recorded segments -- confirm the source.
index1, index2, index3 = 0, 19633, 19907
index4, index5, index6 = 20181, 20454, 20728

# Show the frame as the cell output.
df

## **Feature Selection**

In [None]:
# Keep only the rows strictly between positions index5 and index6.
df_training = df.iloc[index5 + 1:index6]

# Predictor columns: raw signals, their rolling statistics and the derived
# electrical features.
features = [
    'Ecell_V', 'I_mA', 'Temperature__C',
    'EcellMean', 'I_Mean', 'TemperatureMean',
    'EcellMedian', 'ImaMedian', 'TempMedian',
    'Ecell_Vstd', 'I_mA_std', 'TempStd',
    'Ecell_Variance', 'I_mA_Variance', 'Temperature__C_Variance',
    'Power', 'Resistance', 'Conductance', 'temp_change',
]
X = df_training[features]

# The target stays a one-column DataFrame (2-D), not a Series.
y = df_training[['SOC%']]

# Show (rows, columns) of the selected slice as the cell output.
df_training.shape

In [None]:

# Hold out 20% of the rows for testing; the fixed seed keeps the shuffle repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Empty results table; each model evaluation below adds one row to it.
result_columns = ['Algo', 'RMSE', 'MAE', 'R-Squared']
dfAnswer = pd.DataFrame(columns=result_columns)

## **Modeling**

In [None]:
# MODELING

# 1.___________________________________OLS______________________________________________________________

# Ordinary least squares baseline; fit() returns the fitted estimator itself.
ols = LinearRegression().fit(X_train, y_train)

# Predictions on the held-out rows.
ols_yhat_test = ols.predict(X_test)

In [None]:
# Score the OLS model on the held-out test split.
y_pred = ols.predict(X_test)

# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'ols', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)



In [None]:
#2 ________________________________Decision Tree______________________________________________________________________________


# Decision tree regressor. The seed matches the one used for the train/test
# split, so repeated runs build the same tree.
decReg = DecisionTreeRegressor(random_state=1)

# Fit regressor to training data
decReg.fit(X_train, y_train)




In [None]:
# Score the decision tree on the held-out test split.
y_pred = decReg.predict(X_test)

# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'DecisionTree', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# MODELING


# 3------------------------------------------------RandomForest---------------------------------------------------------------

# Random forest regressor. The seed matches the one used for the train/test
# split, so repeated runs grow the same forest.
randomForestReg = RandomForestRegressor(random_state=1)

# The target is a one-column frame; flatten it to 1-D so scikit-learn does
# not emit a DataConversionWarning about a column-vector y.
randomForestReg.fit(X_train, np.ravel(y_train))



In [None]:
# Score the random forest on the held-out test split.
y_pred = randomForestReg.predict(X_test)

# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'RandomForest', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# MODELLING

# ___________________________________________________________GradientBoostingRegression--------------------------------------

# Gradient boosting with 32 depth-1 trees (stumps) and a fixed seed.
gbr = GradientBoostingRegressor(n_estimators=32, max_depth=1, random_state=1)

# The target is a one-column frame; flatten it to 1-D so scikit-learn does
# not emit a DataConversionWarning about a column-vector y.
gbr.fit(X_train, np.ravel(y_train))
  


In [None]:
# Score the gradient boosting model on the held-out test split.
y_pred = gbr.predict(X_test)

# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'GradientBoosting', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# ___________________________________XTREME GRADIENT BOOSTING_____________________________________________________________

# XGBoost regressor with the library's default hyper-parameters.
xgbr = xgb.XGBRegressor()

# Train it on the training split.
xgbr.fit(X=X_train, y=y_train)




In [None]:
# Score the XGBoost model on the held-out test split.
y_pred = xgbr.predict(X_test)

# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'XtremeGradientBoost', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:

# -----------------------------------------------Neural Network----------------------------------------------------------


# Shallow network: one hidden ReLU layer of 500 units feeding a single
# linear output, trained on mean squared error.
# The input width is read from the training matrix instead of being fixed at
# 19, so the model keeps working if the feature list changes.
n_features = X_train.shape[1]
nn_model = Sequential()
nn_model.add(Dense(500, activation='relu', input_shape=(n_features,)))
nn_model.add(Dense(1, activation='linear'))
nn_model.compile(optimizer='adam', loss='mean_squared_error')

# Train silently for 50 epochs with mini-batches of 16 rows.
nn_history = nn_model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)


In [None]:
 # Predict the output for the test set
y_pred = nn_model.predict(X_test)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Derive RMSE from this model's own MSE. Without this line the table would
# record whatever `rmse` an earlier cell left behind.
rmse = np.sqrt(mse)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Calculate the R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'NN', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:


# ----------------------------------------------DEEP NEURAL NETWORK----------------------------------------------------------

# Deep network: four hidden ReLU layers of 500 units each, followed by a
# single linear output, trained on mean squared error.
# The input width is read from the training matrix instead of being fixed at
# 19, so the model keeps working if the feature list changes.
n_features = X_train.shape[1]
dnn_model = Sequential()
dnn_model.add(Dense(500, activation='relu', input_shape=(n_features,)))
dnn_model.add(Dense(500, activation='relu'))
dnn_model.add(Dense(500, activation='relu'))
dnn_model.add(Dense(500, activation='relu'))
dnn_model.add(Dense(1, activation='linear'))
dnn_model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Fit the deep network silently: 50 passes over the training split in
# mini-batches of 16 rows. The returned history object is kept.
dnn_history = dnn_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    verbose=0,
)

In [None]:

# Predict the output for the test set
y_pred = dnn_model.predict(X_test)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Derive RMSE from this model's own MSE. Without this line the table would
# record whatever `rmse` an earlier cell left behind.
rmse = np.sqrt(mse)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Calculate the R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'DNN', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# _____________________________________________________SVM USING LINEAR KERNEL------------------------------------


# Support vector regressor with a linear kernel.
# cache_size is given in MB. The previous value 2097152 asked for 2 TiB and
# was presumably 2 GiB written in KiB, so 2048 MB is used instead. The cache
# only affects training speed, not the fitted model.
linear_svr = SVR(kernel='linear', cache_size=2048)

# The target is a one-column frame; flatten it to 1-D so scikit-learn does
# not emit a DataConversionWarning about a column-vector y.
linear_svr.fit(X_train, np.ravel(y_train))

In [None]:
y_pred = linear_svr.predict(X_test)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Derive RMSE from this model's own MSE. Without this line the table would
# record whatever `rmse` an earlier cell left behind.
rmse = np.sqrt(mse)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Calculate the R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'SVM Linear', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# Support vector regressor with a radial-basis function (RBF) kernel.
# cache_size is given in MB. The previous value 2097152 asked for 2 TiB and
# was presumably 2 GiB written in KiB, so 2048 MB is used instead. The cache
# only affects training speed, not the fitted model.
rbf_svr = SVR(kernel='rbf', cache_size=2048)

# The target is a one-column frame; flatten it to 1-D so scikit-learn does
# not emit a DataConversionWarning about a column-vector y.
rbf_svr.fit(X_train, np.ravel(y_train))


In [None]:
y_pred = rbf_svr.predict(X_test)

# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Derive RMSE from this model's own MSE. Without this line the table would
# record whatever `rmse` an earlier cell left behind.
rmse = np.sqrt(mse)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Calculate the R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append() was removed in pandas 2.0; pd.concat is the supported
# way to add the result row.
result_row = pd.DataFrame([{'Algo': 'SVM RBF', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)
