# **Importing Important Libraries**

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# CSV is read from a path under this mount point later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

MessageError: ignored

In [None]:
import pandas as pd # data processing
import numpy as np # working with arrays

# Visualisation---------------------------------------------------------------------------------
from matplotlib import pyplot as plt
from termcolor import colored as cl # text customization

# preprocessData------------------------------------------------------------------------------------
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# evaluation metric-----------------------------------------------------------------------------------
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score as r2_score


# Models
# ---------------------------------------------------------------------------------------------------

# 1.OLS----------
from sklearn.linear_model import LinearRegression # OLS algorithm


# 2. Decision Tree--------
from sklearn.tree import DecisionTreeRegressor

# 3. Random Forest-------
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

# 4.Gradient Boosting-------
from sklearn.ensemble import GradientBoostingRegressor

#5. Extreme Gradient Boosting--------
import xgboost as xgb

# 6 and 7 For NN and DNN-------------
import keras
from keras.models import Sequential
from keras.layers import Dense

# 8 and 9. SVM (linear and RBF kernels)
from sklearn.svm import SVR



# **Loading DataSet**

In [None]:
# Path of the dataset CSV under the mounted Google Drive.
DATASET_PATH = "/content/drive/My Drive/VAH01.csv"

# Read the whole file into the working DataFrame used by the rest of the notebook.
df = pd.read_csv(DATASET_PATH)


# **Data PreProcessing**

Cleaned the data and removed unnecessary columns.
Added features such as:

Mean, Median, Standard Deviation, Variance of Voltage-Temperature-Current

Power

Resistance

Conductance

Temp Change

Normalized the dataset

Removed rows with null (or infinite) values

In [None]:
# --- Remove unwanted columns ---
# These columns are dropped up front and are not used by anything below.
UNUSED_COLUMNS = [
    'time_s',
    'EnergyCharge_W_h',
    'QCharge_mA_h',
    'EnergyDischarge_W_h',
    'cycleNumber',
    'Ns',
]
df = df.drop(columns=UNUSED_COLUMNS)

# --- SOC column (the regression target) ---
# SOC% = (3000 - discharged charge) / 3000 * 100.
# NOTE(review): 3000 is presumably the cell's nominal capacity in mAh -- confirm.
discharged_charge = df['QDischarge_mA_h']
df['SOC%'] = ((3000 - discharged_charge) / 3000) * 100

# -----------------------------------------------------------Mean Calculation----------------------------------------------
def sliding_window_mean(values, window_size):
    """Return the mean of every full window of ``window_size`` consecutive values.

    Window ``i`` covers ``values[i:i + window_size]``, so the result holds
    ``len(values) - window_size + 1`` entries (one per full window).

    Parameters
    ----------
    values : sequence
        Anything that supports ``len()`` and slicing (list, ndarray, Series).
    window_size : int
        Number of consecutive values in each window.

    Returns
    -------
    numpy.ndarray
        Float array of window means; empty when ``values`` is shorter than
        ``window_size``.
    """
    # Clamp at zero: without this, an input shorter than the window asks
    # np.zeros for a negative length and raises ValueError.
    n_windows = max(len(values) - window_size + 1, 0)
    result = np.zeros(n_windows)
    for i in range(n_windows):
        result[i] = np.mean(values[i:i + window_size])
    return result

# ---------------------------------------------------------------------
# Rolling-mean features: for each signal, average a window of `window_size`
# consecutive samples that starts at the current row.
window_size = 10

for source_name, feature_name in (
    ("Ecell_V", "EcellMean"),
    ("I_mA", "I_Mean"),
    ("Temperature__C", "TemperatureMean"),
):
    windowed = sliding_window_mean(df[source_name], window_size)

    # The final rows have no complete window; pad them with NaN so the
    # feature has exactly one value per DataFrame row.
    nan_tail = np.full(len(df.index) - len(windowed), np.nan)

    # Store the padded result as a new column.
    df[feature_name] = np.concatenate((windowed, nan_tail))

In [None]:
# ----------------------------------------------------------------------Median------------------------------------------------

def sliding_window_median(values, window_size):
    """Return the median of every full window of ``window_size`` consecutive values.

    Window ``i`` covers ``values[i:i + window_size]``, so the result holds
    ``len(values) - window_size + 1`` entries (one per full window).

    Parameters
    ----------
    values : sequence
        Anything that supports ``len()`` and slicing (list, ndarray, Series).
    window_size : int
        Number of consecutive values in each window.

    Returns
    -------
    numpy.ndarray
        Float array of window medians; empty when ``values`` is shorter than
        ``window_size``.
    """
    # Clamp at zero: without this, an input shorter than the window asks
    # np.zeros for a negative length and raises ValueError.
    n_windows = max(len(values) - window_size + 1, 0)
    result = np.zeros(n_windows)
    for i in range(n_windows):
        result[i] = np.median(values[i:i + window_size])
    return result

# --------------------------------------------------------------


# Rolling-median features: for each signal, take the median of a window of
# `window_size` consecutive samples that starts at the current row.
window_size = 10

for source_name, feature_name in (
    ("Ecell_V", "EcellMedian"),
    ("I_mA", "ImaMedian"),
    ("Temperature__C", "TempMedian"),
):
    windowed = sliding_window_median(df[source_name], window_size)

    # The final rows have no complete window; pad them with NaN so the
    # feature has exactly one value per DataFrame row.
    nan_tail = np.full(len(df.index) - len(windowed), np.nan)

    # Store the padded result as a new column.
    df[feature_name] = np.concatenate((windowed, nan_tail))

# Display the DataFrame with the new columns.
df


In [None]:
# ------------------------------------------------------Standard Deviation------------------------------------

def sliding_window_stddev(values, window_size):
    """Return the standard deviation of every full window of ``window_size`` values.

    Window ``i`` covers ``values[i:i + window_size]`` and is reduced with
    ``np.std``; the result holds ``len(values) - window_size + 1`` entries.

    Parameters
    ----------
    values : sequence
        Anything that supports ``len()`` and slicing (list, ndarray, Series).
    window_size : int
        Number of consecutive values in each window.

    Returns
    -------
    numpy.ndarray
        Float array of window standard deviations; empty when ``values`` is
        shorter than ``window_size``.
    """
    # Clamp at zero: without this, an input shorter than the window asks
    # np.zeros for a negative length and raises ValueError.
    n_windows = max(len(values) - window_size + 1, 0)
    result = np.zeros(n_windows)
    for i in range(n_windows):
        result[i] = np.std(values[i:i + window_size])
    return result
# --------------------------------------------------

# Rolling standard-deviation features: for each signal, reduce a window of
# `window_size` consecutive samples that starts at the current row.
window_size = 10

for source_name, feature_name in (
    ("Ecell_V", "Ecell_Vstd"),
    ("I_mA", "I_mA_std"),
    ("Temperature__C", "TempStd"),
):
    windowed = sliding_window_stddev(df[source_name], window_size)

    # The final rows have no complete window; pad them with NaN so the
    # feature has exactly one value per DataFrame row.
    nan_tail = np.full(len(df.index) - len(windowed), np.nan)

    # Store the padded result as a new column.
    df[feature_name] = np.concatenate((windowed, nan_tail))

# Display the DataFrame with the new columns.
df

Unnamed: 0,Ecell_V,I_mA,QDischarge_mA_h,Temperature__C,SOC%,EcellMean,I_Mean,TemperatureMean,EcellMedian,ImaMedian,TempMedian,Ecell_Vstd,I_mA_std,TempStd
0,3.538717,1491.3828,0.000000,21.139246,100.000000,3.554757,1491.99352,21.653207,3.556596,1491.67830,21.657160,0.007640,0.891832,0.369085
1,3.545570,1492.1708,0.000000,21.115524,100.000000,3.557431,1492.07233,21.777349,3.558487,1491.97385,21.791581,0.006078,0.868909,0.383817
2,3.549980,1491.5797,0.000000,21.305294,100.000000,3.559566,1492.01323,21.896746,3.560180,1491.67835,21.922049,0.005226,0.880226,0.342895
3,3.553131,1492.3679,0.000000,21.574135,100.000000,3.561413,1491.91473,22.016934,3.561775,1491.67835,22.068330,0.004754,0.973409,0.324673
4,3.555572,1490.9886,0.000000,21.510880,100.000000,3.563087,1491.79652,22.117354,3.563311,1491.57975,22.151355,0.004482,0.982928,0.327473
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1091472,2.843005,0.0000,2517.112065,23.076487,16.096265,,,,,,,,,
1091473,2.843793,0.0000,2517.112065,22.965786,16.096265,,,,,,,,,
1091474,2.844738,0.0000,2517.112065,22.696945,16.096265,,,,,,,,,
1091475,2.845565,0.0000,2517.112065,22.847181,16.096265,,,,,,,,,


In [None]:
# -------------------------------------------------------------Variance-----------------------------------------------------


def sliding_window_variance(values, window_size):
    """Return the variance of every full window of ``window_size`` consecutive values.

    Window ``i`` covers ``values[i:i + window_size]`` and is reduced with
    ``np.var``; the result holds ``len(values) - window_size + 1`` entries.

    Parameters
    ----------
    values : sequence
        Anything that supports ``len()`` and slicing (list, ndarray, Series).
    window_size : int
        Number of consecutive values in each window.

    Returns
    -------
    numpy.ndarray
        Float array of window variances; empty when ``values`` is shorter
        than ``window_size``.
    """
    # Clamp at zero: without this, an input shorter than the window asks
    # np.zeros for a negative length and raises ValueError.
    n_windows = max(len(values) - window_size + 1, 0)
    result = np.zeros(n_windows)
    for i in range(n_windows):
        result[i] = np.var(values[i:i + window_size])
    return result


# -----------------------------------------------------------------

# Rolling-variance features: for each signal, reduce a window of
# `window_size` consecutive samples that starts at the current row.
window_size = 10

for source_name, feature_name in (
    ("Ecell_V", "Ecell_Variance"),
    ("I_mA", "I_mA_Variance"),
    ("Temperature__C", "Temperature__C_Variance"),
):
    windowed = sliding_window_variance(df[source_name], window_size)

    # The final rows have no complete window; pad them with NaN so the
    # feature has exactly one value per DataFrame row.
    nan_tail = np.full(len(df.index) - len(windowed), np.nan)

    # Store the padded result as a new column.
    df[feature_name] = np.concatenate((windowed, nan_tail))



# -------------------------------------------------------- Power --------------------------------------------------------
# Element-wise voltage * current, divided by 1000.
# NOTE(review): assuming Ecell_V is in volts and I_mA in milliamps (as the
# column names suggest), the /1000 converts the product to watts -- confirm.
cell_voltage = df["Ecell_V"]
cell_current = df["I_mA"]
df["Power"] = (cell_voltage * cell_current) / 1000

# ------------------------------------------------------ Resistance ------------------------------------------------------
# Element-wise voltage / current, multiplied by 1000 (the counterpart of the
# /1000 used for power). A current of exactly 0 makes this +/-inf.
cell_resistance = (cell_voltage / cell_current) * 1000
df["Resistance"] = cell_resistance

# ------------------------------------------------------ Conductance -----------------------------------------------------
# Reciprocal of the resistance computed above (inf resistance gives 0).
df["Conductance"] = 1 / cell_resistance

# ------------------------------------------------- Temperature difference -----------------------------------------------
# Change in temperature relative to the previous row.
temp_delta = df["Temperature__C"].diff()
# diff() has no predecessor for the first row; record its change as 0.
temp_delta.iloc[0] = 0
df["temp_change"] = temp_delta

# Display the DataFrame with all engineered columns.
df

Unnamed: 0,Ecell_V,I_mA,QDischarge_mA_h,Temperature__C,SOC%,EcellMean,I_Mean,TemperatureMean,EcellMedian,ImaMedian,...,Ecell_Vstd,I_mA_std,TempStd,Ecell_Variance,I_mA_Variance,Temperature__C_Variance,Power,Resistance,Conductance,temp_change
0,3.538717,1491.3828,0.000000,21.139246,100.000000,3.554757,1491.99352,21.653207,3.556596,1491.67830,...,0.007640,0.891832,0.369085,0.000058,0.795365,0.136224,5.277582,2.372776,0.421447,0.000000
1,3.545570,1492.1708,0.000000,21.115524,100.000000,3.557431,1492.07233,21.777349,3.558487,1491.97385,...,0.006078,0.868909,0.383817,0.000037,0.755003,0.147316,5.290596,2.376115,0.420855,-0.023722
2,3.549980,1491.5797,0.000000,21.305294,100.000000,3.559566,1492.01323,21.896746,3.560180,1491.67835,...,0.005226,0.880226,0.342895,0.000027,0.774799,0.117577,5.295079,2.380014,0.420166,0.189770
3,3.553131,1492.3679,0.000000,21.574135,100.000000,3.561413,1491.91473,22.016934,3.561775,1491.67835,...,0.004754,0.973409,0.324673,0.000023,0.947524,0.105413,5.302578,2.380868,0.420015,0.268841
4,3.555572,1490.9886,0.000000,21.510880,100.000000,3.563087,1491.79652,22.117354,3.563311,1491.57975,...,0.004482,0.982928,0.327473,0.000020,0.966148,0.107239,5.301318,2.384708,0.419339,-0.063255
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1091472,2.843005,0.0000,2517.112065,23.076487,16.096265,,,,,,...,,,,,,,0.000000,inf,0.000000,0.221400
1091473,2.843793,0.0000,2517.112065,22.965786,16.096265,,,,,,...,,,,,,,0.000000,inf,0.000000,-0.110701
1091474,2.844738,0.0000,2517.112065,22.696945,16.096265,,,,,,...,,,,,,,0.000000,inf,0.000000,-0.268841
1091475,2.845565,0.0000,2517.112065,22.847181,16.096265,,,,,,...,,,,,,,0.000000,inf,0.000000,0.150236


In [None]:

# Turn +/-inf (e.g. resistance where the current is 0) into NaN, then drop
# every row that contains a NaN in any column.
df = df.replace([np.inf, -np.inf], np.nan).dropna()

# Min-max scale every column of the frame, the SOC% target included.
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

# fit_transform returns a plain array; wrap it back into a DataFrame with the
# original column names (the row index restarts at 0).
df = pd.DataFrame(df_scaled, columns=df.columns)

# Row positions used to slice the scaled frame; only index1 and index2 are
# used by the feature-selection cell.
# NOTE(review): presumably boundaries between discharge cycles -- confirm.
index1, index2, index3 = 0, 20998, 22729
index4, index5, index6 = 24457, 26176, 27894

df

## **Feature Selection**

In [None]:
# Training subset: rows index1 (inclusive) through index2 (exclusive), by position.
df_training = df.iloc[index1:index2]

# Model inputs: the raw signals followed by the engineered features.
# (A reduced alternative kept from earlier experiments:
#  features = ['Ecell_V', 'I_mA', 'Temperature__C'])
features = [
    'Ecell_V', 'I_mA', 'Temperature__C',
    'EcellMean', 'I_Mean', 'TemperatureMean',
    'EcellMedian', 'ImaMedian', 'TempMedian',
    'Ecell_Vstd', 'I_mA_std', 'TempStd',
    'Ecell_Variance', 'I_mA_Variance', 'Temperature__C_Variance',
    'Power', 'Resistance', 'Conductance', 'temp_change',
]

X = df_training[features]
# Target kept as a single-column DataFrame.
y = df_training[['SOC%']]

df_training.shape

In [None]:

# Hold out 40% of the selected rows for testing; the fixed random_state makes
# the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=1,
)


In [None]:
# Results table: one row per algorithm with its error metrics and training time.
RESULT_COLUMNS = ['Algo', 'RMSE', 'MAE', 'R-Squared', 'ExecutionTime']
dfAnswer = pd.DataFrame(columns=RESULT_COLUMNS)

## **Modeling**

In [None]:
import time


In [None]:
# MODELING

# 1.___________________________________OLS______________________________________________________________

# Time only model construction and fitting. The original also ran predict()
# inside the timed region, which none of the other models do, so the OLS
# ExecutionTime was not comparable with the rest of the table.
start_time = time.time()

ols = LinearRegression()
ols.fit(X_train, y_train)

end_time = time.time()
elapsed_time = end_time - start_time

# Test-set predictions, computed outside the timed region.
ols_yhat_test = ols.predict(X_test)


In [None]:
# Predict the output for the test set
y_pred = ols.predict(X_test)

# Root mean squared error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'ols', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)



In [None]:
#2 ________________________________Decision Tree______________________________________________________________________________

start_time = time.time()

# Create the decision tree regressor. A fixed seed makes reruns build the same
# tree; 1 matches the seed already used for train_test_split and gradient boosting.
decReg = DecisionTreeRegressor(random_state=1)

# Fit regressor to training data
decReg.fit(X_train, y_train)

end_time = time.time()

# Seconds spent constructing and fitting the model.
elapsed_time = end_time - start_time



In [None]:
# Predict the output for the test set
y_pred = decReg.predict(X_test)

# Root mean squared error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'DecisionTree', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# MODELING

start_time = time.time()

# 3------------------------------------------------RandomForest---------------------------------------------------------------

# Create the regressor. A fixed seed makes the bootstrap samples and feature
# choices repeatable; 1 matches the seed used for train_test_split and
# gradient boosting.
randomForestReg = RandomForestRegressor(random_state=1)

# Fit on the training data. y_train is a single-column DataFrame; flatten it
# to 1-D so scikit-learn does not emit a column-vector DataConversionWarning.
randomForestReg.fit(X_train, np.ravel(y_train))

end_time = time.time()

# Seconds spent constructing and fitting the model.
elapsed_time = end_time - start_time



In [None]:
# Predict the output for the test set
y_pred = randomForestReg.predict(X_test)

# Root mean squared error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'RandomForest', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# MODELLING
start_time = time.time()
# ___________________________________________________________GradientBoostingRegression--------------------------------------

# Instantiate Gradient Boosting Regressor: 32 depth-1 trees, fixed seed.
gbr = GradientBoostingRegressor(n_estimators = 32, max_depth = 1, random_state = 1)

# Fit to training set. y_train is a single-column DataFrame; flatten it to
# 1-D so scikit-learn does not emit a column-vector DataConversionWarning.
gbr.fit(X_train, np.ravel(y_train))

end_time = time.time()

# Seconds spent constructing and fitting the model.
elapsed_time = end_time - start_time




In [None]:
# Predict the output for the test set
y_pred = gbr.predict(X_test)

# Root mean squared error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'GradientBoosting', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# ___________________________________EXTREME GRADIENT BOOSTING_____________________________________________________________

# Start the clock before the model is built so construction and fitting are both timed.
start_time = time.time()

# XGBoost regressor with the library's default hyperparameters.
xgbr = xgb.XGBRegressor()

# Train on the training split.
xgbr.fit(X_train, y_train)

# Stop the clock and record the elapsed seconds for the results table.
end_time = time.time()
elapsed_time = end_time - start_time



In [None]:
# Predict the output for the test set
y_pred = xgbr.predict(X_test)

# Root mean squared error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'XtremeGradientBoost', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:

# -----------------------------------------------Neural Network----------------------------------------------------------
start_time = time.time()

# Size the input layer from the training data rather than hard-coding 19, so
# the model still builds when the `features` list is changed.
n_features = X_train.shape[1]

# Define the shallow neural network: one hidden layer of 500 ReLU units and a
# single linear output, trained on mean squared error.
nn_model = Sequential()
nn_model.add(Dense(500, activation='relu', input_shape=(n_features,)))
nn_model.add(Dense(1, activation='linear'))
nn_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the NN (50 epochs, mini-batches of 16, no progress output)
nn_history = nn_model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

end_time = time.time()

# Seconds spent building, compiling and training the model.
elapsed_time = end_time - start_time


In [None]:
 # Predict the output for the test set
y_pred = nn_model.predict(X_test)

# Mean squared error and its root. The original computed only `mse` and then
# logged `rmse`, which still held the value from a previously evaluated model.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'NN', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:

start_time = time.time()


# ----------------------------------------------DEEP NEURAL NETWORK----------------------------------------------------------

# Size the input layer from the training data rather than hard-coding 19, so
# the model still builds when the `features` list is changed.
n_features = X_train.shape[1]

# Define the deep neural network: four hidden layers of 500 ReLU units and a
# single linear output, trained on mean squared error.
dnn_model = Sequential()
dnn_model.add(Dense(500, activation='relu', input_shape=(n_features,)))
dnn_model.add(Dense(500, activation='relu'))
dnn_model.add(Dense(500, activation='relu'))
dnn_model.add(Dense(500, activation='relu'))
dnn_model.add(Dense(1, activation='linear'))
dnn_model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Restart the timer in the same cell as the fit. start_time was otherwise taken
# in the cell that defines the model, so the elapsed time also counted any
# pause between running the two cells.
start_time = time.time()

# Train the DNN (50 epochs, mini-batches of 16, no progress output)
dnn_history = dnn_model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

end_time = time.time()

# Seconds spent training the model.
elapsed_time = end_time - start_time


In [None]:

# Predict the output for the test set
y_pred = dnn_model.predict(X_test)

# Mean squared error and its root. The original computed only `mse` and then
# logged `rmse`, which still held the value from a previously evaluated model.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'DNN', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:
# _____________________________________________________SVM USING LINEAR KERNEL------------------------------------
start_time = time.time()

# cache_size is expressed in MB (scikit-learn SVR documentation), so the
# original 2097152 requested roughly 2 TB of kernel cache.
# NOTE(review): 2048 MB (2 GB) is presumably what was intended -- confirm.
linear_svr = SVR(kernel='linear', cache_size=2048)

# Fit the model to the training data. y_train is a single-column DataFrame;
# flatten it to 1-D so scikit-learn does not emit a DataConversionWarning.
linear_svr.fit(X_train, np.ravel(y_train))

end_time = time.time()

# Seconds spent constructing and fitting the model.
elapsed_time = end_time - start_time


In [None]:

y_pred = linear_svr.predict(X_test)

# Mean squared error and its root. The original computed only `mse` and then
# logged `rmse`, which still held the value from a previously evaluated model.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'SVM Linear', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)


In [None]:

# Time this model as well. The original cell had no timing, so the results row
# for the RBF model reused the elapsed time of the previously trained model.
start_time = time.time()

# Create a support vector machine with a radial-basis function (RBF) kernel.
# cache_size is expressed in MB (scikit-learn SVR documentation), so the
# original 2097152 requested roughly 2 TB of kernel cache.
# NOTE(review): 2048 MB (2 GB) is presumably what was intended -- confirm.
rbf_svr = SVR(kernel='rbf', cache_size=2048)

# Fit the model to the training data. y_train is a single-column DataFrame;
# flatten it to 1-D so scikit-learn does not emit a DataConversionWarning.
rbf_svr.fit(X_train, np.ravel(y_train))

end_time = time.time()

# Seconds spent constructing and fitting the model.
elapsed_time = end_time - start_time


In [None]:
y_pred = rbf_svr.predict(X_test)

# Mean squared error and its root. The original computed only `mse` and then
# logged `rmse`, which still held the value from a previously evaluated model.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Mean absolute error
mae = mean_absolute_error(y_test, y_pred)

# R2 score
r2 = r2_score(y_test, y_pred)

# NOTE(review): elapsed_time is whatever the most recent timed cell stored; it
# is only meaningful here if the RBF training cell times its own fit.
# DataFrame.append was removed in pandas 2.0; build a one-row frame and
# concatenate it onto the results table instead.
result_row = pd.DataFrame([{'Algo': 'SVM RBF', 'RMSE': rmse, 'MAE': mae, 'R-Squared': r2, 'ExecutionTime': elapsed_time}])
dfAnswer = pd.concat([dfAnswer, result_row], ignore_index=True)

print(dfAnswer)
