In [26]:
# important library
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import rcParams
from matplotlib.cm import rainbow
import warnings
warnings.filterwarnings("ignore")


# 1- Loading libraries and Dataset

In [27]:
# * Predict the remaining useful life (RUL) of the batteries.

In [28]:
# Read the battery RUL CSV into a DataFrame and preview its first 25 rows.
dataset_path = "../input/battery-remaining-useful-life-rul/Battery_RUL.csv"
df = pd.read_csv(dataset_path)
df.head(n=25)


In [29]:
# List the column labels of the loaded dataset.
df.columns

# 2- Data Exploration

In [30]:
# Round the values to one decimal place. `df` is rebound here, so every
# later cell works on the rounded values rather than the raw ones.
df = df.round(decimals=1)
df

Data validation and preprocessing

In [31]:
# Summarise column dtypes and non-null counts.
df.info()

# Look for null fields. The boolean mask is computed once and reused for
# both the visual and the numeric check.
# (seaborn is already imported as `sns` in the notebook's import cell, so
# it is not re-imported here.)
null_mask = df.isnull()

# Visual check: null cells are drawn in a different colour from non-null ones.
sns.heatmap(null_mask, yticklabels=False, cbar=False, cmap='viridis')

# Numeric check: number of nulls per column (shown as the cell output).
null_mask.sum()

* No null values.

In [32]:
# Count fully duplicated rows and express them as a percentage of all rows.
duplicate_count = df.duplicated().sum()
duplicate_share = duplicate_count/len(df)*100  # 0.0 means there are no duplicated rows
print(f'Duplicates in the dataset: {duplicate_count}')
print(f'Percentage of duplicates: {duplicate_share}%')

* In the dataset, there are no duplicate values.

In [33]:
# Cardinality: number of distinct values in each column.
df.nunique()  # shows which columns have the most and the fewest distinct values

# 3- Statistics

In [34]:
# Data description: summary statistics, transposed so each dataset column is one row.
df.describe().T.style.background_gradient()  # gradient shading highlights significantly higher and lower values
 

# 4- Data Engineering

In [35]:
# Density plot of the numerical values, one subplot per column.
# Axes are not shared, so every subplot keeps its own x and y scale.
df.plot(
    kind='density',
    subplots=True,
    layout=(10, 2),
    sharex=False,
    sharey=False,
    fontsize=6,
    figsize=(10, 10),
)

* Let's examine how the columns correlate with one another for a better understanding.

In [36]:
# Heatmap of the pairwise correlations between the columns.
print("HEATMAP")
sns.set_context('poster', font_scale=0.5)

# Correlation matrix; kept in `cor` because later cells read it.
cor = df.corr()

plt.figure(figsize=(20,12))
sns.heatmap(data=cor, annot=True, cmap=plt.cm.Reds)
plt.show()

* The output variable is correlated with several of the columns.

In [37]:
# Absolute correlation of every column with 'Time constant current (s)',
# taken from the correlation matrix `cor` computed in the heatmap cell.
# NOTE(review): the notebook's stated goal is predicting RUL, yet the column
# treated as the "output variable" here is 'Time constant current (s)' —
# confirm this is the intended target.
cor_target = cor["Time constant current (s)"].abs()

# No threshold applied yet: show the correlation of all columns first.
relevant_features = cor_target
relevant_features


In [38]:
# Keep only the columns whose absolute correlation exceeds 0.5.
is_highly_correlated = cor_target > 0.5
relevant_features = cor_target[is_highly_correlated]
relevant_features

* The following columns have a strong relationship (absolute correlation above 0.5) with Time constant current (s):
[Time to Discharge (s)] 0.912536

[Time (s) at 4.15V] 0.568922

[Time constant current (s)] 1.000000 (the column's correlation with itself)

[Charging time in seconds] 0.953522

In [39]:
# Correlation heatmap with coefficients annotated to one decimal place.
# NOTE(review): despite the "high correlation columns" heading, this plots
# the correlation matrix of all columns in `df`, not a filtered subset.
plt.figure(figsize=(15,8))
sns.set_context('poster', font_scale=0.8)
full_corr = df.corr()
sns.heatmap(full_corr, annot=True, fmt='.1f', cmap='Blues', cbar=False)

* Observations:

The most essential factors for predicting RUL (remaining useful life) are Time constant current (s), Charging time (s), Maximum Voltage Discharge (V), Time at 4.15V (s), and the 3.6-3.4V voltage decrement time (s).


# Examine the Contribution of Each Column in the Dataset

In [40]:
# Study the data: one 25-bin histogram per column.
sns.set_context('poster', font_scale=0.5)
df.hist(
    bins=25,
    grid=False,
    figsize=(25, 18),
    color='#86bf91',
    zorder=2,
    rwidth=0.9,
)
plt.show()

# 5- Data Visualization

In [41]:
# Bar chart of the mean of every other column, grouped by the
# 'Min. Voltage Charg. (V)' value.
min_charge_col = 'Min. Voltage Charg. (V)'
sns.set_context('poster', font_scale=0.8)
ax = df.groupby(min_charge_col).mean().plot(kind='bar', figsize=(20,15))
ax.set_xlabel(min_charge_col)
ax.set_title(min_charge_col)
plt.show()


* Factors to Consider:

The column parameter Min. Voltage Charg. (V) is dependent on the voltage 3.0>>3.1>>3.3>>3.2.

In [42]:
# Bar chart of the mean of every other column, grouped by the
# 'Max. Voltage Dischar. (V)' value.
max_discharge_col = 'Max. Voltage Dischar. (V)'
sns.set_context('poster', font_scale=0.8)
ax = df.groupby(max_discharge_col).mean().plot(kind='bar', figsize=(20,15))
ax.set_xlabel(max_discharge_col)
ax.set_title(max_discharge_col)
plt.show()


* Factors to Consider:

The column parameter Max. Voltage Discharge (V) is dependent on the voltage 4.4>>4.3>>4.2.

In [47]:
import plotly.express as px
import plotly.graph_objects as go


In [48]:
 # Line chart of 'Time constant current (s)' against the cycle index.
 px.line(data_frame=df, y='Time constant current (s)', x='Cycle_Index')

In [49]:
# Cumulative histogram of a single column.
# go.Histogram takes a 1-D sequence of samples for `x`; the previous version
# passed the whole DataFrame (2-D), which does not give a meaningful histogram.
# NOTE(review): 'Time constant current (s)' is chosen because it is the column
# plotted in the preceding cell — swap in another column if that was the intent.
hist_column = 'Time constant current (s)'
fig = go.Figure(
    data=[go.Histogram(x=df[hist_column], cumulative_enabled=True)]
)
# Label the axes so the figure can be read on its own.
fig.update_layout(xaxis_title=hist_column, yaxis_title='Cumulative count')
fig.show()

In [64]:
# Histogram of the 'Max. Voltage Dischar. (V)' column (nbins=30).
max_discharge_hist_col = "Max. Voltage Dischar. (V)"
fig = px.histogram(data_frame=df, x=max_discharge_hist_col, nbins=30)
fig.show()


* Factors to Consider:

The maximum voltage discharge (V) is 3.9 volts.

In [60]:
# Histogram of the 'Min. Voltage Charg. (V)' column (nbins=30).
min_charge_hist_col = "Min. Voltage Charg. (V)"
fig = px.histogram(data_frame=df, x=min_charge_hist_col, nbins=30)
fig.show()


* Factors to Consider:

The maximum charging voltage (V) is 3.6 volts.

# 6-Epilogue

- Time constant current (s), Charging time (s), Maximum Voltage Discharge (V), Time at 4.15V (s), and the 3.6-3.4V voltage decrement time (s) are the most important variables for predicting RUL (remaining useful life).
- The column parameter Min. Voltage Charg. (V) is affected by the voltage 3.0>>3.1>>3.3>>3.2.
- The column parameter Max. Voltage Discharge (V) is proportional to the voltage 4.4>>4.3>>4.2.
- 3.9 volts is the highest voltage discharge (V).
- 3.6 volts (V) is the maximum charging voltage.