<a href="https://colab.research.google.com/github/rochan17/Battery-Life-Prediction/blob/master/Loading_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
from scipy.stats import skew,kurtosis
import pandas as pd

In [0]:
# Load the first batch of cells from its pickle file. The file is opened in a
# `with` block so the handle is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(r'batch1.pkl', 'rb') as _batch1_file:
    batch1 = pickle.load(_batch1_file)
#remove batteries that do not reach 80% capacity
for _excluded_cell in ('b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22'):
    del batch1[_excluded_cell]

In [0]:
# Number of cells left in batch1 (a dict has one entry per key).
numBat1 = len(batch1)
numBat1

In [0]:
# Load the second batch of cells; the `with` block closes the file handle
# once loading finishes.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(r'batch2.pkl', 'rb') as _batch2_file:
    batch2 = pickle.load(_batch2_file)

In [0]:
# Five cells from batch1 carried over into batch2. Their batch2 data is moved
# onto the matching batch1 cell and then removed from batch2.
# Each row is (batch1 key, batch2 key, amount added to the batch1 cycle_life).
_carryover = [
    ('b1c0', 'b2c7', 662),
    ('b1c1', 'b2c8', 981),
    ('b1c2', 'b2c9', 1060),
    ('b1c3', 'b2c15', 208),
    ('b1c4', 'b2c16', 482),
]
batch2_keys = [b2 for _b1, b2, _n in _carryover]
batch1_keys = [b1 for b1, _b2, _n in _carryover]
add_len = [n for _b1, _b2, n in _carryover]

In [0]:
# Append each carried-over batch2 record onto the end of its batch1 cell.
# NOTE: this cell mutates batch1 in place, so running it twice appends the
# batch2 data twice.
for bk, b2k, extra_cycles in zip(batch1_keys, batch2_keys, add_len):
    target = batch1[bk]
    continuation = batch2[b2k]

    target['cycle_life'] = target['cycle_life'] + extra_cycles

    # Concatenate every summary series; the 'cycle' series from batch2 is
    # shifted by the number of batch1 entries so numbering continues on.
    for field in target['summary'].keys():
        head = target['summary'][field]
        tail = continuation['summary'][field]
        if field == 'cycle':
            tail = tail + len(head)
        target['summary'][field] = np.hstack((head, tail))

    # Re-key the batch2 per-cycle records so they follow the batch1 ones.
    last_cycle = len(target['cycles'].keys())
    for offset, jk in enumerate(continuation['cycles'].keys()):
        target['cycles'][str(last_cycle + offset)] = continuation['cycles'][jk]

In [0]:
# Drop the batch2 entries whose data was merged into batch1 cells.
for _moved_cell in ('b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16'):
    del batch2[_moved_cell]

In [0]:
# Number of cells left in batch2.
numBat2 = len(batch2)
numBat2

In [0]:
# Load the third batch of cells; the `with` block closes the file handle
# once loading finishes.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open(r'batch3.pkl', 'rb') as _batch3_file:
    batch3 = pickle.load(_batch3_file)
# remove noisy channels from batch3
for _noisy_cell in ('b3c37', 'b3c2', 'b3c23', 'b3c32', 'b3c38', 'b3c39'):
    del batch3[_noisy_cell]

In [0]:
# Number of cells left in batch3.
numBat3 = len(batch3)
numBat3

In [0]:
# Total number of cells across the three batches.
numBat = sum((numBat1, numBat2, numBat3))
numBat

In [0]:
# Combine the three batches into one dict, keeping batch1, batch2, batch3
# order; on a duplicate key the later batch would win.
bat_dict = dict(batch1)
bat_dict.update(batch2)
bat_dict.update(batch3)

In [0]:
# One line per cell: summary 'QD' plotted against summary 'cycle'.
plt.figure(figsize=(15, 10))
for cell_record in bat_dict.values():
    cell_summary = cell_record['summary']
    plt.plot(cell_summary['cycle'], cell_summary['QD'])
plt.xlabel('Cycle Number')
plt.ylabel('Discharge Capacity (Ah)')

In [0]:
# Positional indices into the combined cell ordering (batch1, batch2, batch3).
# Batch1 and batch2 positions alternate between test (even) and train (odd);
# every batch3 position goes to the secondary test set.
_n_primary = numBat1 + numBat2
# NOTE(review): 83 is hard-coded; presumably it is the last batch1+batch2
# position (numBat1 + numBat2 - 1) -- TODO confirm against the batch sizes.
test_ind = np.hstack((np.arange(0, _n_primary, 2), [83]))
train_ind = np.arange(1, _n_primary - 1, 2)
secondary_test_ind = np.arange(numBat - numBat3, numBat)

In [0]:
# Sanity check: number of entries in the summary 'QD' series of cell 'b2c5'.
_b2c5_qd = bat_dict['b2c5']['summary']['QD']
_b2c5_qd.size

In [0]:
# Peek at the 'Qd' trace stored under cycle key '10' for cell 'b2c5'.
_b2c5_cycle_10 = bat_dict['b2c5']['cycles']['10']
_b2c5_cycle_10['Qd']

In [0]:
# Voltage vs. discharge capacity for one example cell ('b2c5'), comparing the
# records stored under cycle keys '100' (red) and '10' (blue).

_example_cycles = bat_dict['b2c5']['cycles']
for _cycle_key, _colour in (('100', 'red'), ('10', 'blue')):
    plt.plot(_example_cycles[_cycle_key]['Qd'], _example_cycles[_cycle_key]['V'], c=_colour)

# x-axis runs from 0 to the largest 'Qd' value of the cycle-'10' record.
plt.xlim((0, _example_cycles['10']['Qd'].max()))
plt.ylim((2, 3.5))
plt.xticks(ticks=[0, 0.5, 1.0])
plt.yticks(ticks=[2.0, (2.0 + 3.5) / 2, 3.5])
plt.xlabel('Discharge Capacity(Ah)')
plt.ylabel('Voltage(V)')
plt.show()

**Creating Dataset**

In [0]:
# Per-cell statistics of the difference between the 'Qdlin' curves stored
# under cycle keys '100' and '10', plus a plot of every difference curve.
plt.figure(figsize=(10,12))
plt.xlim(-0.15,0)
plt.ylim(2,3.5)
plt.xlabel('Qdlin(cycle 100) - Qdlin(cycle 10)')
plt.ylabel('Voltage (V)')

minimum_dQ_100_10 = np.zeros(len(bat_dict.keys()))
variance_dQ_100_10 = np.zeros(len(bat_dict.keys()))
skewness_dQ_100_10 = np.zeros(len(bat_dict.keys()))
kurtosis_dQ_100_10 = np.zeros(len(bat_dict.keys()))

for i, cell in enumerate(bat_dict.values()):
    c10 = cell['cycles']['10']
    c100 = cell['cycles']['100']
    dQ_100_10 = c100['Qdlin'] - c10['Qdlin']
    # The y grid is sized from the data instead of a hard-coded 1000 so the
    # plot cannot fail on a length mismatch.
    # NOTE(review): assumes Qdlin is sampled on an evenly spaced voltage grid
    # running from 3.5 down to 2 -- TODO confirm against the data loader.
    plt.plot(dQ_100_10, np.linspace(3.5, 2, len(dQ_100_10)))

    # All four features are stored as the natural log of the absolute value
    # (the variance is non-negative already, so no abs is applied to it).
    minimum_dQ_100_10[i] = np.log(np.abs(np.min(dQ_100_10)))
    variance_dQ_100_10[i] = np.log(np.var(dQ_100_10))
    skewness_dQ_100_10[i] = np.log(np.abs(skew(dQ_100_10)))
    kurtosis_dQ_100_10[i] = np.log(np.abs(kurtosis(dQ_100_10)))

In [0]:
# Start from an empty frame; the feature columns are attached one at a time
# in the cells that follow.
dataset = pd.DataFrame(data=None)

In [0]:
# One row per cell, identified by its key in bat_dict (dict order).
dataset['cell'] = list(bat_dict)

In [0]:
# 'cycle_life' is indexed with [0][0] to pull out a single value per cell;
# the values are collected in the same order as the 'cell' column.
cl = [cell_record['cycle_life'][0][0] for cell_record in bat_dict.values()]
dataset['cell_life'] = cl

In [0]:
# Attach the four delta-Q feature arrays as columns, in a fixed order.
for _column, _values in (
    ('minimum_dQ_100_10', minimum_dQ_100_10),
    ('variance_dQ_100_10', variance_dQ_100_10),
    ('skewness_dQ_100_10', skewness_dQ_100_10),
    ('kurtosis_dQ_100_10', kurtosis_dQ_100_10),
):
    dataset[_column] = _values

In [0]:
# Preview the first five rows of the feature table built so far.
dataset.head(n=5)

In [0]:
# Scatter of cell life against the variance of the delta-Q curve, on log-log
# axes.
plt.figure(figsize=(5,6))
# Switch to log scales first so the limits and ticks below are interpreted
# on the final axes.
plt.yscale(value='log')
plt.xscale(value='log')
plt.ylim(pow(10,2),pow(10,3.5))
plt.xlim(pow(10,-6),pow(10,-2))
plt.xticks(np.power(10,np.linspace(-6,-2,3)))
# The 'variance_dQ_100_10' column holds the natural log of the variance, so
# its values are negative and cannot be drawn on a log axis spanning
# 1e-6..1e-2. Undo the log to plot the raw variance.
plt.scatter(x=np.exp(dataset['variance_dQ_100_10']),y=dataset['cell_life'])
plt.xlabel('Variance of dQ(100-10)')
plt.ylabel('Cell life')
plt.show()

**Discharge Capacity Fade Curve Features**

In [0]:
from sklearn.linear_model import LinearRegression

# Per-cell features taken from summary rows 1..99 of 'QD' and 'cycle':
# slope and intercept of a straight-line fit of QD against cycle, the first
# QD value in the window, and the gap between the window maximum and that
# first value.
Slope_Cap_Fade_2_100=np.zeros(len(bat_dict.keys()))
Intercept_Cap_Fade_2_100=np.zeros(len(bat_dict.keys()))
Discharge_Cap_2=np.zeros(len(bat_dict.keys()))
Diff_MaxCap_DC2=np.zeros(len(bat_dict.keys()))


for i, cell in enumerate(bat_dict.values()):
    # Both series are reshaped to column vectors of shape (n, 1) for fit().
    cap=cell['summary']['QD'][1:100].reshape(-1,1)
    cycle=cell['summary']['cycle'][1:100].reshape(-1,1)
    regressor=LinearRegression()
    regressor.fit(cycle,cap)

    # With a 2-D target, coef_ has shape (1, 1) and intercept_ has shape (1,).
    # Pull out the scalars explicitly: storing a size-1 array into a scalar
    # slot relies on an implicit conversion that NumPy has deprecated.
    Slope_Cap_Fade_2_100[i]=regressor.coef_[0][0]
    Intercept_Cap_Fade_2_100[i]=regressor.intercept_[0]
    Discharge_Cap_2[i]=cap[0][0]
    Diff_MaxCap_DC2[i]=np.max(cap)-cap[0][0]

dataset['Slope_Cap_Fade_2_100']=Slope_Cap_Fade_2_100
dataset['Intercept_Cap_Fade_2_100']=Intercept_Cap_Fade_2_100
dataset['Discharge_Cap_2']=Discharge_Cap_2
dataset['Diff_MaxCap_DC2']=Diff_MaxCap_DC2

**Other Features**

In [0]:
#Other features
# Per-cell features read from the summary series:
#   Avg_charge_time  - mean of 'chargetime' over rows 1..5
#   Temp_Integ_2_100 - sum of 'chargetime' * 'Tavg' over rows 1..99
#   Min_IR           - smallest 'IR' value over rows 1..99
#   Diff_IR_2_100    - 'IR' at row 99 minus 'IR' at row 1
n_cells = len(bat_dict)
Avg_charge_time = np.zeros(n_cells)
Temp_Integ_2_100 = np.zeros(n_cells)
Min_IR = np.zeros(n_cells)
Diff_IR_2_100 = np.zeros(n_cells)

for i, cell in enumerate(bat_dict.values()):
    summary = cell['summary']
    charge_time = summary['chargetime']
    resistance = summary['IR']

    Avg_charge_time[i] = np.mean(charge_time[1:6])
    Temp_Integ_2_100[i] = np.sum(charge_time[1:100] * summary['Tavg'][1:100])
    # The builtin min is kept on purpose: it can differ from np.min when the
    # series contains NaN.
    Min_IR[i] = min(resistance[1:100])
    Diff_IR_2_100[i] = resistance[99] - resistance[1]

for _column, _values in (
    ('Avg_charge_time', Avg_charge_time),
    ('Temp_Integ_2_100', Temp_Integ_2_100),
    ('Min_IR', Min_IR),
    ('Diff_IR_2_100', Diff_IR_2_100),
):
    dataset[_column] = _values

In [0]:
# Preview the first five rows of the finished feature table.
dataset.head(5)

In [0]:
# Write the feature table to CSV: no index column, floats with six decimal
# places, missing values written as 'Unknown'.
dataset.to_csv(
    'final_dataset.csv',
    index=False,
    float_format='%.6f',
    na_rep='Unknown',
)