In [108]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
from scipy.stats import skew, kurtosis
from scipy.integrate import simpson
import pickle
import pandas as pd

## Load and combine raw data from all three batches

In [2]:
batch1 = pickle.load(open(r'..\Data\batch1.pkl', 'rb'))
#remove batteries that do not reach 80% capacity
del batch1['b1c8']
del batch1['b1c10']
del batch1['b1c12']
del batch1['b1c13']
del batch1['b1c22']

In [3]:
numBat1 = len(batch1.keys())
numBat1

41

In [4]:
batch2 = pickle.load(open(r'..\Data\batch2.pkl','rb'))

In [5]:
# There are four cells from batch1 that carried into batch2, we'll remove the data from batch2
# and put it with the correct cell from batch1
batch2_keys = ['b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16']
batch1_keys = ['b1c0', 'b1c1', 'b1c2', 'b1c3', 'b1c4']
add_len = [662, 981, 1060, 208, 482];

In [6]:
for i, bk in enumerate(batch1_keys):
    batch1[bk]['cycle_life'] = batch1[bk]['cycle_life'] + add_len[i]
    for j in batch1[bk]['summary'].keys():
        if j == 'cycle':
            batch1[bk]['summary'][j] = np.hstack((batch1[bk]['summary'][j], batch2[batch2_keys[i]]['summary'][j] + len(batch1[bk]['summary'][j])))
        else:
            batch1[bk]['summary'][j] = np.hstack((batch1[bk]['summary'][j], batch2[batch2_keys[i]]['summary'][j]))
    last_cycle = len(batch1[bk]['cycles'].keys())
    for j, jk in enumerate(batch2[batch2_keys[i]]['cycles'].keys()):
        batch1[bk]['cycles'][str(last_cycle + j)] = batch2[batch2_keys[i]]['cycles'][jk]

In [7]:
del batch2['b2c7']
del batch2['b2c8']
del batch2['b2c9']
del batch2['b2c15']
del batch2['b2c16']

In [8]:
numBat2 = len(batch2.keys())
numBat2

43

In [9]:
batch3 = pickle.load(open(r'..\Data\batch3.pkl','rb'))
# remove noisy channels from batch3
del batch3['b3c37']
del batch3['b3c2']
del batch3['b3c23']
del batch3['b3c32']
del batch3['b3c42']
del batch3['b3c43']

In [10]:
numBat3 = len(batch3.keys())
numBat3

40

In [11]:
numBat = numBat1 + numBat2 + numBat3
numBat

124

In [12]:
bat_dict = {**batch1, **batch2, **batch3}

## Build dictionary 

In [118]:
def get_var(id):
    diff = bat_dict[id]["cycles"]["99"]["Qdlin"]-bat_dict[id]["cycles"]["9"]["Qdlin"]
    return np.log10(np.var(diff))

def get_min(id):
    diff = bat_dict[id]["cycles"]["99"]["Qdlin"]-bat_dict[id]["cycles"]["9"]["Qdlin"]
    return np.log10(np.abs(np.min(diff)))

def get_mean(id):
    diff = bat_dict[id]["cycles"]["99"]["Qdlin"]-bat_dict[id]["cycles"]["9"]["Qdlin"]
    return np.log10(np.abs(np.mean(diff)))

def get_skew(id):
    diff = bat_dict[id]["cycles"]["99"]["Qdlin"]-bat_dict[id]["cycles"]["9"]["Qdlin"]
    return np.log10(np.abs(skew(diff)))

def get_kurt(id):
    diff = bat_dict[id]["cycles"]["99"]["Qdlin"]-bat_dict[id]["cycles"]["9"]["Qdlin"]
    return np.log10(np.abs(kurtosis(diff)))

In [128]:
feature_dict = {}
for id in bat_dict.keys():
    feature_dict[id] = {"cycle_life":bat_dict[id]["cycle_life"][0][0]}

In [129]:
for id in bat_dict.keys():
    feature_dict[id]["DeltaQ_logVar"] = get_var(id)
    feature_dict[id]["DeltaQ_logMin"] = get_min(id)
    feature_dict[id]["DeltaQ_logMean"] = get_min(id)
    feature_dict[id]["DeltaQ_logSkew"] = get_skew(id)
    feature_dict[id]["DeltaQ_logKurt"] = get_kurt(id)
    feature_dict[id]["DeltaQ_2V"] = (bat_dict[id]["cycles"]["99"]["Qdlin"]-bat_dict[id]["cycles"]["9"]["Qdlin"])[0]

## Discharge capacity curve features

In [103]:
def get_max(series):
    '''filter for outliers in max discharge capacity'''
    max_idx = np.argmax(series)
    while series[max_idx] > 1.3:
        series = np.delete(series,max_idx)
        max_idx = np.argmax(series)
    return series[max_idx]

In [130]:
for id in bat_dict.keys():
    # slope of linear fit to capacity fade curve, cycles 2 to 100
    linear_fit = np.polyfit(np.linspace(2,100,99),bat_dict[id]["summary"]["QD"][1:100],deg=1)
    feature_dict[id]["slope_capacity_fade_2-100"] = linear_fit[0]

    # intercept of linear fit to capacity fade curve, cycles 2 to 100
    feature_dict[id]["intercept_capacity_fade_2-100"] = linear_fit[1]

    # slope of linear fit to capacity fade curve, cycles 91 to 100
    linear_fit_2 = np.polyfit(np.linspace(91,100,10),bat_dict[id]["summary"]["QD"][90:100],deg=1)
    feature_dict[id]["slope_capacity_fade_91-100"] = linear_fit_2[0]

    # intercept of linear fit to capacity fade curve, cycles 91 to 100
    feature_dict[id]["intercept_capacity_91-100"] = linear_fit_2[1]

    # discharge capacity at cycle 2
    feature_dict[id]["QD_2"] = bat_dict[id]["summary"]["QD"][1]

    # difference between max discharge capacity and cycle 2, accounting for outliers
    #feature_dict[id]["QD_Max-2"] = (np.max(bat_dict[id]["summary"]["QD"])-bat_dict[id]["summary"]["QD"][1])
    feature_dict[id]["QD_Max-2"] = (get_max(bat_dict[id]["summary"]["QD"])-bat_dict[id]["summary"]["QD"][1])

    # discharge capacity at cycle 100
    feature_dict[id]["QD_100"] = bat_dict[id]["summary"]["QD"][99]


## Other Features

In [131]:
for id in bat_dict.keys():
    # average charge time, first five cycles
    feature_dict[id]["init_avg_charge_time"] = np.mean(bat_dict[id]["summary"]["chargetime"][1:6])

    # maximum temperature, cycles 2 to 100
    feature_dict[id]["T_max"] = np.max(bat_dict[id]["summary"]["Tmax"][1:100])

    # minimum temperature, cycles 2 to 100
    feature_dict[id]["T_min"] = np.min(bat_dict[id]["summary"]["Tmin"][1:100])

    # integral of temperature, cycles 2 through 100
    feature_dict[id]["T_integral"] = simpson(bat_dict[id]["summary"]["Tavg"][1:100],np.linspace(1,99,99))

    # internal resistance, cycle 2
    feature_dict[id]["IR_2"] = bat_dict[id]["summary"]["IR"][1]

    # minimum internal resistance, cycles 2 through 100
    feature_dict[id]["min_IR"] = np.min(bat_dict[id]["summary"]["IR"][1:100])

    # internal resistance, difference between cycle 100 and cycle 2
    feature_dict[id]["IR_100-2"] = bat_dict[id]["summary"]["IR"][99]-bat_dict[id]["summary"]["IR"][1]

In [132]:
feature_df = pd.DataFrame.from_dict(feature_dict,orient="index")

In [138]:
feature_df.to_csv("../Data/features4.csv")

In [137]:
feature_df

Unnamed: 0,cycle_life,DeltaQ_logVar,DeltaQ_logMin,DeltaQ_logMean,DeltaQ_logSkew,DeltaQ_logKurt,DeltaQ_2V,slope_capacity_fade_2-100,intercept_capacity_fade_2-100,slope_capacity_fade_91-100,...,QD_2,QD_Max-2,QD_100,init_avg_charge_time,T_max,T_min,T_integral,IR_2,min_IR,IR_100-2
b1c0,1852.0,-5.014861,-2.072648,-2.072648,-0.274041,0.129790,0.000055,-2.068070e-04,1.091144,0.000035,...,1.070689,0.006393,1.075913,13.374894,35.994705,29.012251,3097.354687,0.016742,0.016444,-0.000075
b1c1,2160.0,-5.013960,-1.958457,-1.958457,-0.367163,0.012464,-0.000157,5.535293e-06,1.080965,-0.000041,...,1.075301,0.009288,1.080630,13.409150,34.712265,29.230637,3070.296078,0.017039,0.000000,-0.000042
b1c2,2237.0,-4.737000,-1.764058,-1.764058,0.033502,-0.457627,0.000009,1.029563e-05,1.084846,-0.000044,...,1.079922,0.008131,1.084940,13.358242,35.127342,29.269444,3084.836680,0.016868,0.000000,-0.000014
b1c3,1434.0,-4.442613,-1.722149,-1.722149,-0.357486,0.039579,0.000182,1.713635e-05,1.084075,-0.000004,...,1.079723,0.006530,1.084750,12.025140,31.691414,29.023619,2934.438842,0.016370,0.000000,0.000039
b1c4,1709.0,-4.647744,-1.855177,-1.855177,-0.440634,0.125101,0.000042,1.899432e-05,1.081875,-0.000029,...,1.078417,0.005900,1.082646,12.041851,35.651741,29.092649,3082.307734,0.016787,0.000000,-0.000053
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
b3c39,1156.0,-4.454635,-1.758008,-1.758008,-0.533817,0.001307,-0.000094,-6.583238e-06,1.050919,-0.000024,...,1.047404,0.004107,1.049504,10.043833,36.491322,30.997316,3320.932109,0.015741,0.015481,-0.000155
b3c40,796.0,-4.295108,-1.656517,-1.656517,-0.501096,0.056338,0.000004,-7.692147e-06,1.063392,-0.000048,...,1.059324,0.004510,1.061757,10.043563,38.358112,30.658075,3337.319313,0.015562,0.015255,-0.000234
b3c41,786.0,-4.219509,-1.616310,-1.616310,-0.447169,0.026586,-0.000006,9.732245e-06,1.053878,-0.000051,...,1.050522,0.004362,1.054153,10.042974,37.245838,30.470637,3296.765921,0.018542,0.017007,-0.000173
b3c44,940.0,-4.131496,-1.585275,-1.585275,-0.611514,0.092860,-0.000100,4.689737e-06,1.073078,-0.000018,...,1.069720,0.004185,1.072928,10.043436,36.881859,30.183569,3268.494388,0.015723,0.015517,-0.000106


## To read data

In [161]:
loaded_df = pd.read_csv("../Data/features.csv")
loaded_df.head()

Unnamed: 0.1,Unnamed: 0,cycle_life,DeltaQ_logVar,DeltaQ_Min,DeltaQ_Skew,DeltaQ_Kurt,QD_Max-2,QD_2,slope_capacity_fade_2-100,intercept_capacity_fade_2-100,slope_capacity_fade_91-100,intercept_capacity_91-100,init_avg_charge_time,avg_T,min_IR,IR_100-2
0,b1c0,1852.0,-4.83617,-0.009054,0.443416,0.342752,0.468365,1.070689,-0.000207,1.091144,3.5e-05,1.072425,13.374894,31.603747,0.016444,-7.5e-05
1,b1c1,2160.0,-4.988991,-0.010529,0.072319,-0.156308,0.009288,1.075301,6e-06,1.080965,-4.1e-05,1.084767,13.40915,31.330314,0.0,-4.2e-05
2,b1c2,2237.0,-4.812421,-0.013186,0.059164,0.621248,0.008131,1.079922,1e-05,1.084846,-4.4e-05,1.089364,13.358242,31.479584,0.0,-1.4e-05
3,b1c3,1434.0,-4.371544,-0.018933,-0.35385,-1.072029,0.00653,1.079723,1.7e-05,1.084075,-4e-06,1.085131,12.02514,29.942199,0.0,3.9e-05
4,b1c4,1709.0,-4.5339,-0.017832,-0.257654,-0.549822,0.0059,1.078417,1.9e-05,1.081875,-2.9e-05,1.085538,12.041851,31.448884,0.0,-5.3e-05
