In [6]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
import pickle
import pandas as pd

## Load and combine raw data from all three batches

In [7]:
batch1 = pickle.load(open(r'..\Data\batch1.pkl', 'rb'))
#remove batteries that do not reach 80% capacity
del batch1['b1c8']
del batch1['b1c10']
del batch1['b1c12']
del batch1['b1c13']
del batch1['b1c22']

In [8]:
numBat1 = len(batch1.keys())
numBat1

41

In [9]:
batch2 = pickle.load(open(r'..\Data\batch2.pkl','rb'))

In [10]:
# There are four cells from batch1 that carried into batch2, we'll remove the data from batch2
# and put it with the correct cell from batch1
batch2_keys = ['b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16']
batch1_keys = ['b1c0', 'b1c1', 'b1c2', 'b1c3', 'b1c4']
add_len = [662, 981, 1060, 208, 482];

In [11]:
for i, bk in enumerate(batch1_keys):
    batch1[bk]['cycle_life'] = batch1[bk]['cycle_life'] + add_len[i]
    for j in batch1[bk]['summary'].keys():
        if j == 'cycle':
            batch1[bk]['summary'][j] = np.hstack((batch1[bk]['summary'][j], batch2[batch2_keys[i]]['summary'][j] + len(batch1[bk]['summary'][j])))
        else:
            batch1[bk]['summary'][j] = np.hstack((batch1[bk]['summary'][j], batch2[batch2_keys[i]]['summary'][j]))
    last_cycle = len(batch1[bk]['cycles'].keys())
    for j, jk in enumerate(batch2[batch2_keys[i]]['cycles'].keys()):
        batch1[bk]['cycles'][str(last_cycle + j)] = batch2[batch2_keys[i]]['cycles'][jk]

In [12]:
del batch2['b2c7']
del batch2['b2c8']
del batch2['b2c9']
del batch2['b2c15']
del batch2['b2c16']

In [13]:
numBat2 = len(batch2.keys())
numBat2

43

In [14]:
batch3 = pickle.load(open(r'..\Data\batch3.pkl','rb'))
# remove noisy channels from batch3
del batch3['b3c37']
del batch3['b3c2']
del batch3['b3c23']
del batch3['b3c32']
del batch3['b3c42']
del batch3['b3c43']

In [15]:
numBat3 = len(batch3.keys())
numBat3

40

In [16]:
numBat = numBat1 + numBat2 + numBat3
numBat

124

In [17]:
bat_dict = {**batch1, **batch2, **batch3}

## Build dictionary 

In [143]:
feature_dict = {}
for id in bat_dict.keys():
    feature_dict[id] = {"cycle_life":bat_dict[id]["cycle_life"][0][0]}

## Calculate $\Delta Q_{100 - 10}$ features

In [144]:
def crop_data(id,cycle):
    # isolate discharging data
    cropped = np.array([[Q, V] for Q, V in zip(bat_dict[id]["cycles"][cycle]["Qd"], bat_dict[id]["cycles"][cycle]["V"]) if Q > 1e-5])
    for i, datapoint in enumerate(cropped):
        if datapoint[1] < cropped[i+1, 1]:
            if datapoint[1] < 2.2: # identify where discharging ends
                end = i+1
                break
            else: # this is an anomalous increase in voltage; remove so voltage is decreasing
                cropped[i+1, 1] = cropped[i, 1]
    cropped = cropped[:end]    
    x = np.flip(cropped[:, 1])
    y = np.flip(cropped[:, 0])
    return x,y

In [139]:
from scipy.stats import skew, kurtosis

xs = np.linspace(2,3.5,1000)

def interpolate_spline(id):
    x_100,y_100 = crop_data(id,"100")
    x_10,y_10 = crop_data(id,"10")

    cs_100 = UnivariateSpline(x_100,y_100,s=0.001)
    cs_10 = UnivariateSpline(x_10,y_10,s=0.001)
    return cs_10, cs_100

def get_var(id):
    cs_10, cs_100 = interpolate_spline(id)
    return np.log10(np.var(cs_100(xs) - cs_10(xs)))

def get_min(id):
    cs_10, cs_100 = interpolate_spline(id)
    return np.min(cs_100(xs) - cs_10(xs))

def get_skew(id):
    cs_10, cs_100 = interpolate_spline(id)
    return skew(cs_100(xs) - cs_10(xs))

def get_kurt(id):
    cs_10, cs_100 = interpolate_spline(id)
    return kurtosis(cs_100(xs) - cs_10(xs))

In [145]:
for id in bat_dict.keys():
    feature_dict[id]["DeltaQ_logVar"] = get_var(id)
    feature_dict[id]["DeltaQ_Min"] = get_min(id)
    feature_dict[id]["DeltaQ_Skew"] = get_skew(id)
    feature_dict[id]["DeltaQ_Kurt"] = get_kurt(id)

## Discharge capacity curve features

In [150]:
for id in bat_dict.keys():
    # difference between max discharge capacity and cycle 2
    feature_dict[id]["QD_Max-2"] = (np.max(bat_dict[id]["summary"]["QD"])-bat_dict[id]["summary"]["QD"][1])

    # discharge capacity at cycle 2
    feature_dict[id]["QD_2"] = bat_dict[id]["summary"]["QD"][1]

    # slope of linear fit to capacity fade curve, cycles 2 to 100
    linear_fit = np.polyfit(np.linspace(2,100,99),bat_dict[id]["summary"]["QD"][1:100],deg=1)
    feature_dict[id]["slope_capacity_fade_2-100"] = linear_fit[0]

    # intercept of linear fit to capacity fade curve, cycles 2 to 100
    feature_dict[id]["intercept_capacity_fade_2-100"] = linear_fit[1]

    # slope of linear fit to capacity fade curve, cycles 91 to 100
    linear_fit_2 = np.polyfit(np.linspace(91,100,10),bat_dict[id]["summary"]["QD"][90:100],deg=1)
    feature_dict[id]["slope_capacity_fade_91-100"] = linear_fit_2[0]

    # intercept of linear fit to capacity fade curve, cycles 91 to 100
    feature_dict[id]["intercept_capacity_91-100"] = linear_fit_2[1]


## Other Features

In [157]:
for id in bat_dict.keys():
    # average charge time, first five cycles
    feature_dict[id]["init_avg_charge_time"] = np.mean(bat_dict[id]["summary"]["chargetime"][1:6])

    # average temperature over time, cycles 2 through 100
    feature_dict[id]["avg_T"] = np.mean(bat_dict[id]["summary"]["Tavg"][1:100])

    # minimum internal resistance, cycles 2 through 100
    feature_dict[id]["min_IR"] = np.min(bat_dict[id]["summary"]["IR"][1:100])

    # internal resistance, difference between cycle 100 and cycle 2
    feature_dict[id]["IR_100-2"] = bat_dict[id]["summary"]["IR"][99]-bat_dict[id]["summary"]["IR"][1]

In [158]:
feature_df = pd.DataFrame.from_dict(feature_dict,orient="index")

In [160]:
feature_df.to_csv("../Data/features.csv")

## To read data

In [161]:
loaded_df = pd.read_csv("../Data/features.csv")
loaded_df.head()

Unnamed: 0.1,Unnamed: 0,cycle_life,DeltaQ_logVar,DeltaQ_Min,DeltaQ_Skew,DeltaQ_Kurt,QD_Max-2,QD_2,slope_capacity_fade_2-100,intercept_capacity_fade_2-100,slope_capacity_fade_91-100,intercept_capacity_91-100,init_avg_charge_time,avg_T,min_IR,IR_100-2
0,b1c0,1852.0,-4.83617,-0.009054,0.443416,0.342752,0.468365,1.070689,-0.000207,1.091144,3.5e-05,1.072425,13.374894,31.603747,0.016444,-7.5e-05
1,b1c1,2160.0,-4.988991,-0.010529,0.072319,-0.156308,0.009288,1.075301,6e-06,1.080965,-4.1e-05,1.084767,13.40915,31.330314,0.0,-4.2e-05
2,b1c2,2237.0,-4.812421,-0.013186,0.059164,0.621248,0.008131,1.079922,1e-05,1.084846,-4.4e-05,1.089364,13.358242,31.479584,0.0,-1.4e-05
3,b1c3,1434.0,-4.371544,-0.018933,-0.35385,-1.072029,0.00653,1.079723,1.7e-05,1.084075,-4e-06,1.085131,12.02514,29.942199,0.0,3.9e-05
4,b1c4,1709.0,-4.5339,-0.017832,-0.257654,-0.549822,0.0059,1.078417,1.9e-05,1.081875,-2.9e-05,1.085538,12.041851,31.448884,0.0,-5.3e-05
