In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline
import pickle
import pandas as pd

## Load and combine raw data from all three batches

In [3]:
# NOTE: pickle.load can execute arbitrary code; only load batch files from a trusted source.
# Forward slashes keep this relative path valid on Windows as well as on POSIX systems,
# and the context manager closes the file handle as soon as the data is read.
with open('../Data/batch1.pkl', 'rb') as batch_file:
    batch1 = pickle.load(batch_file)

#remove batteries that do not reach 80% capacity
for cell_key in ('b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22'):
    del batch1[cell_key]

In [4]:
# number of entries left in batch1 after the removals
numBat1 = len(batch1)
numBat1

41

In [5]:
# NOTE: pickle.load can execute arbitrary code; only load batch files from a trusted source.
# Forward slashes keep this relative path valid on Windows as well as on POSIX systems,
# and the context manager closes the file handle as soon as the data is read.
with open('../Data/batch2.pkl', 'rb') as batch_file:
    batch2 = pickle.load(batch_file)

In [6]:
# Five cells from batch1 carried over into batch2; we'll remove their data from batch2
# and put it with the correct cell from batch1.
# The three lists are parallel: batch2_keys[i] is the continuation of batch1_keys[i],
# and add_len[i] is the amount added to that batch1 cell's cycle_life.
batch2_keys = ['b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16']
batch1_keys = ['b1c0', 'b1c1', 'b1c2', 'b1c3', 'b1c4']
add_len = [662, 981, 1060, 208, 482]

In [7]:
# Stitch each continued batch2 run onto the end of its matching batch1 cell.
for b1_key, b2_key, extra_cycles in zip(batch1_keys, batch2_keys, add_len):
    target = batch1[b1_key]
    continuation = batch2[b2_key]

    # extend the recorded cycle life by the length of the continued run
    target['cycle_life'] = target['cycle_life'] + extra_cycles

    # append every summary series; only the 'cycle' series needs renumbering
    target_summary = target['summary']
    for field in target_summary.keys():
        tail = continuation['summary'][field]
        if field == 'cycle':
            # shift the appended cycle numbers by the number of entries batch1 already holds
            tail = tail + len(target_summary[field])
        target_summary[field] = np.hstack((target_summary[field], tail))

    # continue the string cycle keys from the current number of batch1 cycles
    first_new = len(target['cycles'].keys())
    for position, src_key in enumerate(continuation['cycles'].keys(), start=first_new):
        target['cycles'][str(position)] = continuation['cycles'][src_key]

In [8]:
# these five runs were appended to batch1 cells, so drop their entries from batch2
for moved_key in ('b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16'):
    del batch2[moved_key]

In [9]:
# number of entries left in batch2 after the removals
numBat2 = len(batch2)
numBat2

43

In [10]:
# NOTE: pickle.load can execute arbitrary code; only load batch files from a trusted source.
# Forward slashes keep this relative path valid on Windows as well as on POSIX systems,
# and the context manager closes the file handle as soon as the data is read.
with open('../Data/batch3.pkl', 'rb') as batch_file:
    batch3 = pickle.load(batch_file)

# remove noisy channels from batch3
for cell_key in ('b3c37', 'b3c2', 'b3c23', 'b3c32', 'b3c42', 'b3c43'):
    del batch3[cell_key]

In [11]:
# number of entries left in batch3 after the removals
numBat3 = len(batch3)
numBat3

40

In [12]:
# total number of cells kept across the three batches
numBat = sum((numBat1, numBat2, numBat3))
numBat

124

In [13]:
# merge the three batches into one lookup that reuses the batches' own keys (e.g. 'b1c0');
# if a key appeared in more than one batch, the later batch would win
bat_dict = {}
for batch in (batch1, batch2, batch3):
    bat_dict.update(batch)

## Build feature dictionary

In [14]:
# Seed one feature record per cell, holding its cycle life.
# cycle_life is stored nested, so [0][0] pulls out the single value.
# The comprehension keeps its loop variables local, so this cell does not leave a
# global named `id` that would shadow the builtin.
feature_dict = {
    cell_key: {"cycle_life": cell["cycle_life"][0][0]}
    for cell_key, cell in bat_dict.items()
}

## Extract discharge curve

In [44]:
def crop_data(id,cycle):
    """Return the discharge portion of one cycle as (voltage, discharge capacity) arrays.

    Reads ``bat_dict[id]["cycles"][cycle]`` and pairs its ``"Qd"`` and ``"V"`` series.

    Parameters
    ----------
    id : key of the cell in ``bat_dict``.
    cycle : key of the cycle inside that cell's ``"cycles"`` mapping.

    Returns
    -------
    x, y : numpy arrays
        ``x`` holds the kept ``"V"`` values and ``y`` the matching ``"Qd"`` values,
        both reversed relative to recording order (so voltage is non-decreasing).

    Raises
    ------
    ValueError
        If no sample of the cycle has ``Qd > 1e-4``.
    """
    cycle_data = bat_dict[id]["cycles"][cycle]
    # isolate discharging data
    cropped = np.array([[Q, V] for Q, V in zip(cycle_data["Qd"], cycle_data["V"]) if Q > 1e-4])
    if cropped.size == 0:
        raise ValueError("no discharge data (Qd > 1e-4) for cell %r, cycle %r" % (id, cycle))

    # Default: keep every sample when the voltage never rebounds below 2.2.
    end = len(cropped)
    # Compare each sample with its successor; stopping one short of the end avoids
    # indexing past the last row.
    for i in range(len(cropped) - 1):
        if cropped[i, 1] < cropped[i + 1, 1]:
            if cropped[i, 1] < 2.2: # identify where discharging ends
                end = i + 1
                break
            else: # this is an anomalous increase in voltage; remove so voltage is decreasing
                cropped[i + 1, 1] = cropped[i, 1]
    cropped = cropped[:end]
    x = np.flip(cropped[:, 1])
    y = np.flip(cropped[:, 0])
    return x,y

In [28]:
def interpolate_spline_cycle(id,cycle):
    """Fit a smoothing spline to one cycle's cropped curve and sample it on a fixed grid.

    The (x, y) pair returned by ``crop_data(id, cycle)`` is fitted with
    ``UnivariateSpline`` (smoothing factor ``s=0.001``), and the fitted spline is
    evaluated at 1000 evenly spaced x values from 2 to 3.5.

    Returns a numpy array of the 1000 spline values.
    """
    voltage, capacity = crop_data(id, cycle)
    fitted = UnivariateSpline(voltage, capacity, s=0.001)
    voltage_grid = np.linspace(2, 3.5, 1000)
    return fitted(voltage_grid)

In [73]:
# For every cell, interpolate the discharge curves of cycles 1-99 onto a common voltage grid.
# The loop variable is not called `id`, so the builtin stays usable in later cells.
for cell_key in bat_dict.keys():
    # each row holds the Qd (discharge) values of one cycle's Qd vs. V curve,
    # evaluated at 1000 evenly spaced values of V from 2 to 3.5
    curves = [interpolate_spline_cycle(cell_key, str(cycle)) for cycle in range(1, 100)]
    feature_dict[cell_key]["discharge_curves"] = np.array(curves)

The maximal number of iterations maxit (set to 20 by the program)
allowed for finding a smoothing spline with fp=s has been reached: s
too small.
There is an approximation returned but the corresponding weighted sum
of squared residuals does not satisfy the condition abs(fp-s)/s < tol.


In [74]:
# one row per key of feature_dict (the keys become the index), one column per feature
feature_df = pd.DataFrame.from_dict(feature_dict,orient="index")

In [75]:
# preview the first rows to sanity-check the assembled feature table
feature_df.head()

Unnamed: 0,cycle_life,discharge_curves
b1c0,1852.0,"[[1.0562526138096293, 1.0561820217813436, 1.05..."
b1c1,2160.0,"[[1.0636727527198908, 1.0635899740279227, 1.06..."
b1c2,2237.0,"[[1.065187101384174, 1.0651463392314973, 1.065..."
b1c3,1434.0,"[[1.065937842155559, 1.065865205816492, 1.0657..."
b1c4,1709.0,"[[1.0646454092546713, 1.064590333943115, 1.064..."


In [85]:
# sanity check: the per-cell curve arrays must stack into a single 3D array
np.stack(feature_df["discharge_curves"].values).shape

(124, 99, 1000)

In [89]:
# write the feature table to disk as a pickle
with open('../Data/discharge_curves.pkl', 'wb') as out_file:
    pickle.dump(feature_df, out_file)