In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
from pathlib import Path

In [None]:
path1 = Path("Data/batch1.pkl")
# NOTE(review): pickle.load can execute arbitrary code; only use it on a trusted data file.
# The context manager closes the file handle as soon as the pickle has been read.
with open(path1, 'rb') as batch1_file:
    batch1 = pickle.load(batch1_file)

# remove batteries that do not reach 80% capacity
for removed_cell in ('b1c8', 'b1c10', 'b1c12', 'b1c13', 'b1c22'):
    del batch1[removed_cell]

In [None]:
# Count the batch-1 cells still present in the dict, then display the count.
numBat1 = len(batch1)
numBat1

In [None]:
# Display the keys (cell identifiers such as "b1c0") currently held in batch1.
batch1.keys()

# 1 Cell

In [None]:
# Display the top-level fields stored for cell 0 of batch 1.
# The cleaned data set from all 3 batches contains 124 cells
batch1["b1c0"].keys() 

## Dict structure
1. Cell<br>
    1.1 Cycle Life<br>
    1.2 Charge Policy<br>
    1.3 Summary (per charging cycle)
        1.3.1 Internal Resistance (Ω)
        1.3.2 Total Quantity of Charge (Ah)
        1.3.3 Total Quantity of Discharge (Ah)
        1.3.4 Temp Avg (°C)
        1.3.5 Temp Min (°C)
        1.3.6 Temp Max (°C)
        1.3.7 Charge time (minutes)
        1.3.8 Cycle no. (Int)
    1.4 Cycle (all data points per cycle)
        1.4.1 Current (C-Rate)
        1.4.2 Quantity of Charge (Ah)
        1.4.3 Quantity of Discharge (Ah)
        1.4.4 Qdlin (?)
        1.4.5 Temperature (°C)
        1.4.6 Tdlin (?)
        1.4.7 Voltage (V)
        1.4.8 dQdV (?)
        1.4.9 Time elapsed (minutes)    

# 1.1 Cycle Life

In [None]:
# NOTE(review): "cycle_life" may be the cycle count at which the cell reached its
# end-of-life threshold rather than the raw number of logged cycles -- confirm.
batch1["b1c0"]["cycle_life"] # total number of cycles measured

# 1.2 Charge Policy

In [None]:
# A C-rate expresses a (dis)charge current relative to the battery's capacity:
# at 1C the current would discharge the entire battery in 1 hour.
# Example: for a battery with a capacity of 100 Ah, 1C equals 100 A, 5C equals 500 A
# and C/2 equals 50 A; a 3.6C rate would therefore be 360 A for that 100 Ah battery.
# NOTE(review): the 100 Ah figures are only an illustration. The current actually drawn
# by this cell scales with its own capacity (the per-cycle capacities plotted in this
# notebook are filtered to 0.5-1.5 Ah) -- confirm against the dataset docs.
# After the battery has been charged to 80% the second part of the charging policy takes effect.
batch1["b1c0"]["charge_policy"]

# 1.3 Summary (for each charging cycle)

In [None]:
# Display the names of the per-cycle summary series stored for cell b1c0.
batch1["b1c0"]["summary"].keys()

### 1.3.1 Internal Resistance

In [None]:
# Internal resistance per cycle (Ohm); values at or below 0.01 are filtered out before plotting.
ir_series = batch1["b1c0"]["summary"]["IR"]
cleaned_I = [resistance for resistance in ir_series if resistance > 0.01]
plt.grid()
plt.plot(cleaned_I)
plt.show()

### 1.3.2/3 Quantity of charge/discharge

In [None]:
# Q denotes a quantity of electricity (charge), measured in Ah.
# Only values strictly between 0.5 and 1.5 are kept for the plot.
summary_b1c0 = batch1["b1c0"]["summary"]
cleaned_QC = [q for q in summary_b1c0["QC"] if 0.5 < q < 1.5]
cleaned_QD = [q for q in summary_b1c0["QD"] if 0.5 < q < 1.5]
plt.plot(cleaned_QC, label="Charge")
plt.plot(cleaned_QD, label="Discharge")
plt.legend()
plt.grid()
plt.show()

### 1.3.4/5/6 Temperature mean/min/max

In [None]:
# Per-cycle temperature summaries (unit presumably degrees Celsius -- TODO confirm).
# The first entry of each series is skipped; the three panels share both axes.
fig, axes = plt.subplots(3, 1, sharex=True, sharey=True, figsize=(12,8))
for axis, field, tag in zip(axes, ("Tavg", "Tmin", "Tmax"), ("AVG", "MIN", "MAX")):
    axis.plot(batch1["b1c0"]["summary"][field][1:], label=tag)
    axis.grid()
    axis.legend()
plt.show()

### 1.3.7 Charge time

In [None]:
# Charge time per cycle (unit presumably minutes -- TODO confirm); entries of 5 or less are dropped.
plt.figure(figsize=(15,5))
chargetime_raw = batch1["b1c0"]["summary"]["chargetime"]
cleaned_chargetime = [duration for duration in chargetime_raw if duration > 5]
plt.plot(cleaned_chargetime)
plt.grid()
plt.show()

### 1.3.8 Cycle number

In [None]:
# Display the first ten entries of the summary "cycle" series.
list(batch1["b1c0"]["summary"]["cycle"])[0:10] # Number/index of charging cycle

# 1.4 Cycles

In [None]:
# Show the keys of the last ten recorded cycles. The slice is open-ended on purpose:
# [-10:-1] would stop one short and hide the final cycle key.
list(batch1["b1c0"]["cycles"].keys())[-10:] # For this cell they measured 1188 charging cycles

### Get all data from one exemplary charging cycle, e.g. #1054

In [None]:
# Print each field name of cycle "1054" together with the number of samples it holds.
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
for field in cycle_1054:
    print("%s\t%s" % (field, len(cycle_1054[field])))

### 1.4.1 I (current in C)

In [None]:
"""
The cell is fast charged with 4C until ~80%, then slowly full charged with 1C. 
Discharging happens with constant 4C.
For explanation of charging policy see 1.2.
"""
# Current I over time t for cycle "1054" of cell b1c0.
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
plt.plot(cycle_1054["t"], cycle_1054["I"])
plt.grid()
plt.show()

### 1.4.2/3 Qc (charge) and Qd (discharge) in Ah

In [None]:
# Charge (Qc) and discharge (Qd) quantities over time for cycle "1054".
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
for series_name in ("Qc", "Qd"):
    plt.plot(cycle_1054["t"], cycle_1054[series_name], label=series_name)
plt.grid()
plt.legend()
plt.show()

In [None]:
# Compare the final Qd/Qc sample of cycle "1054" with the summary entries at index 1054.
cell_b1c0 = batch1["b1c0"]
cycle_1054 = cell_b1c0["cycles"]["1054"]
print('Qd last cycle', cycle_1054["Qd"][-1])
print('QD summary', cell_b1c0["summary"]["QD"][1054])
print('Qc last cycle', cycle_1054["Qc"][-1])
print('QC summary', cell_b1c0["summary"]["QC"][1054])

In [None]:
# Difference between charged and discharged quantity over time for cycle "1054".
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
net_charge = cycle_1054["Qc"] - cycle_1054["Qd"]
plt.plot(cycle_1054["t"], net_charge)
plt.grid()
plt.show()

### 1.4.4 Qdlin

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

def plot_T(cell=1, cycle=2):
    """Plot the temperature traces of one cycle of one batch-1 cell.

    The top panel shows the raw temperature ``T`` against time ``t``; the
    bottom panel shows the ``Tdlin`` series against its sample index. Both
    panels use the fixed y-range 29-36 so that different selections can be
    compared visually. The minimum and maximum of each series are printed.

    Args:
        cell: Integer suffix of the cell key, looked up as ``batch1[f"b1c{cell}"]``.
        cycle: Cycle number, looked up as the string key ``f"{cycle}"``.

    Returns:
        None. If the requested cell or cycle is not present in ``batch1`` a
        short message is printed and nothing is plotted.
    """
    cell_key = f"b1c{cell}"
    cycle_key = f"{cycle}"
    # Some cells are deleted from batch1 right after loading, so a slider
    # position may point at a key that no longer exists.
    if cell_key not in batch1 or cycle_key not in batch1[cell_key]["cycles"]:
        print(f"No data for cell {cell_key}, cycle {cycle_key}")
        return
    cycle_data = batch1[cell_key]["cycles"][cycle_key]

    f, ax = plt.subplots(nrows=2, figsize=(4, 8))
    ax[0].plot(cycle_data["t"], cycle_data["T"])
    ax[0].grid()
    ax[0].set_ylim([29,36])
    print("T min: ", np.min(cycle_data["T"]))
    print("T max: ", np.max(cycle_data["T"]))

    ax[1].plot(cycle_data["Tdlin"])
    ax[1].grid()
    ax[1].set_ylim([29,36])
    print("Tdlin min: ", np.min(cycle_data["Tdlin"]))
    print("Tdlin max: ", np.max(cycle_data["Tdlin"]))

# Interactive sliders for the cell index (0-40) and the cycle number (1-100).
interact(
    plot_T,
    cell=widgets.IntSlider(value=0, description='cell', max=40, min=0),
    cycle=widgets.IntSlider(value=1, description='cycle', max=100, min=1)
);

In [None]:
# Is Qdlin a smoothed out version of Qc? Overlay both series (by sample index) to compare.
for series_name in ("Qc", "Qdlin"):
    plt.plot(batch1["b1c0"]["cycles"]["1054"][series_name], label=series_name)
plt.grid()
plt.legend()
plt.show()

### 1.4.5 T (temperature in Celsius)

In [None]:
# Temperature T over time t for cycle "1054" of cell b1c0.
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
plt.plot(cycle_1054["t"], cycle_1054["T"])
plt.grid()
plt.show()

### 1.4.6 Tdlin

In [None]:
# Tdlin series of cycle "1054", plotted against its sample index.
tdlin_1054 = batch1["b1c0"]["cycles"]["1054"]["Tdlin"]
plt.plot(tdlin_1054)
plt.grid()
plt.show()

### 1.4.7 V (Volt)

In [None]:
# Voltage V over time t for cycle "1054" of cell b1c0.
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
plt.plot(cycle_1054["t"], cycle_1054["V"])
plt.grid()
plt.show()

### 1.4.8 dQdV

In [None]:
# dQdV series of cycle "1054", plotted against its sample index.
dqdv_1054 = batch1["b1c0"]["cycles"]["1054"]["dQdV"]
plt.plot(dqdv_1054)
plt.grid()
plt.show()

In [None]:
plt.figure(figsize=(14,8))

# Overlay the dQdV curve of every 100th cycle key, starting from the second key.
cell_cycles = batch1["b1c0"]["cycles"]
keys = list(cell_cycles.keys())
for cycle_key in keys[1::100]:
    plt.plot(cell_cycles[cycle_key]["dQdV"])

plt.grid()
plt.show()

In [None]:
# Cell whose per-cycle dQdV curves are plotted below.
# This binds another name to the same object; it is not a copy.
dqdv_sample = batch1["b1c0"]

In [None]:
plt.figure(figsize=(20,10))
# Plot the first 900 dQdV samples of every cycle whose number is a multiple of 200.
for cycle_id, cycle_data in dqdv_sample["cycles"].items():
    if int(cycle_id) % 200 != 0:
        continue
    plt.plot(cycle_data["dQdV"][:900], label=cycle_id)
plt.title('dQdV, Select Cycles')
plt.legend()
plt.grid()
plt.show()

In [None]:
plt.figure(figsize=(20,10))
# Zoom into samples 200-299 of dQdV for every cycle whose number is a multiple of 200.
# The title names the slice that is actually plotted (it previously said "1st 100 steps").
for i, cycle in dqdv_sample["cycles"].items():
    if int(i) % 200 == 0:
        plt.plot(cycle["dQdV"][200:300], label=i)
plt.title('dQdV (steps 200-300), Select Cycles')
plt.legend()
plt.show()

In [None]:
# dQdV, Qdlin:  what is the relationship between these two?
# Overlay both series of cycle "10" of cell b1c43 against the sample index.
cycle_10 = batch1['b1c43']['cycles']['10']
for series_name in ('Qdlin', 'dQdV'):
    plt.plot(cycle_10[series_name], label=series_name)
plt.legend()
plt.show()

### 1.4.9 t (time)

In [None]:
# Elapsed time t of cycle "1054", plotted against the sample index.
t_1054 = batch1["b1c0"]["cycles"]["1054"]["t"]
plt.plot(t_1054)
plt.grid()
plt.show()

In [None]:
# time consists of float numbers that represent minutes
# Show the last ten time stamps; [-10:-1] would have dropped the final one.
list(batch1["b1c0"]["cycles"]["1054"]["t"])[-10:]

# Additional graphs

In [None]:
plt.figure(figsize=(14,8))

# Voltage against time for every 100th cycle key, starting from the second key.
cell_cycles = batch1["b1c0"]["cycles"]
keys = list(cell_cycles.keys())
for cycle_key in keys[1::100]:
    sampled_cycle = cell_cycles[cycle_key]
    plt.plot(sampled_cycle["t"], sampled_cycle["V"])
plt.title("Voltage (V) over time (min) for every 100th cycle")
plt.grid()
plt.show()

In [None]:
plt.figure(figsize=(14,8))

# Voltage against the Qd series for every 100th cycle key, starting from the second key.
cell_cycles = batch1["b1c0"]["cycles"]
keys = list(cell_cycles.keys())
for cycle_key in keys[1::100]:
    sampled_cycle = cell_cycles[cycle_key]
    plt.plot(sampled_cycle["Qd"], sampled_cycle["V"])
plt.title("Voltage (V) over charge (Ah) for every 100th cycle")
plt.grid()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Correlation heatmap of the time-indexed series of cycle "1054".
time_series_fields = ('I', 'Qc', 'Qd', 'T', 'V', 't')
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
cycle_df = pd.DataFrame({field: cycle_1054[field] for field in time_series_fields})
sns.heatmap(cycle_df.corr(), annot=True)
plt.show()

In [None]:
# Correlation heatmap of the Qdlin, Tdlin and dQdV series of cycle "1054".
lin_fields = ('Qdlin', 'Tdlin', 'dQdV')
cycle_1054 = batch1["b1c0"]["cycles"]["1054"]
lin_df = pd.DataFrame({field: cycle_1054[field] for field in lin_fields})
sns.heatmap(lin_df.corr(), annot=True)
plt.show()

Tdlin and Qdlin are on different scales but have almost the same form. Why are they so highly correlated? 

# Continue original notebook

In [None]:
path2 = Path("Data/batch2.pkl")
# Load batch 2 from its own path. The previous code opened `path3`, which is not defined
# until a later cell (NameError on a fresh kernel, or the wrong file on a stale one).
# NOTE(review): pickle.load can execute arbitrary code; only use it on a trusted data file.
with open(path2, 'rb') as batch2_file:
    batch2 = pickle.load(batch2_file)

In [None]:
# Five cells from batch1 carried over into batch2. Their batch2 data is appended to the
# matching batch1 cell and then removed from batch2.
# The three lists are parallel: entry k of each list describes the same cell.
batch2_keys = ['b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16']
batch1_keys = ['b1c0', 'b1c1', 'b1c2', 'b1c3', 'b1c4']
# Value added to the batch1 cell's cycle_life when the batch2 data is appended.
add_len = [662, 981, 1060, 208, 482]
# Guard against the parallel lists drifting apart when one of them is edited.
assert len(batch2_keys) == len(batch1_keys) == len(add_len)

In [None]:
# Append the batch2 continuation of each carried-over cell to its batch1 record.
# NOTE: this mutates batch1 in place, so running the cell twice appends the data twice.
for b1_key, b2_key, extra_cycles in zip(batch1_keys, batch2_keys, add_len):
    target = batch1[b1_key]
    continuation = batch2[b2_key]
    target['cycle_life'] = target['cycle_life'] + extra_cycles
    for field in target['summary'].keys():
        head = target['summary'][field]
        tail = continuation['summary'][field]
        if field == 'cycle':
            # Shift the continuation's cycle numbers by the length of the batch1 series.
            tail = tail + len(head)
        target['summary'][field] = np.hstack((head, tail))
    # New per-cycle entries are keyed consecutively after the existing ones.
    offset = len(target['cycles'].keys())
    for position, source_key in enumerate(continuation['cycles'].keys()):
        target['cycles'][str(offset + position)] = continuation['cycles'][source_key]

In [None]:
# Remove the five batch2 entries whose data was appended to batch1 cells.
for merged_key in ('b2c7', 'b2c8', 'b2c9', 'b2c15', 'b2c16'):
    del batch2[merged_key]

In [None]:
# Count the cells remaining in batch2, then display the count.
numBat2 = len(batch2)
numBat2

In [None]:
path3 = Path("Data/batch3.pkl")
# NOTE(review): pickle.load can execute arbitrary code; only use it on a trusted data file.
# The context manager closes the file handle as soon as the pickle has been read.
with open(path3, 'rb') as batch3_file:
    batch3 = pickle.load(batch3_file)

# remove noisy channels from batch3
for noisy_cell in ('b3c37', 'b3c2', 'b3c23', 'b3c32', 'b3c38', 'b3c39'):
    del batch3[noisy_cell]

In [None]:
# Count the cells remaining in batch3, then display the count.
numBat3 = len(batch3)
numBat3

In [None]:
# Total number of cells across the three batches.
numBat = sum((numBat1, numBat2, numBat3))
numBat

In [None]:
# Merge the three batches into one dict; on a key clash the later batch would win.
bat_dict = {}
for batch in (batch1, batch2, batch3):
    bat_dict.update(batch)

In [None]:
# One discharge-capacity curve per cell, all drawn on the same axes.
for cell_record in bat_dict.values():
    cell_summary = cell_record['summary']
    plt.plot(cell_summary['cycle'], cell_summary['QD'])
plt.xlabel('Cycle Number')
plt.ylabel('Discharge Capacity (Ah)')

### Train and Test Split
If you want to use the same train/test split as the paper, use the indices defined below.

In [None]:
# Index sets for the train/test split over the merged cells.
primary_count = numBat1 + numBat2
# Even positions among the batch1+batch2 cells, plus the hard-coded index 83.
# NOTE(review): 83 presumably equals primary_count - 1 for the full data set -- confirm.
test_ind = np.hstack((np.arange(0, primary_count, 2), 83))
# Odd positions, stopping before primary_count - 1.
train_ind = np.arange(1, primary_count - 1, 2)
# The last numBat3 positions form the secondary test set.
secondary_test_ind = np.arange(numBat - numBat3, numBat)