In [81]:
import pickle
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset
import math
import copy

## Load data

This uses the discharge-voltage curves at each cycle as raw data. Dimensionality is $(100,1000)$, with 100 cycles and 1000 voltage steps at which discharge is measured

In [71]:
data = pickle.load(open(r'..\Data\discharge_curves.pkl', 'rb'))

In [78]:
x = np.stack(data["discharge_curves"].values) # reshape data
y = data["cycle_life"].values

In [4]:
np.shape(x) # 124 batteries, 99 cycles, discharge measured at 1000 voltage steps per cycle

(124, 99, 1000)

In [65]:
np.shape(y)

(124,)

In [92]:
# convert to tensors and concatenate features and labels
x = torch.Tensor(x)
y = torch.Tensor(y)

## Split into train/test sets

In [91]:
test_ind = np.array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
       68, 70, 72, 74, 76, 78, 80, 82, 83])

train_ind = np.array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
       35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
       69, 71, 73, 75, 77, 79, 81])

secondary_ind = np.array([ 84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
        97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
       110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
       123])

In [106]:
train = TensorDataset(torch.Tensor(x[train_ind]),torch.Tensor(y[train_ind]))
test = TensorDataset(torch.Tensor(x[test_ind]),torch.Tensor(y[test_ind]))
stest = TensorDataset(torch.Tensor(x[secondary_ind]),torch.Tensor(y[secondary_ind]))

## Transformer model

In [118]:
# parameters
d_model = 1000
nhead = 8
batch_size = 3
nepoch = 1
batch_first = True

In [119]:
transformer = nn.Transformer(d_model=d_model,nhead=nhead,batch_first=batch_first)

In [145]:
criterion = nn.CrossEntropyLoss(ignore_index=0)
optimizer = optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

transformer.train()

for epoch in range(nepoch):
    train_batched = DataLoader(train,batch_size=batch_size,shuffle=True)
    optimizer.zero_grad()

    for i,batch in enumerate(train_batched):
        input,labels = batch
        # need to do something to labels to match dimensionality of input
        output = transformer(input, labels)
        # this line is wrong need to adapt to our data
        loss = criterion(output.contiguous().view(-1, tgt_vocab_size), tgt_data[:, 1:].contiguous().view(-1))
        loss.backward()
        optimizer.step()
    print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

RuntimeError: the feature number of src and tgt must be equal to d_model

In [146]:
input.size()

torch.Size([3, 99, 1000])

In [147]:
labels.size()

torch.Size([3])