In [1]:
import pickle
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset
import math
import copy

## Load data

This uses the discharge-voltage curves at each cycle as raw data. Dimensionality is $(100,1000)$, with 100 cycles and 1000 voltage steps at which discharge is measured

In [2]:
data = pickle.load(open(r'../Data/discharge_curves.pkl', 'rb'))

In [3]:
x = np.stack(data["discharge_curves"].values) # reshape data
y = data["cycle_life"].values

In [4]:
np.shape(x) # 124 batteries, 99 cycles, discharge measured at 1000 voltage steps per cycle

(124, 99, 1000)

In [5]:
np.shape(y)

(124,)

In [6]:
# convert to tensors and concatenate features and labels
x = torch.Tensor(x)
y = torch.Tensor(y)

In [7]:
position_embeddings = nn.Parameter(torch.randn(len(x), 99, 1000))

# Add position embedding into patch embedding
input_embeddings = x + position_embeddings

## Split into train/test sets

In [8]:
test_ind = np.array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66,
       68, 70, 72, 74, 76, 78, 80, 82, 83])

train_ind = np.array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33,
       35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
       69, 71, 73, 75, 77, 79, 81])

secondary_ind = np.array([ 84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
        97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
       110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
       123])

In [9]:
train = TensorDataset(torch.Tensor(x[train_ind]),torch.Tensor(y[train_ind]))
test = TensorDataset(torch.Tensor(x[test_ind]),torch.Tensor(y[test_ind]))
stest = TensorDataset(torch.Tensor(x[secondary_ind]),torch.Tensor(y[secondary_ind]))

## Transformer model

In [10]:
input_embeddings.size()

torch.Size([124, 99, 1000])

In [11]:
# parameters
d_model = 1000
nhead = 8
batch_size = 3
nepoch = 1
batch_first = True

In [12]:
num_heads = 8
num_layers = 12

# Define Transformer encoders' stack
transformer_encoder_layer = nn.TransformerEncoderLayer(
           d_model=d_model, nhead=num_heads,
           dim_feedforward=int(d_model * 4),
           dropout=0.1)
transformer_encoder = nn.TransformerEncoder(
           encoder_layer=transformer_encoder_layer,
           num_layers=num_layers)

# Forward pass
output_embeddings = transformer_encoder(input_embeddings[:10])
print(f' Output embedding size: {output_embeddings.size()}')

 Output embedding size: torch.Size([10, 99, 1000])


In [13]:
regressor = nn.Linear(d_model, 1) 
output_regression = regressor(torch.sum(output_embeddings, dim = 1))
output_regression

tensor([[nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan],
        [nan]], grad_fn=<AddmmBackward0>)

In [20]:
class FullModel(torch.nn.Module):

    def __init__(self):
        super(FullModel, self).__init__()

        self.position_embeddings = nn.Parameter(torch.randn(99, 1000))
        self.transformer = transformer_encoder
        self.linear = regressor
        self.initialize_parameters()

    def initialize_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x):
        # Add position embedding into patch embedding
        x = x + position_embeddings
        x = self.transformer(x)
        x = self.linear(x)
        return x

In [21]:
model_toy = FullModel()
print('The model:')
print(model_toy)


print('\n\nModel params:')
for param in model_toy.parameters():
    print(param)

The model:
FullModel(
  (transformer): TransformerEncoder(
    (layers): ModuleList(
      (0-11): 12 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=1000, out_features=1000, bias=True)
        )
        (linear1): Linear(in_features=1000, out_features=4000, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=4000, out_features=1000, bias=True)
        (norm1): LayerNorm((1000,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((1000,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (linear): Linear(in_features=1000, out_features=1, bias=True)
)


Model params:
Parameter containing:
tensor([[-0.0035, -0.0022,  0.0081,  ..., -0.0612,  0.0127, -0.0506],
        [-0.0272,  0.0254,  0.0688,  ..., -0.0577, -0.0154, -0.0704],
        [ 0.067

In [18]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Load nodel, loss function, and optimizer
model = FullModel().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Load batch image
train_dataset = TensorDataset(torch.Tensor(x[train_ind]),torch.Tensor(y[train_ind]))
# train_dataloader = DataLoader(train_dataset, num_workers=1, batch_size=batch_size, shuffle=True)

# # Fine tuning loop
for i in range(1):
    total_acc_train = 0
    total_loss_train = 0.0

    train_inputs, train_outputs = train_dataset[:1]
    for train_input, train_label in zip(train_inputs, train_outputs):
        print(train_label)
        output = model(train_input.to(device))
        print(output)
        # loss = criterion(output, train_label.to(device))
        # acc = (output.argmax(dim=1) == train_label.to(device)).sum().item()
        # total_acc_train += acc
        # total_loss_train += loss.item()

        # loss.backward()
        # optimizer.step()
        # optimizer.zero_grad()

    # print(f'Epochs: {i + 1} | Loss: {total_loss_train / len(train_dataset): .3f} | Accuracy: {total_acc_train / len(train_dataset): .3f}')


# Hyperparameters
EPOCHS = 10
LEARNING_RATE = 1e-4
BATCH_SIZE = 8


tensor(2160.)


: 

: 

In [41]:
train_dataset[:2]

(tensor([[[1.0637e+00, 1.0636e+00, 1.0635e+00,  ..., 1.1832e-03,
           1.1533e-03, 1.1205e-03],
          [1.0640e+00, 1.0640e+00, 1.0639e+00,  ..., 2.4317e-02,
           2.5293e-02, 2.6291e-02],
          [1.0659e+00, 1.0658e+00, 1.0657e+00,  ..., 2.9703e-03,
           3.1020e-03, 3.2379e-03],
          ...,
          [1.0649e+00, 1.0648e+00, 1.0648e+00,  ..., 1.2261e-02,
           1.2833e-02, 1.3419e-02],
          [1.0688e+00, 1.0687e+00, 1.0685e+00,  ..., 1.7354e-04,
           1.5883e-04, 1.4444e-04],
          [1.0659e+00, 1.0658e+00, 1.0658e+00,  ..., 1.6844e-02,
           1.7608e-02, 1.8391e-02]],
 
         [[1.0659e+00, 1.0659e+00, 1.0658e+00,  ..., 1.5821e-02,
           1.6465e-02, 1.7124e-02],
          [1.0649e+00, 1.0649e+00, 1.0649e+00,  ..., 5.2735e-03,
           5.5002e-03, 5.7334e-03],
          [1.0685e+00, 1.0684e+00, 1.0683e+00,  ..., 6.9275e-03,
           7.2806e-03, 7.6443e-03],
          ...,
          [1.0667e+00, 1.0666e+00, 1.0666e+00,  ..., 2.253