In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
import plotly.express as px
import random
import os
has_mps = torch.backends.mps.is_built()
# device = "cpu"
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [2]:
def set_seed(seed):
    random.seed(seed)                       # Python RNG
    np.random.seed(seed)                    # NumPy RNG
    torch.manual_seed(seed)                 # PyTorch CPU RNG
    torch.cuda.manual_seed(seed)            # PyTorch current GPU RNG
    torch.cuda.manual_seed_all(seed)        # PyTorch all GPU RNGs

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # Better reproducibility

    os.environ["PYTHONHASHSEED"] = str(seed)  # For hash-based functions

# Example usage
set_seed(18)

In [3]:
fn = "./datasets/CALCE/CALCE.csv"
df = pd.read_csv(fn)
df.columns

Index(['cycle', 'capacity_CS2_35', 'capacity_CS2_36', 'capacity_CS2_37',
       'capacity_CS2_38'],
      dtype='object')

In [4]:
spots_train_1 = df["capacity_CS2_36"].to_numpy().reshape(-1, 1)
spots_train_2 = df["capacity_CS2_37"].to_numpy().reshape(-1, 1)
spots_train_3 = df["capacity_CS2_38"].to_numpy().reshape(-1, 1)

spots_test = df["capacity_CS2_35"].to_numpy().reshape(-1, 1)

In [5]:
scaler = StandardScaler()
spots_train_1 = scaler.fit_transform(spots_train_1).flatten().tolist()
spots_train_2 = scaler.fit_transform(spots_train_2).flatten().tolist()
spots_train_3 = scaler.fit_transform(spots_train_3).flatten().tolist()

spots_test = scaler.transform(spots_test).flatten().tolist()

# Sequence Data Preparation
SEQUENCE_SIZE = 30

In [6]:


def to_sequences(seq_size, obs):
    x = []
    y = []
    for i in range(len(obs) - seq_size):
        window = obs[i:(i + seq_size)]
        after_window = obs[i + seq_size]
        x.append(window)
        y.append(after_window)
    return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, 1), torch.tensor(y, dtype=torch.float32).view(-1, 1)

x_train_1, y_train_1 = to_sequences(SEQUENCE_SIZE, spots_train_1)
x_train_2, y_train_2 = to_sequences(SEQUENCE_SIZE, spots_train_2)
x_train_3, y_train_3 = to_sequences(SEQUENCE_SIZE, spots_train_3)
x_test, y_test = to_sequences(SEQUENCE_SIZE, spots_test)

# Setup data loaders for batch
'''TensorDataset
- TensorDataset is a special PyTorch wrapper that combines muliple tensors into a single dataset
- x_train and y_train must have the same first dimension
- this allows easy retrieval of (input, target) pairs during training
- makes integration with DataLoader seamless
'''

'''DataLoader
- Batches data for efficienct training
- Shuffles data befoe each epoch to prevent model from memorizing the order
It randomly select batch(32) size from train_dataset and group them into a batch [32, (sequence)10, 1] for x_train and [32, 1] for y_train
return as PyTorch Tensor for training
'''



'DataLoader\n- Batches data for efficienct training\n- Shuffles data befoe each epoch to prevent model from memorizing the order\nIt randomly select batch(32) size from train_dataset and group them into a batch [32, (sequence)10, 1] for x_train and [32, 1] for y_train\nreturn as PyTorch Tensor for training\n'

In [7]:
x_train_1.shape


torch.Size([852, 30, 1])

In [8]:
x_train_2.shape

torch.Size([852, 30, 1])

In [9]:
x_train = torch.cat((x_train_1, x_train_2, x_train_3), dim=0)
y_train = torch.cat((y_train_1, y_train_2, y_train_3), dim=0)

In [10]:
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [11]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        self.pe = nn.Embedding(max_len, d_model)

    def forward(self, x):
        print(x.shape)
        pos_emb = self.pe(torch.arange(x[1], dtype=torch.long, device=x.device))
        x = x + pos_emb
        return self.dropout(x)

# Helper function to generate causal mask
def generate_causal_mask(sz, device):
    mask = torch.triu(torch.ones(sz, sz, device=device) * float('-inf'), diagonal=1)
    return mask

# Transformer-based Model with Causal Masking
class TransformerModel(nn.Module):
    def __init__(self, input_dim=1, d_model=64, nhead=4, num_layers=2, dropout=0.2):
        super(TransformerModel, self).__init__()

        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)

    def forward(self, x):
        # x shape: [batch_size, seq_len, input_dim]
        batch_size, seq_len, _ = x.size()

        x = self.encoder(x)  # [batch_size, seq_len, d_model]
        x = x.transpose(0, 1)  # [seq_len, batch_size, d_model]
        x = self.pos_encoder(x)

        causal_mask = generate_causal_mask(seq_len, x.device)  # [seq_len, seq_len]
        x = self.transformer_encoder(x, mask=causal_mask)

        x = x.transpose(0, 1)  # [batch_size, seq_len, d_model]
        out = self.decoder(x[:, -1, :])  # take the last time step
        return out

model = TransformerModel().to(device)





In [12]:
# Train the model
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)

epochs = 1000
early_stop_count = 0
min_val_loss = float('inf')

for epoch in range(epochs):
    model.train()
    train_losses = []
    for batch in train_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        train_losses.append(loss.item())
        loss.backward() # calculate gradients
        optimizer.step() # update weights based on gradients and learning rate

    train_loss = np.mean(train_losses)

    # Validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            val_losses.append(loss.item())

    val_loss = np.mean(val_losses)
    scheduler.step(val_loss)

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        early_stop_count = 0
    else:
        early_stop_count += 1

    if early_stop_count >= 5:
        print("Early stopping!")
        break
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
    
# Evaluation
from sklearn.metrics import mean_absolute_error
# Evaluation
model.eval()
predictions = []
actuals = []

with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch = x_batch.to(device)
        outputs = model(x_batch)
        predictions.extend(outputs.squeeze().tolist())
        actuals.extend(y_batch.squeeze().tolist())



  _torch_pytree._register_pytree_node(


torch.Size([30, 32, 64])


TypeError: arange() received an invalid combination of arguments - got (Tensor, device=torch.device, dtype=torch.dtype), but expected one of:
 * (Number end, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (Number start, Number end, *, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (Number start, Number end, Number step, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


In [None]:
# Convert to numpy arrays and inverse transform
predictions = np.array(predictions).reshape(-1, 1)
actuals = np.array(actuals).reshape(-1, 1)

predictions_inv = scaler.inverse_transform(predictions)
actuals_inv = scaler.inverse_transform(actuals)

# Compute Scores
rmse = np.sqrt(np.mean((predictions_inv - actuals_inv) ** 2))
mae = mean_absolute_error(actuals_inv, predictions_inv)
re = np.mean(np.abs((actuals_inv - predictions_inv) / actuals_inv))  # Percentage form


print(f"Score (Relative Error): {re:.4f}")
print(f"Score (MAE): {mae:.4f}")
print(f"Score (RMSE): {rmse:.4f}")


Score (Relative Error): 0.0543
Score (MAE): 0.0244
Score (RMSE): 0.0522


In [None]:
# auto_regressive both improve performance and needs less training resources