In [15]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from icecream import ic
import plotly.express as px
from model_gpt import GPTLanguageModel
has_mps = torch.backends.mps.is_built()
# device = "cpu"
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


In [2]:
fn = "./datasets/CALCE/CALCE.csv"
df = pd.read_csv(fn)
df.columns

Index(['cycle', 'capacity_CS2_35', 'capacity_CS2_36', 'capacity_CS2_37',
       'capacity_CS2_38'],
      dtype='object')

In [None]:
spots_train = df["capacity_CS2_36"].dropna().to_numpy().reshape(-1, 1)
spots_test = df["capacity_CS2_35"].dropna().to_numpy().reshape(-1, 1)

In [10]:
print(f'spots_train.shape: {spots_train.shape}')
print(f'spots_train[0:10]: {spots_train[0:10]}')
print(f'spots_train[-10:]: {spots_train[-10:]}')

spots_train.shape: (936, 1)
spots_train[0:10]: [[1.13380661]
 [1.13341347]
 [1.13282861]
 [1.12703327]
 [1.12671169]
 [1.12426102]
 [1.11501152]
 [1.10896661]
 [1.10758826]
 [1.10577204]]
spots_train[-10:]: [[0.17422948]
 [0.1742386 ]
 [0.17423711]
 [0.16506673]
 [0.16506295]
 [0.16505914]
 [0.16507282]
 [0.16506366]
 [0.16506664]
 [0.16505913]]


In [11]:
scaler = StandardScaler()
spots_train = scaler.fit_transform(spots_train).flatten().tolist()
spots_test = scaler.transform(spots_test).flatten().tolist()
# Sequence Data Preparation
SEQUENCE_SIZE = 8

In [14]:
print(f'spots_train.len: {len(spots_train)}')
print(f'spots_train[0:10]: {spots_train[0:10]}')
print(f'spots_train[-10:]: {spots_train[-10:]}')

spots_train.len: 936
spots_train[0:10]: [1.1998494932510564, 1.1983320561944526, 1.1960746249612668, 1.1737058949769688, 1.1724646418337108, 1.1630056013799308, 1.127304561701165, 1.103972568777473, 1.0986524207465265, 1.0916422094762916]
spots_train[-10:]: [-2.503907245103581, -2.503872016219989, -2.5038777726247106, -2.5392734322452553, -2.5392880269808358, -2.539302743065862, -2.5392499156198314, -2.539285286941836, -2.539273770177477, -2.5393027867082005]


In [None]:
def to_sequences(seq_size, obs):
    x = []
    y = []
    for i in range(len(obs) - seq_size):
        window = obs[i:(i + seq_size)]
        after_window = obs[i + seq_size]
        x.append(window)
        y.append(after_window)
    return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, 1), torch.tensor(y, dtype=torch.float32).view(-1, 1)

x_train, y_train = to_sequences(SEQUENCE_SIZE, spots_train)
x_test, y_test = to_sequences(SEQUENCE_SIZE, spots_test)




'DataLoader\n- Batches data for efficienct training\n- Shuffles data befoe each epoch to prevent model from memorizing the order\nIt randomly select batch(32) size from train_dataset and group them into a batch [32, (sequence)10, 1] for x_train and [32, 1] for y_train\nreturn as PyTorch Tensor for training\n'

In [16]:

model = GPTLanguageModel().to(device)



In [17]:
# Train the model
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)

epochs = 1000
early_stop_count = 0
min_val_loss = float('inf')

In [None]:
x_batch, y_batch = batch
x_batch, y_batch = x_batch.to(device), y_batch.to(device)

optimizer.zero_grad()
outputs = model(x_batch)
# ic(outputs, y_batch)
loss = criterion(outputs, y_batch)
train_losses.append(loss.item())
loss.backward() # calculate gradients
optimizer.step() # update weights based on gradients and learning rate

In [None]:


for epoch in range(1):
    model.train()
    train_losses = []
    for batch in train_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(x_batch)
        # ic(outputs, y_batch)
        loss = criterion(outputs, y_batch)
        train_losses.append(loss.item())
        loss.backward() # calculate gradients
        optimizer.step() # update weights based on gradients and learning rate

    train_loss = np.mean(train_losses)

    # Validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            val_losses.append(loss.item())

    val_loss = np.mean(val_losses)
    scheduler.step(val_loss)

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        early_stop_count = 0
    else:
        early_stop_count += 1

    if early_stop_count >= 5:
        print("Early stopping!")
        break
    print(f"Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
    
# Evaluation
from sklearn.metrics import mean_absolute_error
# Evaluation
model.eval()
predictions = []
actuals = []

with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch = x_batch.to(device)
        outputs = model(x_batch)
        predictions.extend(outputs.squeeze().tolist())
        actuals.extend(y_batch.squeeze().tolist())



Epoch 1/1000, Train Loss: 0.8894, Validation Loss: 2.1469


In [10]:
# Convert to numpy arrays and inverse transform
predictions = np.array(predictions).reshape(-1, 1)
actuals = np.array(actuals).reshape(-1, 1)

predictions_inv = scaler.inverse_transform(predictions)
actuals_inv = scaler.inverse_transform(actuals)

# Compute Scores
rmse = np.sqrt(np.mean((predictions_inv - actuals_inv) ** 2))
mae = mean_absolute_error(actuals_inv, predictions_inv)
re = np.mean(np.abs((actuals_inv - predictions_inv) / actuals_inv))  # Percentage form


print(f"Score (Relative Error): {re:.4f}")
print(f"Score (MAE): {mae:.4f}")
print(f"Score (RMSE): {rmse:.4f}")


Score (Relative Error): 0.5159
Score (MAE): 0.3188
Score (RMSE): 0.3779
