# LG3 MLP baseline (token input) demo
Uses the generalist VQ-VAE tokenizer to tokenize a 2-day input window (576 steps at 5-minute resolution); an MLP then predicts the following 1 day of Power (288 steps).


In [1]:
import os, sys
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt

# --- config ---
# Repository root: two directory levels above the current working directory.
ROOT = os.path.abspath(os.path.join('..', '..'))

# Train/val splits are read from one source directory, the test split from another.
_SOURCES_DIR = os.path.join(ROOT, 'lg3', 'data', 'processed_sources')
DATA_DIR_TRAIN = os.path.join(_SOURCES_DIR, 'elec1_f2')
DATA_DIR_TEST = os.path.join(_SOURCES_DIR, 'ohsung_f2')

TARGET_COL = 'Power'
TIN = 576  # 2 days at 5-min
TOUT = 288  # 1 day at 5-min

# Training hyperparameters.
BATCH_SIZE = 128
EPOCHS = 20
LR = 1e-3

# generalist tokenizer checkpoint (update if needed)
# NOTE(review): the 'generatlist_...' spelling is kept exactly as written;
# confirm it matches the directory name on disk.
_CKPT_PARTS = (
    'data', 'TOTEM_data_and_pretrained_tokenizers',
    'generatlist_pretrained_tokenizers', 'forecasting',
    'CD64_CW256_CF4_BS4096_ITR120000', 'checkpoints', 'final_model.pth',
)
TOKENIZER_CKPT = os.path.abspath(os.path.join(ROOT, *_CKPT_PARTS))

# add forecasting package to path for generalist tokenizer
# Each entry is prepended only if missing, so FORECASTING_ROOT ends up ahead
# of ROOT when both are newly added.
FORECASTING_ROOT = os.path.join(ROOT, 'forecasting')
for _extra_path in (ROOT, FORECASTING_ROOT):
    if _extra_path not in sys.path:
        sys.path.insert(0, _extra_path)


In [2]:
def load_split(split, data_dir):
    """Read one split CSV and return its target column as a time-sorted frame.

    Args:
        split: Split name; the file read is ``lg3_{split}.csv`` inside ``data_dir``.
        data_dir: Directory containing the split CSV files.

    Returns:
        A single-column DataFrame (column ``TARGET_COL``) sorted by its index.
        The index is the CSV's first column, parsed as dates.
    """
    csv_path = os.path.join(data_dir, f'lg3_{split}.csv')
    frame = pd.read_csv(csv_path, parse_dates=[0], index_col=0)

    # Keep numeric columns only, then discard every row that has a NaN in
    # any of them (not just in the target column).
    numeric = frame.select_dtypes(include=[np.number])
    complete = numeric.dropna(how='any')

    # For repeated index values, only the first occurrence is kept.
    is_repeat = complete.index.duplicated(keep='first')
    unique_rows = complete[~is_repeat]

    return unique_rows[[TARGET_COL]].sort_index()

def build_sequences(values, seq_len, pred_len):
    """Slice a series into overlapping (input, target) windows with stride 1.

    Args:
        values: Array-like of shape (N,) or (N, C). For 2-D input only
            column 0 is used.
        seq_len: Number of steps in each input window.
        pred_len: Number of steps in each target window; the target starts
            immediately after its input window.

    Returns:
        Tuple ``(x, y)`` of independent, writable float32 arrays with shapes
        ``(M, seq_len)`` and ``(M, pred_len)``, where
        ``M = N - (seq_len + pred_len) + 1``.

    Raises:
        ValueError: If a length is negative, if ``values`` is not 1-D or 2-D,
            or if there are too few rows for a single window.
    """
    if seq_len < 0 or pred_len < 0:
        raise ValueError('seq_len and pred_len must be non-negative.')

    series = np.asarray(values)
    if series.ndim == 2:
        series = series[:, 0]
    elif series.ndim != 1:
        raise ValueError('values must be 1-D or 2-D.')

    window = seq_len + pred_len
    max_start = len(series) - window + 1
    if max_start <= 0:
        raise ValueError('Not enough rows to build sequences.')

    # One strided, read-only view holds every (input + target) window; the
    # astype calls below materialize separate float32 copies for x and y.
    windows = np.lib.stride_tricks.sliding_window_view(series, window)
    x = windows[:, :seq_len].astype(np.float32)
    y = windows[:, seq_len:].astype(np.float32)
    return x, y

# Train and validation splits come from DATA_DIR_TRAIN; the test split is read
# from DATA_DIR_TEST.
train_df, val_df = (load_split(name, DATA_DIR_TRAIN) for name in ('train', 'val'))
test_df = load_split('test', DATA_DIR_TEST)

# Window every split into (TIN-step input, TOUT-step target) pairs.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    build_sequences(frame.to_numpy(np.float32), TIN, TOUT)
    for frame in (train_df, val_df, test_df)
)

# Report the resulting array shapes per split.
for tag, xs, ys in (
    ('train', X_train, y_train),
    ('val', X_val, y_val),
    ('test', X_test, y_test),
):
    print(f'X_{tag}', xs.shape, f'y_{tag}', ys.shape)


X_train (10157, 576) y_train (10157, 288)
X_val (711, 576) y_val (711, 288)
X_test (4204, 576) y_test (4204, 288)


In [None]:
# min-max normalization (keeps zeros if min=0)
# Statistics come from the training windows only and are reused unchanged for
# the validation and test windows.
x_min, x_max = X_train.min(), X_train.max()
x_scale = (x_max - x_min) + 1e-6  # small epsilon keeps the divisor non-zero
X_train_n, X_val_n, X_test_n = (
    (windows - x_min) / x_scale for windows in (X_train, X_val, X_test)
)

# Targets get their own min/max, again taken from the training targets only.
y_min, y_max = y_train.min(), y_train.max()
y_scale = (y_max - y_min) + 1e-6
y_train_n, y_val_n, y_test_n = (
    (targets - y_min) / y_scale for targets in (y_train, y_val, y_test)
)

from lib.models.vqvae import vqvae as gen_vqvae  # forecasting generalist

# Run on the first CUDA device when one is available, otherwise on CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# SECURITY NOTE: weights_only=False performs full unpickling, which can execute
# arbitrary code. PyTorch 2.6+ defaults to weights_only=True, and that mode
# rejects this checkpoint because it references the project class
# lib.models.vqvae.vqvae. Only load checkpoints obtained from a trusted source.
tokenizer = torch.load(TOKENIZER_CKPT, weights_only=False, map_location=device)
tokenizer.eval()

# Use the tokenizer's own compression factor if it exposes one; otherwise 4.
compression_factor = getattr(tokenizer, 'compression_factor', 4)

def tokenize_batch(x_batch):
    """Encode one batch of windows into VQ code indices.

    Args:
        x_batch: Array-like batch convertible to a float32 tensor; its first
            axis is treated as the batch axis.

    Returns:
        NumPy array of code indices with shape ``(B, L)``, where ``B`` is the
        batch size and ``L`` is the last dimension of the encoder output.
    """
    inputs = torch.tensor(x_batch, dtype=torch.float32, device=device)
    batch_size = inputs.shape[0]

    with torch.no_grad():
        latents = tokenizer.encoder(inputs, compression_factor)
        # The quantizer returns six values; only the fifth is used here.
        _, _, _, _, code_ids, _ = tokenizer.vq(latents)

    tokens_per_window = latents.shape[-1]
    code_grid = code_ids.view(batch_size, tokens_per_window)
    return code_grid.detach().cpu().numpy()

def tokenize_all(x, batch_size=256):
    """Tokenize an array in fixed-size chunks and stack the results.

    Args:
        x: Sliceable array of windows, chunked along its first axis.
        batch_size: Maximum number of rows passed to ``tokenize_batch`` per call.

    Returns:
        The per-chunk ``tokenize_batch`` outputs concatenated along axis 0.
    """
    chunk_starts = range(0, len(x), batch_size)
    chunks = [tokenize_batch(x[lo : lo + batch_size]) for lo in chunk_starts]
    return np.concatenate(chunks, axis=0)

# Tokenize the normalized inputs of every split with the default batch size.
X_train_tok, X_val_tok, X_test_tok = (
    tokenize_all(split_inputs) for split_inputs in (X_train_n, X_val_n, X_test_n)
)

# Report the token-array shape per split.
for label, toks in (
    ('X_train_tok', X_train_tok),
    ('X_val_tok', X_val_tok),
    ('X_test_tok', X_test_tok),
):
    print(label, toks.shape)


UnpicklingError: Weights only load failed. This file can still be loaded, to do so you have two options, [1mdo those steps only if you trust the source of the checkpoint[0m. 
	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL lib.models.vqvae.vqvae was not an allowed global by default. Please use `torch.serialization.add_safe_globals([lib.models.vqvae.vqvae])` or the `torch.serialization.safe_globals([lib.models.vqvae.vqvae])` context manager to allowlist this global if you trust this class/function.

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.

In [None]:
# token -> embedding -> MLP
# Take a detached copy of the tokenizer's VQ embedding weights so later use
# cannot modify the tokenizer itself.
_vq_embedding = tokenizer.vq._embedding
codebook = _vq_embedding.weight.detach().clone()

# Rows are codes, columns are embedding dimensions.
num_codes, embed_dim = codebook.shape

class TokenMLP(torch.nn.Module):
    """MLP that maps a sequence of VQ token ids to a flat forecast vector.

    Token ids are looked up in a frozen embedding table initialized from a
    pretrained codebook. The per-token embeddings are flattened and passed
    through a three-layer MLP (hidden sizes 512 and 256, ReLU activations).

    Args:
        num_codes: Number of rows in the embedding table.
        embed_dim: Width of each embedding vector.
        seq_len: Number of tokens per input sequence.
        out_dim: Size of the output vector.
        pretrained_codebook: Optional ``(num_codes, embed_dim)`` tensor used to
            initialize the embedding. When omitted, the notebook-level
            ``codebook`` tensor is used, which keeps existing four-argument
            calls working.

    Raises:
        ValueError: If the codebook shape is not ``(num_codes, embed_dim)``.
    """

    def __init__(self, num_codes, embed_dim, seq_len, out_dim, pretrained_codebook=None):
        super().__init__()
        if pretrained_codebook is None:
            # Backward-compatible fallback to the module-level tensor.
            pretrained_codebook = codebook
        # copy_ would silently broadcast a mismatched tensor, so check first.
        if tuple(pretrained_codebook.shape) != (num_codes, embed_dim):
            raise ValueError(
                f'codebook shape {tuple(pretrained_codebook.shape)} does not match '
                f'(num_codes, embed_dim)=({num_codes}, {embed_dim}).'
            )
        self.embed = torch.nn.Embedding(num_codes, embed_dim)
        self.embed.weight.data.copy_(pretrained_codebook)
        # The embedding table is kept fixed during training.
        self.embed.weight.requires_grad = False
        self.net = torch.nn.Sequential(
            torch.nn.Linear(seq_len * embed_dim, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, out_dim),
        )

    def forward(self, x):
        """Embed the token ids in ``x`` (batch first), flatten, and run the MLP."""
        z = self.embed(x)
        z = z.view(z.shape[0], -1)
        return self.net(z)

# Seed the global torch RNG before the model is built so that weight
# initialization and DataLoader shuffling repeat across runs.
SEED = 0
torch.manual_seed(SEED)

model = TokenMLP(num_codes, embed_dim, X_train_tok.shape[1], TOUT).to(device)
# Only trainable parameters go to the optimizer; parameters with
# requires_grad=False (the frozen embedding) are left out.
opt = torch.optim.Adam((p for p in model.parameters() if p.requires_grad), lr=LR)
loss_fn = torch.nn.MSELoss()

X_train_t = torch.from_numpy(X_train_tok).long()
y_train_t = torch.from_numpy(y_train_n).float()

train_ds = torch.utils.data.TensorDataset(X_train_t, y_train_t)
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

# Validation tensors stay on the device and are scored in a single pass each
# time progress is reported.
X_val_t = torch.from_numpy(X_val_tok).long().to(device)
y_val_t = torch.from_numpy(y_val_n).float().to(device)

for epoch in range(EPOCHS):
    model.train()
    total = 0.0
    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        opt.zero_grad()
        pred = model(xb)
        loss = loss_fn(pred, yb)
        loss.backward()
        opt.step()
        # Weight each batch loss by its size so the epoch average is per sample.
        total += loss.item() * xb.size(0)
    if (epoch + 1) % 5 == 0:
        model.eval()
        with torch.no_grad():
            val_mse = loss_fn(model(X_val_t), y_val_t).item()
        print(
            f'epoch {epoch+1} | train mse {total / len(train_ds):.6f}'
            f' | val mse {val_mse:.6f}'
        )


In [None]:
# Score the whole test set in one forward pass with gradients disabled.
model.eval()
with torch.no_grad():
    X_test_t = torch.from_numpy(X_test_tok).long().to(device)
    pred_n = model(X_test_t).cpu().numpy()

# Undo the target min-max scaling so errors are in the original units.
pred = pred_n * y_scale + y_min
residual = y_test - pred
mse = np.mean(residual ** 2)
mae = np.mean(np.abs(residual))
print(f'Test MSE: {mse:.6f} | MAE: {mae:.6f}')

# plot one example with input context
# The example sits 20% of the way through the test windows.
idx = int(0.2 * X_test.shape[0])
inp, true, pred_y = X_test[idx], y_test[idx], pred[idx]

# Both curves share the same input prefix and differ only after it.
full_x = np.concatenate([inp, true])
full_pred = np.concatenate([inp, pred_y])

plt.figure(figsize=(12, 4))
for curve, curve_label in (
    (full_x, 'true (input+target)'),
    (full_pred, 'pred (input+forecast)'),
):
    plt.plot(curve, label=curve_label, linewidth=1.0)
# Dashed line marks the last input step.
plt.axvline(TIN - 1, color='k', linestyle='--', linewidth=0.8)
plt.legend()
plt.title('Token MLP: input context + 1-day forecast')
plt.tight_layout()
plt.show()
