In [1]:
!pip -q install -U keras-tuner gputil


  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for gputil (setup.py) ... [?25l[?25hdone


In [6]:
import os, json, time, platform, zipfile
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt
import GPUtil
from google.colab import files
from google.colab import drive

In [2]:
# Colab: Mount Drive \
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [8]:
# Paths
DATA_DIR = "/content/drive/MyDrive/EdgeMeter_AIv2/data"
WIN_KEY  = "48to12"
WIN_TAG  = "48_12"

HP_PATH  = os.path.join(DATA_DIR, f"best_lipformer_hp_{WIN_KEY}.json")
LOG_PATH = os.path.join(DATA_DIR, f"lipformer_tuning_log_{WIN_KEY}.json")

# Reproducibility
tf.random.set_seed(42)
np.random.seed(42)


In [9]:
# GPU info
gpus = tf.config.list_physical_devices('GPU')
print("GPUs visible to TF:", gpus)
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except Exception:
        pass
gpu_name = GPUtil.getGPUs()[0].name if GPUtil.getGPUs() else "None"
print("GPU in use:", gpu_name)

GPUs visible to TF: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU in use: NVIDIA A100-SXM4-40GB


In [10]:
# Loading Data (48→12)
def _path(d, base, tag):
    p1 = os.path.join(d, f"{base}_{tag}.npy")
    p0 = os.path.join(d, f"{base}.npy")
    if os.path.exists(p1): return p1
    if os.path.exists(p0): return p0
    raise FileNotFoundError(f"Missing {base}_{tag}.npy (or {base}.npy) in {d}")

X_train = np.load(_path(DATA_DIR, "X_train", WIN_TAG))
y_train = np.load(_path(DATA_DIR, "y_train", WIN_TAG))
X_val   = np.load(_path(DATA_DIR, "X_val",   WIN_TAG))
y_val   = np.load(_path(DATA_DIR, "y_val",   WIN_TAG))
X_test  = np.load(_path(DATA_DIR, "X_test",  WIN_TAG))
y_test  = np.load(_path(DATA_DIR, "y_test",  WIN_TAG))

# Shape checks
assert X_train.ndim==3 and X_val.ndim==3 and X_test.ndim==3, "X must be (N,T,F)"
assert y_train.ndim==2 and y_val.ndim==2 and y_test.ndim==2, "y must be (N,12)"
assert y_train.shape[1]==12 and y_val.shape[1]==12 and y_test.shape[1]==12, "y must have 12 steps"
assert X_train.shape[1]==48, f"Expected T=48; got {X_train.shape[1]}"

for name, arr in [("X_train",X_train), ("X_val",X_val), ("X_test",X_test),
                  ("y_train",y_train), ("y_val",y_val), ("y_test",y_test)]:
    if np.isnan(arr).any():
        raise ValueError(f"NaNs detected in {name}")

timesteps  = X_train.shape[1]  # 48
n_features = X_train.shape[2]
out_steps  = y_train.shape[1]  # 12

print(f"Train X: {X_train.shape} | y: {y_train.shape}")
print(f"Val   X: {X_val.shape}   | y: {y_val.shape}")
print(f"Test  X: {X_test.shape}  | y: {y_test.shape}")


Train X: (10719826, 48, 11) | y: (10719826, 12)
Val   X: (3072784, 48, 11)   | y: (3072784, 12)
Test  X: (1536392, 48, 11)  | y: (1536392, 12)


In [11]:
# 5% subset
subset_frac   = 0.05
n_train_small = max(1, int(subset_frac * X_train.shape[0]))
n_val_small   = max(1, int(subset_frac * X_val.shape[0]))

X_train_small = X_train[:n_train_small]
y_train_small = y_train[:n_train_small]
X_val_small   = X_val[:n_val_small]
y_val_small   = y_val[:n_val_small]

print(f"Subset shapes -> X_train_small: {X_train_small.shape} | X_val_small: {X_val_small.shape}")


Subset shapes -> X_train_small: (535991, 48, 11) | X_val_small: (153639, 48, 11)


In [12]:
# LiPFormer V2
def patchify(x, patch_size):
    # (B, T, F) -> (B, P, patch_size*F), P = floor(T/patch_size)
    T = tf.shape(x)[1]
    F = tf.shape(x)[2]
    t_trim = (T // patch_size) * patch_size
    x = x[:, :t_trim, :]
    P = t_trim // patch_size
    return tf.reshape(x, [tf.shape(x)[0], P, patch_size * F])

def lipformer_block(x, hp):
    """V1 LiPFormer idea + V2 upgrades:
       - Project to embed_dim
       - PreNorm -> Attention (choose MHA 2–4 heads OR lightweight 'linear' path)
       - Residual
       - PreNorm -> FFN (bigger ff_dim) + dropout
       - Residual
    """
    # search space
    patch_embed  = hp.Choice("embed_dim", values=[64, 96, 128])
    use_mha      = hp.Boolean("use_mha", default=True)
    num_heads    = hp.Int("num_heads", 2, 4, step=1)          # only used if use_mha=True
    key_dim      = hp.Int("key_dim",   16, 64, step=16)       # only used if use_mha=True
    attn_dropout = hp.Choice("attn_dropout", values=[0.0, 0.1, 0.2])
    ff_dim       = hp.Choice("ff_dim", values=[64, 128, 192, 256])  # ↑ vs V1
    ff_dropout   = hp.Choice("ff_dropout", values=[0.0, 0.1, 0.2])

    # Project patches to a stable embed size
    x = layers.Dense(patch_embed)(x)

    # Attention block
    y = layers.LayerNormalization(epsilon=1e-6)(x)
    if use_mha:
        # Multi-Head Attention (2–4 heads)
        y = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=key_dim, dropout=attn_dropout
        )(y, y)
    else:
        # Lightweight "linear" attention path
        q = layers.Dense(patch_embed)(y)
        k = layers.Dense(patch_embed)(y)
        v = layers.Dense(patch_embed)(y)
        scale = tf.math.sqrt(tf.cast(patch_embed, tf.float32))
        attn = tf.nn.softmax(tf.matmul(q, k, transpose_b=True) / scale, axis=-1)
        y = tf.matmul(attn, v)
        if attn_dropout > 0.0:
            y = layers.Dropout(attn_dropout)(y)
    x = layers.Add()([x, y])  # residual

    # FFN block
    y = layers.LayerNormalization(epsilon=1e-6)(x)
    y = layers.Dense(ff_dim, activation="gelu")(y)
    if ff_dropout > 0.0:
        y = layers.Dropout(ff_dropout)(y)
    y = layers.Dense(patch_embed)(y)
    x = layers.Add()([x, y])  # residual
    return x

def make_builder(timesteps, n_features, out_steps):
    def build_model(hp):
        patch_size = hp.Choice("patch_size", values=[4, 6, 8, 12])

        inputs = keras.Input(shape=(timesteps, n_features))
        x = layers.Lambda(lambda t: patchify(t, patch_size))(inputs)  # (B,P,patch_size*F)
        x = lipformer_block(x, hp)

        x = layers.GlobalAveragePooling1D()(x)
        outputs = layers.Dense(out_steps)(x)

        model = keras.Model(inputs, outputs)
        lr = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=lr),
            loss="mse",
            metrics=["mae"]
        )
        return model
    return build_model

builder = make_builder(timesteps, n_features, out_steps)


In [13]:
# Tuner
tuner = kt.BayesianOptimization(
    builder,
    objective="val_loss",
    max_trials=20,
    directory=DATA_DIR,
    project_name=f"lipformer_retune_{WIN_KEY}"
)

early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=4, restore_best_weights=True
)

device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
print("Using device:", device)

start_time = time.time()
with tf.device(device):
    tuner.search(
        X_train_small, y_train_small,
        validation_data=(X_val_small, y_val_small),
        epochs=50,
        batch_size=512,
        callbacks=[early_stop],
        verbose=1
    )
end_time = time.time()


Trial 20 Complete [00h 00m 01s]

Best val_loss So Far: 0.088422492146492
Total elapsed time: 00h 37m 36s


In [14]:
# Saving best HPs
best_hp = tuner.get_best_hyperparameters(1)[0]
hp_dict = {
    "patch_size":     best_hp.get("patch_size"),
    "embed_dim":      best_hp.get("embed_dim"),
    "use_mha":        best_hp.get("use_mha"),
    "num_heads":      best_hp.get("num_heads") if best_hp.get("use_mha") else None,
    "key_dim":        best_hp.get("key_dim")   if best_hp.get("use_mha") else None,
    "attn_dropout":   best_hp.get("attn_dropout"),
    "ff_dim":         best_hp.get("ff_dim"),
    "ff_dropout":     best_hp.get("ff_dropout"),
    "learning_rate":  best_hp.get("learning_rate"),
}
with open(HP_PATH, "w") as f:
    json.dump(hp_dict, f, indent=4)
print(f"[{WIN_KEY}] Best HPs saved → {HP_PATH}")


[48to12] Best HPs saved → /content/drive/MyDrive/EdgeMeter_AIv2/data/best_lipformer_hp_48to12.json


In [15]:
# Log
tmp_model = tuner.get_best_models(1)[0]
platform_info = platform.platform()

tune_log = {
    "window": WIN_KEY,
    "model": "LiPFormerV2",
    "task": "Smart Meter Energy Forecasting",
    "tuning_type": "BayesianOptimization",
    "subset_frac": subset_frac,
    "timesteps": int(timesteps),
    "n_features": int(n_features),
    "out_steps": int(out_steps),
    "tuning_time_minutes": round((end_time - start_time) / 60, 2),
    "best_hyperparameters": hp_dict,
    "total_params": int(tmp_model.count_params()),
    "input_shape": list(X_train.shape[1:]),
    "sequence_length": int(X_train.shape[1]),
    "gpu_used": gpu_name,
    "platform": platform_info,
    "log_type": "Tuning"
}
with open(LOG_PATH, "w") as f:
    json.dump(tune_log, f, indent=4)

print(f"[{WIN_KEY}] Tuning log  → {LOG_PATH}")
print("LiPFormer V2 48→12 tuning complete.")

[48to12] Tuning log  → /content/drive/MyDrive/EdgeMeter_AIv2/data/lipformer_tuning_log_48to12.json
LiPFormer V2 48→12 tuning complete.


  saveable.load_own_variables(weights_store.get(inner_path))
