In [1]:
!pip -q install -U keras-tuner gputil

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for gputil (setup.py) ... [?25l[?25hdone


In [2]:
import os, json, time, platform, zipfile
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import keras_tuner as kt
import GPUtil
from google.colab import files
from google.colab import drive

In [3]:

drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [4]:
# Paths
DATA_DIR = "/content/drive/MyDrive/EdgeMeter_AIv2/data"
WIN_KEY  = "48to12"
WIN_TAG  = "48_12"


HP_PATH  = os.path.join(DATA_DIR, f"best_tinyformer_hp_{WIN_KEY}.json")
LOG_PATH = os.path.join(DATA_DIR, f"tinyformer_tuning_log_{WIN_KEY}.json")

# Reproducibility
tf.random.set_seed(42)
np.random.seed(42)

In [5]:
# GPU info
gpus = tf.config.list_physical_devices('GPU')
print("GPUs visible to TF:", gpus)
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except Exception:
        pass
gpu_name = GPUtil.getGPUs()[0].name if GPUtil.getGPUs() else "None"
print("GPU in use:", gpu_name)


GPUs visible to TF: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU in use: NVIDIA A100-SXM4-40GB


In [6]:
# Loading Data (48-12)
def _path(d, base, tag):
    p1 = os.path.join(d, f"{base}_{tag}.npy")
    p0 = os.path.join(d, f"{base}.npy")
    if os.path.exists(p1): return p1
    if os.path.exists(p0): return p0
    raise FileNotFoundError(f"Missing {base}_{tag}.npy (or {base}.npy) in {d}")

X_train = np.load(_path(DATA_DIR, "X_train", WIN_TAG))
y_train = np.load(_path(DATA_DIR, "y_train", WIN_TAG))
X_val   = np.load(_path(DATA_DIR, "X_val",   WIN_TAG))
y_val   = np.load(_path(DATA_DIR, "y_val",   WIN_TAG))
X_test  = np.load(_path(DATA_DIR, "X_test",  WIN_TAG))
y_test  = np.load(_path(DATA_DIR, "y_test",  WIN_TAG))


In [7]:
# Shape checks
assert X_train.ndim==3 and X_val.ndim==3 and X_test.ndim==3, "X must be (N,T,F)"
assert y_train.ndim==2 and y_val.ndim==2 and y_test.ndim==2, "y must be (N,12)"
assert y_train.shape[1]==12 and y_val.shape[1]==12 and y_test.shape[1]==12, "y must have 12 steps"
assert X_train.shape[1]==48, f"Expected T=48; got {X_train.shape[1]}"


In [8]:
# NaN guard
for arr_name, arr in [("X_train", X_train), ("X_val", X_val), ("X_test", X_test),
                      ("y_train", y_train), ("y_val", y_val), ("y_test", y_test)]:
    if np.isnan(arr).any():
        raise ValueError(f"NaNs detected in {arr_name}")

timesteps  = X_train.shape[1]  # 48
n_features = X_train.shape[2]
out_steps  = y_train.shape[1]  # 12

print(f"Train X: {X_train.shape} | y: {y_train.shape}")
print(f"Val   X: {X_val.shape}   | y: {y_val.shape}")
print(f"Test  X: {X_test.shape}  | y: {y_test.shape}")


Train X: (10719826, 48, 11) | y: (10719826, 12)
Val   X: (3072784, 48, 11)   | y: (3072784, 12)
Test  X: (1536392, 48, 11)  | y: (1536392, 12)


In [9]:
# 5% Subset
subset_frac   = 0.05
n_train_small = max(1, int(subset_frac * X_train.shape[0]))
n_val_small   = max(1, int(subset_frac * X_val.shape[0]))

X_train_small = X_train[:n_train_small]
y_train_small = y_train[:n_train_small]
X_val_small   = X_val[:n_val_small]
y_val_small   = y_val[:n_val_small]

print(f"Subset shapes -> X_train_small: {X_train_small.shape} | X_val_small: {X_val_small.shape}")


Subset shapes -> X_train_small: (535991, 48, 11) | X_val_small: (153639, 48, 11)


In [10]:
# TinyFormer V2 block (V1 core + your upgrades)
def patchify(x, patch_size):
    # x: (B, T, F) → (B, P, patch_size*F), where P = floor(T/patch_size)
    T = tf.shape(x)[1]
    F = tf.shape(x)[2]
    t_trim = (T // patch_size) * patch_size
    x = x[:, :t_trim, :]
    new_len = t_trim // patch_size
    x = tf.reshape(x, [tf.shape(x)[0], new_len, patch_size * F])
    return x

def tinyformer_block(x, hp):
    """PreNorm Transformer encoder block with MHA + FFN + residuals."""
    # Hyperparams
    num_heads    = hp.Int("num_heads", min_value=2, max_value=4, step=1)
    key_dim      = hp.Int("key_dim",   min_value=16, max_value=64, step=16)
    attn_drop    = hp.Choice("attn_dropout", values=[0.0, 0.1, 0.2])
    ff_dim       = hp.Choice("ff_dim", values=[64, 128])       # ↑ from V1 (32→64/128)
    ff_dropout   = hp.Choice("ff_dropout", values=[0.1, 0.2])  # ~0.1–0.2
    embed_dim    = hp.Choice("embed_dim", values=[64, 96, 128])

    # Project patches to embed_dim (so MHA/FFN dims are stable)
    x = layers.Dense(embed_dim)(x)

    # PreNorm + MHA
    y = layers.LayerNormalization(epsilon=1e-6)(x)
    y = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=key_dim, dropout=attn_drop
    )(y, y)
    x = layers.Add()([x, y])  # residual

    # PreNorm + FFN
    y = layers.LayerNormalization(epsilon=1e-6)(x)
    y = layers.Dense(ff_dim, activation="gelu")(y)
    y = layers.Dropout(ff_dropout)(y)
    y = layers.Dense(embed_dim)(y)
    x = layers.Add()([x, y])  # residual

    return x

def make_builder(timesteps, n_features, out_steps):
    def build_model(hp):
        # TinyFormer-specific HPs (patching + block)
        patch_size = hp.Choice("patch_size", values=[4, 6, 8, 12])

        inputs = keras.Input(shape=(timesteps, n_features))
        x = layers.Lambda(lambda t: patchify(t, patch_size))(inputs)  # (B,P,patch_size*F)

        # Single TinyFormer block
        x = tinyformer_block(x, hp)

        # Pool and predict 12 steps
        x = layers.GlobalAveragePooling1D()(x)
        outputs = layers.Dense(out_steps)(x)

        model = keras.Model(inputs, outputs)
        lr = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=lr),
            loss="mse",
            metrics=["mae"]
        )
        return model
    return build_model

builder = make_builder(timesteps, n_features, out_steps)


In [11]:
# Tuner
tuner = kt.BayesianOptimization(
    builder,
    objective="val_loss",
    max_trials=20,
    directory=DATA_DIR,
    project_name=f"tinyformer_retune_{WIN_KEY}"
)

early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=4,
    restore_best_weights=True
)

batch_size = 512

device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
print("Using device:", device)

Using device: /GPU:0


In [12]:
start_time = time.time()
with tf.device(device):
    tuner.search(
        X_train_small, y_train_small,
        validation_data=(X_val_small, y_val_small),
        epochs=50,
        batch_size=batch_size,
        callbacks=[early_stop],
        verbose=1
    )
end_time = time.time()

Trial 20 Complete [00h 01m 17s]
val_loss: 0.10463100671768188

Best val_loss So Far: 0.08431573212146759
Total elapsed time: 00h 49m 56s


In [13]:
# Saving best HPs
best_hp = tuner.get_best_hyperparameters(1)[0]
hp_dict = {
    "patch_size":     best_hp.get("patch_size"),
    "embed_dim":      best_hp.get("embed_dim"),
    "num_heads":      best_hp.get("num_heads"),
    "key_dim":        best_hp.get("key_dim"),
    "attn_dropout":   best_hp.get("attn_dropout"),
    "ff_dim":         best_hp.get("ff_dim"),
    "ff_dropout":     best_hp.get("ff_dropout"),
    "learning_rate":  best_hp.get("learning_rate")
}
with open(HP_PATH, "w") as f:
    json.dump(hp_dict, f, indent=4)
print(f"[{WIN_KEY}] Best HPs saved → {HP_PATH}")

[48to12] Best HPs saved → /content/drive/MyDrive/EdgeMeter_AIv2/data/best_tinyformer_hp_48to12.json


In [14]:
# Log
tmp_model = tuner.get_best_models(1)[0]
platform_info = platform.platform()

tune_log = {
    "window": WIN_KEY,
    "model": "TinyFormerV2",
    "task": "Smart Meter Energy Forecasting",
    "tuning_type": "BayesianOptimization",
    "subset_frac": subset_frac,
    "timesteps": int(timesteps),
    "n_features": int(n_features),
    "out_steps": int(out_steps),
    "tuning_time_minutes": round((end_time - start_time) / 60, 2),
    "best_hyperparameters": hp_dict,
    "total_params": int(tmp_model.count_params()),
    "input_shape": list(X_train.shape[1:]),
    "sequence_length": int(X_train.shape[1]),
    "gpu_used": gpu_name,
    "platform": platform_info,
    "log_type": "Tuning"
}
with open(LOG_PATH, "w") as f:
    json.dump(tune_log, f, indent=4)

print(f"[{WIN_KEY}] Tuning log  → {LOG_PATH}")
print("TinyFormer V2 48→12 tuning complete.")

[48to12] Tuning log  → /content/drive/MyDrive/EdgeMeter_AIv2/data/tinyformer_tuning_log_48to12.json
✅ TinyFormer V2 48→12 tuning complete.


  saveable.load_own_variables(weights_store.get(inner_path))
