# Imports & prerequisites

In [8]:
!cp /content/drive/MyDrive/Colab\ Notebooks/include/data_utils.py /content/
!cp /content/drive/MyDrive/Colab\ Notebooks/include/tuning_utils.py /content/
!cp /content/drive/MyDrive/Colab\ Notebooks/include/models.py /content/
!cp /content/drive/MyDrive/Colab\ Notebooks/include/sca_utils.py /content/
!pip install keras-tuner --upgrade



In [18]:
import tensorflow as tf
import numpy as np
from data_utils import SyntheticDataLoader, HdfToTfrAdapter, TfrDataLoader
import os, h5py

# Data preparation

To run the tuning campaign efficiently, the data is first converted to the TFRecord format.

In [10]:
# --- Configuration ---
# Window index handed to the converter.
WINDOW_TO_USE = 0
# Byte indices 0..15 whose files will be converted.
BYTES_TO_CONVERT = [*range(16)]
# Folder containing the per-byte HDF5 inputs (byte_<n>.h5).
SYNTHETIC_PROFILING_BASE_DIR = "/content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows"
# The converted TFRecord files are written to a sub-folder of the input folder.
OUTPUT_DIR = os.path.join(SYNTHETIC_PROFILING_BASE_DIR, "tfrecord_data")

In [11]:
# Convert the configured per-byte HDF5 files into TFRecord files under OUTPUT_DIR.
hdf_adapter = HdfToTfrAdapter()
hdf_adapter.convert_bytes(
    SYNTHETIC_PROFILING_BASE_DIR,
    BYTES_TO_CONVERT,
    OUTPUT_DIR,
    WINDOW_TO_USE,
)

Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_0.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_0.tfrecord


Converting byte_0.h5: 100%|██████████| 25001/25001 [00:08<00:00, 2889.25it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_0.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_1.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_1.tfrecord


Converting byte_1.h5: 100%|██████████| 25001/25001 [00:16<00:00, 1500.05it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_1.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_2.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_2.tfrecord


Converting byte_2.h5: 100%|██████████| 25001/25001 [00:12<00:00, 2014.06it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_2.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_3.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_3.tfrecord


Converting byte_3.h5: 100%|██████████| 25001/25001 [00:11<00:00, 2154.85it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_3.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_4.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_4.tfrecord


Converting byte_4.h5: 100%|██████████| 25001/25001 [00:08<00:00, 2826.00it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_4.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_5.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_5.tfrecord


Converting byte_5.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2606.33it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_5.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_6.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_6.tfrecord


Converting byte_6.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2583.69it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_6.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_7.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_7.tfrecord


Converting byte_7.h5: 100%|██████████| 25001/25001 [00:10<00:00, 2391.41it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_7.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_8.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_8.tfrecord


Converting byte_8.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2771.28it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_8.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_9.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_9.tfrecord


Converting byte_9.h5: 100%|██████████| 25001/25001 [00:08<00:00, 2919.19it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_9.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_10.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_10.tfrecord


Converting byte_10.h5: 100%|██████████| 25001/25001 [00:08<00:00, 2981.79it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_10.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_11.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_11.tfrecord


Converting byte_11.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2711.09it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_11.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_12.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_12.tfrecord


Converting byte_12.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2629.51it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_12.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_13.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_13.tfrecord


Converting byte_13.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2708.09it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_13.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_14.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_14.tfrecord


Converting byte_14.h5: 100%|██████████| 25001/25001 [00:08<00:00, 2852.78it/s]


Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_14.tfrecord
Starting conversion: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_15.h5 -> /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_15.tfrecord


Converting byte_15.h5: 100%|██████████| 25001/25001 [00:09<00:00, 2615.69it/s]

Conversion complete for /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_15.tfrecord





## Sanity checks

Run sanity checks to validate the conversion: the data in the HDF5 files should be identical to the data in the TFRecord files.

In [14]:
# --- Configuration ---
INPUT_DIM = 701
TFRECORD_FILE_TO_INSPECT = os.path.join(SYNTHETIC_PROFILING_BASE_DIR, "tfrecord_data/byte_6.tfrecord")

# --- Inspection Logic ---
# The loader instance is only needed here for its record-parsing method.
tfr_data_loader = TfrDataLoader()

if os.path.exists(TFRECORD_FILE_TO_INSPECT):
    # Open the file, keep only its first record, and decode that record
    # with the parser provided by the loader.
    dataset = tf.data.TFRecordDataset(TFRECORD_FILE_TO_INSPECT)
    first_record = dataset.take(1)
    parsed_record = first_record.map(tfr_data_loader._parse_tfrecord_fn)

    # Dump shape/dtype of the trace and value/dtype of the three scalar fields.
    print(f"--- Inspecting first record from: {TFRECORD_FILE_TO_INSPECT} ---\n")
    divider = "-" * 20
    for trace, label, plaintext, key_byte in parsed_record:
        print(f"Trace Shape: {trace.shape}")
        print(f"Trace Data Type: {trace.dtype}")
        print(divider)
        print(f"Label: {label.numpy()}")
        print(f"Label Data Type: {label.dtype}")
        print(divider)
        print(f"Plaintext Byte: {plaintext.numpy()}")
        print(f"Plaintext Data Type: {plaintext.dtype}")
        print(divider)
        print(f"Key Byte: {key_byte.numpy()}")
        print(f"Key Byte Data Type: {key_byte.dtype}")
else:
    # Nothing to inspect: report the missing file instead of raising.
    print(f"Error: File not found at '{TFRECORD_FILE_TO_INSPECT}'")

--- Inspecting first record from: /content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_6.tfrecord ---

Trace Shape: (701, 1)
Trace Data Type: <dtype: 'float32'>
--------------------
Label: 242
Label Data Type: <dtype: 'int32'>
--------------------
Plaintext Byte: 195
Plaintext Data Type: <dtype: 'uint8'>
--------------------
Key Byte: 199
Key Byte Data Type: <dtype: 'uint8'>


In [20]:
# Check the match between hdf5 and TFRecord data

# --- Configuration ---
INPUT_DIM = 701
WINDOW_TO_USE = 0

def _integrity_check_parser(example_proto):
    """Decode one serialized example into a ``(trace, label)`` pair.

    Only the two features needed for the comparison are declared: ``'trace'``
    (a byte string) and ``'label'`` (an int64).

    Args:
        example_proto: A serialized example, as yielded by a TFRecordDataset.

    Returns:
        A tuple ``(trace, label)``: the trace bytes decoded as float32 and
        reshaped to ``[INPUT_DIM, 1]``, and the label cast to int32.
    """
    schema = {
        'trace': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example_proto, schema)
    # The trace is stored as raw bytes; reinterpret them as float32 samples.
    flat_trace = tf.io.decode_raw(parsed['trace'], out_type=tf.float32)
    return (
        tf.reshape(flat_trace, [INPUT_DIM, 1]),
        tf.cast(parsed['label'], tf.int32),
    )

# For every byte index, compare the first record stored in the HDF5 file with the
# first record of the TFRecord file generated from it. Only record 0 is compared.
for target_byte in range(16):
    H5_FILE_PATH = f"/content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/byte_{target_byte}.h5"
    TFRECORD_FILE_PATH = f"/content/drive/MyDrive/mbedtls_synthetic_traces/RANDOM_datasets/profiling_windows/tfrecord_data/byte_{target_byte}.tfrecord"

    # --- 1. Ground Truth: Read the FIRST record (index 0) directly from HDF5 ---
    with h5py.File(H5_FILE_PATH, 'r') as hf:
        h5_trace = hf['windows'][0, WINDOW_TO_USE, :].astype('float32')
        # Add a trailing axis so the shape matches the [INPUT_DIM, 1] trace from the parser.
        h5_trace = np.expand_dims(h5_trace, axis=-1)
        h5_label = hf['labels'][0].astype('int32')

    # --- 2. Test Sample: Read the FIRST record from the TFRecord file ---
    raw_dataset = tf.data.TFRecordDataset(TFRECORD_FILE_PATH)
    parsed_dataset = raw_dataset.map(_integrity_check_parser)
    # Fetch the record explicitly instead of through a for-loop: with a loop, an
    # empty file would leave the variables unbound (first byte) or still holding
    # the previous byte's values, which could report a false success.
    first_pair = next(iter(parsed_dataset.take(1)), None)

    # --- 3. Perform the apples-to-apples comparison ---
    print(f"--- Data Integrity Check for byte_{target_byte} ---")
    if first_pair is None:
        print("❌ Failure. The TFRecord file contains no records.")
        continue

    tfrecord_trace, tfrecord_label = (tensor.numpy() for tensor in first_pair)
    are_labels_equal = (h5_label == tfrecord_label)
    are_traces_equal = np.allclose(h5_trace, tfrecord_trace, atol=1e-6)

    if are_traces_equal and are_labels_equal:
        print("✅ Success! The data in the TFRecord file is a match.")
    else:
        print(f"HDF5 Label: {h5_label}, TFRecord Label: {tfrecord_label}")
        print(f"Labels are identical: {are_labels_equal}")
        print(f"Traces are identical: {are_traces_equal}")
        print("❌ Failure. The data is still not matching.")

--- Data Integrity Check for byte_0 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_1 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_2 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_3 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_4 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_5 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_6 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_7 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_8 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_9 ---
✅ Success! The data in the TFRecord file is a match.
--- Data Integrity Check for byte_10 ---
✅ Success! The data in the TF

# Data preparation - RPA data

As with the general (random) dataset, convert the RPA-specific data to the TFRecord format.

In [None]:
# Point the conversion settings at the RPA attack-window data.
# NOTE: this overwrites SYNTHETIC_PROFILING_BASE_DIR and OUTPUT_DIR from the profiling
# configuration; re-run that configuration cell before repeating the profiling steps.
SYNTHETIC_PROFILING_BASE_DIR = "/content/drive/MyDrive/mbedtls_synthetic_traces/RPA_traces/attack_windows"
OUTPUT_DIR = os.path.join(SYNTHETIC_PROFILING_BASE_DIR, "tfrecord_data")

hdf_adapter = HdfToTfrAdapter()

# Use the same argument order as the profiling conversion call:
# (base directory, byte indices, output directory, window index).
# The previous call omitted the base directory and window index, shifting the arguments.
# Assumes the window index used for profiling also applies to the RPA data - TODO confirm.
hdf_adapter.convert_bytes(SYNTHETIC_PROFILING_BASE_DIR, list(range(16)), OUTPUT_DIR, WINDOW_TO_USE)