# Predict_TF_SpatialFreqs — GPU-Optimized A/B and Per-Phase Timing

This notebook compares your **legacy** NumPy-based pipeline with a **TensorFlow GPU-optimized** version.

It reports per-phase timings for the GPU path (patch extraction, feature computation, prediction) and the total run time for the legacy path.


In [None]:
import os, time, numpy as np, tensorflow as tf
from pathlib import Path

import pandas as pd

print('TF version:', tf.__version__)
print('GPU devices:', tf.config.list_physical_devices('GPU'))

from ml_spatialfreq_utils import TrainedModelTF, synth_fringe
import ml_spatialfreq_utils_v2 as v2
#import ml_spatialfreq_utils_tf as tfutils  # from previous step (feature-only TF module)


## Load model + test image
Adjust the paths below so they point to your saved model, scaler, and metadata files. As a quick smoke test, the next cell generates a synthetic fringe image.

In [None]:
# Smoke-test input: a synthetic fringe image. Swap this for real data when available.
NR = 512
NC = 511
g = synth_fringe(
    NR,
    NC,
    w0_x=0.25,
    w0_y=0.12,
    modulation=1.0,
    background=0.0,
    noise_std=0.02,
)

# Trained-model artifacts. TrainedModelTF takes the model path plus an optional
# scaler path and an optional metadata path; edit these to match your files.
model_path = 'model.h5'  # TODO: point to your actual file
scaler_path = None       # set this if a scaler was saved
meta_path = None         # set this if metadata was saved (should include patch_NR, patch_NC, r, c, optional scaler stats)
trained = TrainedModelTF(
    model_path=model_path,
    scaler_path=scaler_path,
    meta_path=meta_path,
)

# Inputs forwarded to the benchmark call further down the notebook.
feature_name = 'feature_projected_DFT'  # or 'feature_DFT' / 'feature_GV'
M_ROI = None  # NOTE(review): presumably an optional region-of-interest mask; confirm


In [None]:
# --- Resolve model paths from the DB Excel ---
# Needs `Path` (pathlib) and `pd` (pandas) from the notebook's import cell.
rootFolderDB = Path('../local_data/ML_Models')  # Adjust if needed
db_name = 'DB-trainingSets-OM4M007.xlsx'        # Same as MATLAB
db_sheet = 'Sheet1'
trainingSet_Idx = 12                            # 1-based row index


def _db_value(record, column):
    """Return record[column], or None when the column is absent or the cell is empty."""
    value = record.get(column)
    # Empty Excel cells come back from pandas as NaN, absent columns as None.
    if value is None or pd.isna(value):
        return None
    if isinstance(value, str) and not value.strip():
        return None
    return value


def _db_path(record, column, root):
    """Return root / record[column], or None (with a warning) when the entry is missing."""
    value = _db_value(record, column)
    if value is None:
        print(f"WARNING: DB column '{column}' is missing or empty; set this path manually.")
        return None
    return root / value


def _db_int(record, column):
    """Return int(record[column]), or None (with a warning) when the entry is missing."""
    value = _db_value(record, column)
    if value is None:
        print(f"WARNING: DB column '{column}' is missing or empty; set this value manually.")
        return None
    return int(value)


db_path = rootFolderDB / db_name
print("Reading DB:", db_path.resolve())
db_tb = pd.read_excel(db_path, sheet_name=db_sheet)

# Convert the 1-based training-set index into a 0-based positional row index.
row_idx = trainingSet_Idx - 1
assert 0 <= row_idx < len(db_tb), "trainingSet_Idx out of range"

row = db_tb.iloc[row_idx].to_dict()

# Expected columns for Python artifacts (preferred). Missing or empty entries
# resolve to None (with a warning) instead of raising, so they can be filled in below.
kerasModelPath = _db_path(row, 'trainedModel', rootFolderDB)
scalerPath = _db_path(row, 'scaler', rootFolderDB)
featureMetadataPath = _db_path(row, 'feature_metadata', rootFolderDB)
featureName = _db_value(row, 'featureName')
patch_NR = _db_int(row, 'patch_NR')
patch_NC = _db_int(row, 'patch_NC')

print("kerasModel Name:", kerasModelPath)
print("scaler name    :", scalerPath)
print("feature_metadata name      :", featureMetadataPath)
print("feature Name   :", featureName, " patch:", patch_NR, "x", patch_NC)

## Run A/B comparison + timings

In [None]:
# Run the A/B benchmark on the same image and unpack its 12 return values.
# NOTE(review): the A/B suffixes presumably denote the two pipelines being compared
# and tA/tB their timings; confirm against v2.benchmark_compare.
(
    wA, pxA, pyA, thA, QMA,
    wB, pxB, pyB, thB, QMB,
    tA, tB,
) = v2.benchmark_compare(g, trained, feature_name, M_ROI)


## Inspect numerical differences (quick checks)

In [None]:
def nanrms(a, b):
    """Root-mean-square difference between ``a`` and ``b``, ignoring NaN positions.

    Parameters
    ----------
    a, b : array_like
        Inputs of identical shape. Anything ``np.asarray`` accepts works
        (arrays, lists, scalars).

    Returns
    -------
    float
        RMS of ``a - b`` over positions where neither input is NaN, or NaN
        when no such position exists.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Work in float64 so low-precision inputs (e.g. float16) cannot overflow
    # or lose accuracy when the differences are squared and averaged.
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    if a.shape != b.shape:
        raise ValueError(f"nanrms: shape mismatch {a.shape} vs {b.shape}")

    m = ~(np.isnan(a) | np.isnan(b))
    if not np.any(m):
        return np.nan
    d = a[m] - b[m]
    return float(np.sqrt(np.mean(d * d)))

# Report the NaN-aware RMS difference for each pair of A/B outputs.
# Labels are padded so the printed colons line up.
for _label, _out_a, _out_b in (
    ('w_phi', wA, wB),
    ('phi_x', pxA, pxB),
    ('phi_y', pyA, pyB),
    ('theta', thA, thB),
    ('QM   ', QMA, QMB),
):
    print(f'RMS diff {_label}:', nanrms(_out_a, _out_b))


## Optional: Mixed precision
If you want to experiment with mixed precision on GPUs like T4:


In [None]:
# Optional experiment: switch the Keras global dtype policy to mixed float16.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')
print('Global policy set to:', mixed_precision.global_policy())
# Re-run the A/B block above to observe the effect on timings.
# NOTE(review): the global policy is only the default for layers constructed after
# this call. `trained` was loaded earlier, so re-running only the A/B cell may show
# no change; reload the model first (confirm how TrainedModelTF builds its model).
