In [1]:
import os  # filesystem ops

# Create the folder that later receives inference.py and requirements.txt.
# exist_ok=True makes re-running this cell harmless.
os.makedirs("deploy_bmi/code", exist_ok=True)  # ensure folder exists

inference_py = r'''
import os
import io
import json
import base64
import pickle

import numpy as np
import tensorflow as tf
from PIL import Image


# Output names, in the column order used by predict_fn to label the values
# returned by the model after inverse scaling.
TARGET_COLS = [
    "ankle",
    "arm-length",
    "bicep",
    "calf",
    "chest",
    "forearm",
    "hip",
    "leg-length",
    "shoulder-breadth",
    "shoulder-to-crotch",
    "thigh",
    "waist",
    "wrist",
    "weight_kg",
]

# Numeric code for each gender string accepted in a request
# (predict_fn lower-cases and strips the string before the lookup).
GENDER_MAPPING = {
    "female": 0,
    "male": 1,
}


def _b64_to_pil(b64_str: str):
    """Decode a base64 string and return the image it contains in RGB mode."""
    raw_stream = io.BytesIO(base64.b64decode(b64_str))
    decoded = Image.open(raw_stream)
    return decoded.convert("RGB")


def _extract_features(feature_model, pil_img: Image.Image):
    """Turn one PIL image into a flat feature vector.

    The image is resized to 600x600, converted to a float32 batch of one,
    passed through the EfficientNet ``preprocess_input`` helper and then
    through ``feature_model.predict``. The prediction is flattened to 1-D.
    """
    resized = pil_img.resize((600, 600))
    # Add the leading batch axis expected by predict().
    batch = np.array(resized).astype(np.float32)[np.newaxis, ...]
    batch = tf.keras.applications.efficientnet.preprocess_input(batch)
    features = feature_model.predict(batch, verbose=0)
    return np.asarray(features).reshape(-1)


def model_fn(model_dir: str):
    """Load every artifact used at inference time from ``model_dir``.

    Expected layout under ``model_dir``:
        model/1/                         SavedModel of the ANN
        assets/efficientnetb7_imagenet.h5
        assets/scaler_robust_features.pkl
        assets/scaler_standard_features.pkl
        assets/scaler_targets.pkl

    Returns:
        dict with keys ``bmi_model``, ``feature_model``, ``robust_scaler``,
        ``height_scaler`` and ``target_scaler``.
    """

    def _unpickle(path):
        # pickle.load can execute arbitrary code: only artifacts bundled by
        # the packaging step should ever be placed in assets/.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    assets_dir = os.path.join(model_dir, "assets")

    # ANN stored as a SavedModel in the TF Serving layout model/1.
    bmi_model = tf.keras.models.load_model(
        os.path.join(model_dir, "model", "1"), compile=False
    )

    # Build EfficientNetB7 without weights (no download) and load the bundled
    # weight file; the second-to-last layer's output is used as the feature vector.
    backbone = tf.keras.applications.EfficientNetB7(weights=None)
    backbone.load_weights(os.path.join(assets_dir, "efficientnetb7_imagenet.h5"))
    feature_model = tf.keras.Model(
        inputs=backbone.inputs, outputs=backbone.layers[-2].output
    )

    artifacts = {"bmi_model": bmi_model, "feature_model": feature_model}
    scaler_files = (
        ("robust_scaler", "scaler_robust_features.pkl"),
        ("height_scaler", "scaler_standard_features.pkl"),
        ("target_scaler", "scaler_targets.pkl"),
    )
    for key, filename in scaler_files:
        artifacts[key] = _unpickle(os.path.join(assets_dir, filename))
    return artifacts


def input_fn(request_body, request_content_type):
    if request_content_type != "application/json":
        raise ValueError("Only application/json is supported")

    payload = json.loads(request_body)

    front_img = _b64_to_pil(payload["front_mask_b64"])
    side_img = _b64_to_pil(payload["side_mask_b64"])
    gender = payload["gender"]
    height_cm = float(payload["height_cm"])

    return {"front_img": front_img, "side_img": side_img, "gender": gender, "height_cm": height_cm}


def predict_fn(data, m):
    """Produce named predictions for one parsed request.

    Args:
        data: dict from ``input_fn`` (``front_img``, ``side_img``, ``gender``,
            ``height_cm``).
        m: dict from ``model_fn`` holding the models and scalers.

    Returns:
        dict with ``predictions`` (name -> float) and ``predictions_order``
        (the list of names in model output order).
    """
    feature_model, bmi_model, robust_scaler, height_scaler, target_scaler = (
        m[name]
        for name in ("feature_model", "bmi_model", "robust_scaler", "height_scaler", "target_scaler")
    )

    # Front-view features first, then side-view, joined into one vector.
    image_feats = np.concatenate(
        [_extract_features(feature_model, data[view]) for view in ("front_img", "side_img")],
        axis=0,
    )
    feats_scaled = robust_scaler.transform(image_feats.reshape(1, -1))

    # Strings are mapped through GENDER_MAPPING; anything else is taken as a number.
    gender = data["gender"]
    if isinstance(gender, str):
        gender = GENDER_MAPPING[gender.strip().lower()]
    gender_code = float(gender)

    height = np.array([[data["height_cm"]]], dtype=np.float32)
    height_scaled = height_scaler.transform(height)

    # Model input row: scaled image features, gender code, scaled height.
    x = np.concatenate(
        [feats_scaled, np.array([[gender_code]], dtype=np.float32), height_scaled.astype(np.float32)],
        axis=1,
    )

    y = target_scaler.inverse_transform(bmi_model.predict(x, verbose=0))

    preds = {col: float(y[0, idx]) for idx, col in enumerate(TARGET_COLS)}
    return {"predictions": preds, "predictions_order": TARGET_COLS}


def output_fn(prediction, accept):
    """Serialize ``prediction`` as JSON for the response.

    Args:
        prediction: JSON-serializable object (the dict built by ``predict_fn``).
        accept: value of the request's Accept header. It may be missing, a
            wildcard, a comma-separated list, or carry parameters such as
            ``;q=0.9``.

    Returns:
        Tuple ``(body, content_type)``; ``content_type`` is always
        ``"application/json"``.

    Raises:
        ValueError: if the client does not accept JSON.
    """
    # A missing or empty Accept header is treated as "anything is fine".
    requested = {
        part.split(";", 1)[0].strip().lower()
        for part in (accept or "*/*").split(",")
    }
    if requested & {"application/json", "application/*", "*/*"}:
        return json.dumps(prediction), "application/json"
    raise ValueError("Only application/json is supported")


# Artifacts returned by model_fn, cached per worker process by handler().
_ARTIFACTS = None


def handler(data, context):
    """Single entry point for the SageMaker TensorFlow Serving container.

    That container does not call ``model_fn`` / ``input_fn`` / ``predict_fn`` /
    ``output_fn``; it looks for ``handler`` (or ``input_handler`` plus
    ``output_handler``) in inference.py. This function chains the existing
    four functions so the script is usable there without changing them.
    NOTE(review): the container contract comes from the container's
    documentation, not from this file -- confirm against the image in use.

    Args:
        data: request body, either a stream exposing ``read()`` or raw
            bytes/str.
        context: request context exposing ``request_content_type`` and
            ``accept_header``.

    Returns:
        Tuple ``(body, content_type)`` as produced by ``output_fn``.
    """
    global _ARTIFACTS
    if _ARTIFACTS is None:
        # Loaded once, on the first request, and reused afterwards.
        _ARTIFACTS = model_fn(os.environ.get("SM_MODEL_DIR", "/opt/ml/model"))

    body = data.read() if hasattr(data, "read") else data
    parsed = input_fn(body, context.request_content_type)
    return output_fn(predict_fn(parsed, _ARTIFACTS), context.accept_header)
'''

# Write the inference script and its pip requirements side by side, then list
# the folder so the cell output confirms both files are present.
# NOTE(review): the requirements are unpinned; the pickled scalers presumably
# need the scikit-learn version they were created with -- confirm and pin.
for target_path, content in (
    ("deploy_bmi/code/inference.py", inference_py),
    ("deploy_bmi/code/requirements.txt", "scikit-learn\npillow\nnumpy\n"),
):
    with open(target_path, "w") as f:
        f.write(content)

print("‚úÖ Updated deploy_bmi/code/inference.py and requirements.txt")
print("üìÇ", os.listdir("deploy_bmi/code"))


✅ Updated deploy_bmi/code/inference.py and requirements.txt
📂 ['requirements.txt', 'inference.py']


In [2]:
import os
import tarfile
import shutil
import boto3
from urllib.parse import urlparse
import tensorflow as tf

# ---- inputs ----
# Training output archive; it is downloaded and searched for the .h5 model below.
H5_TAR_S3_URI = "s3://ai-bmi-predictor/trained-models/efficientnet-models/eff-ann-v8-training-2025-12-18-11-02-51-500/output/model.tar.gz"

# Pickled scalers; each is downloaded into assets/ under the file name that
# inference.py opens.
ROBUST_SCALER_S3_URI = "s3://ai-bmi-predictor/scalers/scaler_robust_features.pkl"
HEIGHT_SCALER_S3_URI = "s3://ai-bmi-predictor/scalers/scaler_standard_features.pkl"
TARGET_SCALER_S3_URI = "s3://ai-bmi-predictor/scalers/scaler_targets.pkl"

# Destination of the repackaged model.tar.gz built at the end of this cell.
PACKAGED_S3_URI = "s3://ai-bmi-predictor/trained-models/efficientnet-models/eff-ann-v8-training-2025-12-18-11-02-51-500/output/packaged-for-endpoint/model.tar.gz"

# ---- helpers ----
def parse_s3_uri(uri: str):
    """Split ``s3://bucket/key`` into ``(bucket, key)``.

    The key is taken verbatim from everything after the bucket, so characters
    such as ``?`` and ``#`` (legal in S3 keys, but treated as query/fragment
    separators by URL parsers) are preserved. Leading slashes of the key are
    removed; the key may be empty.

    Raises:
        ValueError: if the scheme is not ``s3`` or the bucket is missing.
    """
    scheme, separator, remainder = uri.partition("://")
    if not separator or scheme.lower() != "s3":
        raise ValueError(f"Invalid S3 URI: {uri}")

    bucket, _, key = remainder.partition("/")
    if not bucket:
        raise ValueError(f"Invalid S3 URI: {uri}")
    return bucket, key.lstrip("/")

def s3_download(uri: str, local_path: str):
    """Download the S3 object named by ``uri`` to ``local_path``."""
    bucket, key = parse_s3_uri(uri)
    client = boto3.client("s3")
    client.download_file(Bucket=bucket, Key=key, Filename=local_path)

def s3_upload(local_path: str, uri: str):
    """Upload the file at ``local_path`` to the S3 location named by ``uri``."""
    bucket, key = parse_s3_uri(uri)
    client = boto3.client("s3")
    client.upload_file(Filename=local_path, Bucket=bucket, Key=key)

# ---- workspace ----
workdir = "bmi_pkg_endpoint_v2"
extract_dir = os.path.join(workdir, "extract")
assets_dir = os.path.join(workdir, "assets")
serving_dir = os.path.join(workdir, "model", "1")  # REQUIRED path for SavedModel

# Remove what an earlier run left behind: a stale .h5 in extract/ could be
# picked up by the search below, and stale files in model/ would be packed
# into the new archive.
for stale_dir in (extract_dir, os.path.join(workdir, "model")):
    shutil.rmtree(stale_dir, ignore_errors=True)
for directory in (extract_dir, assets_dir, serving_dir):
    os.makedirs(directory, exist_ok=True)

# ---- download & extract original tar (contains .h5) ----
local_in_tar = os.path.join(workdir, "original_model.tar.gz")
s3_download(H5_TAR_S3_URI, local_in_tar)
print("‚úÖ Downloaded:", H5_TAR_S3_URI)

with tarfile.open(local_in_tar, "r:gz") as tar:
    if hasattr(tarfile, "data_filter"):
        # Extraction filter: refuses absolute paths, ".." traversal and
        # special files inside the archive.
        tar.extractall(extract_dir, filter="data")
    else:
        # Python builds without extraction filters: plain extraction.
        tar.extractall(extract_dir)
print("‚úÖ Extracted original tar")

# ---- locate .h5 ----
# First file (in os.walk order) whose name ends in .h5 / .hdf5, or None.
h5_path = next(
    (
        os.path.join(root, name)
        for root, _, names in os.walk(extract_dir)
        for name in names
        if name.lower().endswith((".h5", ".hdf5"))
    ),
    None,
)
if not h5_path:
    raise ValueError("‚ùå No .h5 found in original tar")

print("‚úÖ Found H5:", h5_path)

# ---- convert H5 -> SavedModel at model/1/ ----
ann = tf.keras.models.load_model(h5_path, compile=False)
ann.save(serving_dir, include_optimizer=False, save_format="tf")
print("‚úÖ Exported SavedModel to:", serving_dir)

# ---- cache EfficientNetB7 imagenet weights locally (not in endpoint) ----
_ = tf.keras.applications.EfficientNetB7(weights="imagenet")

# ---- copy cached EfficientNetB7 weights into assets/ ----
keras_models_dir = os.path.expanduser("~/.keras/models")
cached_files = os.listdir(keras_models_dir) if os.path.isdir(keras_models_dir) else []
# inference.py builds EfficientNetB7 with its classification top, so the
# "notop" weight file must never be bundled. It would also sort after
# "efficientnetb7.h5" and win a plain "last of sorted" pick.
cands = sorted(
    os.path.join(keras_models_dir, f)
    for f in cached_files
    if "efficientnetb7" in f.lower()
    and f.lower().endswith(".h5")
    and "notop" not in f.lower()
)
if not cands:
    raise ValueError("‚ùå EfficientNetB7 weights not found in ~/.keras/models")

# Prefer the file cached by the EfficientNetB7(weights="imagenet") call above.
preferred_weights = os.path.join(keras_models_dir, "efficientnetb7.h5")
weights_src = preferred_weights if preferred_weights in cands else cands[-1]
weights_dst = os.path.join(assets_dir, "efficientnetb7_imagenet.h5")
shutil.copy2(weights_src, weights_dst)
print("‚úÖ Bundled EfficientNet weights:", weights_dst)

# ---- download scalers into assets/ with exact filenames inference.py expects ----
s3_download(ROBUST_SCALER_S3_URI, os.path.join(assets_dir, "scaler_robust_features.pkl"))
s3_download(HEIGHT_SCALER_S3_URI, os.path.join(assets_dir, "scaler_standard_features.pkl"))
s3_download(TARGET_SCALER_S3_URI, os.path.join(assets_dir, "scaler_targets.pkl"))
print("‚úÖ Bundled scalers into assets/")

# ---- ensure code exists ----
assert os.path.exists("deploy_bmi/code/inference.py"), "‚ùå Missing inference.py"
assert os.path.exists("deploy_bmi/code/requirements.txt"), "‚ùå Missing requirements.txt"

# ---- build final model.tar.gz ----
out_tar = os.path.join(workdir, "model.tar.gz")
with tarfile.open(out_tar, "w:gz") as tar:
    tar.add(os.path.join(workdir, "model"), arcname="model")      # SavedModel bundle
    tar.add(assets_dir, arcname="assets")                         # scalers + effnet weights
    tar.add("deploy_bmi/code", arcname="code")                    # inference.py + requirements.txt

print("‚úÖ Created packaged tar:", out_tar)

# ---- upload ----
s3_upload(out_tar, PACKAGED_S3_URI)
print("‚úÖ Uploaded packaged model:", PACKAGED_S3_URI)


2025-12-18 17:35:45.764283: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-18 17:35:45.780049: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-12-18 17:35:45.804989: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-12-18 17:35:45.805027: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-12-18 17:35:45.820389: I tensorflow/core/platform/cpu_feature_gua

✅ Downloaded: s3://ai-bmi-predictor/trained-models/efficientnet-models/eff-ann-v8-training-2025-12-18-11-02-51-500/output/model.tar.gz
✅ Extracted original tar
✅ Found H5: bmi_pkg_endpoint_v2/extract/eff_ann_version8.h5


2025-12-18 17:35:48.162855: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-12-18 17:35:48.209801: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-12-18 17:35:48.210851: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

INFO:tensorflow:Assets written to: bmi_pkg_endpoint_v2/model/1/assets


INFO:tensorflow:Assets written to: bmi_pkg_endpoint_v2/model/1/assets


✅ Exported SavedModel to: bmi_pkg_endpoint_v2/model/1
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb7.h5
✅ Bundled EfficientNet weights: bmi_pkg_endpoint_v2/assets/efficientnetb7_imagenet.h5
✅ Bundled scalers into assets/
✅ Created packaged tar: bmi_pkg_endpoint_v2/model.tar.gz
✅ Uploaded packaged model: s3://ai-bmi-predictor/trained-models/efficientnet-models/eff-ann-v8-training-2025-12-18-11-02-51-500/output/packaged-for-endpoint/model.tar.gz


In [None]:
import boto3
import sagemaker
from sagemaker import image_uris
from sagemaker.tensorflow import TensorFlowModel
from sagemaker import get_execution_role

# -------- config --------
REGION = boto3.Session().region_name
ROLE = get_execution_role()

INSTANCE_TYPE = "ml.g4dn.xlarge"
# NOTE(review): "predcitor" looks like a typo of "predictor"; it is left as is
# because this string is the name the endpoint is created under.
ENDPOINT_NAME = "BMI-predcitor-V8-4"  # use a NEW name
MODEL_DATA = "s3://ai-bmi-predictor/trained-models/efficientnet-models/eff-ann-v8-training-2025-12-18-11-02-51-500/output/packaged-for-endpoint/model.tar.gz"

# One TensorFlow version for both the image lookup and the model definition.
TF_VERSION = "2.11"

# -------- container image --------
# TensorFlow inference DLC for the chosen version, Python version and instance type.
image_uri = image_uris.retrieve(
    framework="tensorflow",
    image_scope="inference",
    version=TF_VERSION,
    py_version="py310",
    region=REGION,
    instance_type=INSTANCE_TYPE,
)

session = sagemaker.Session()

# -------- model definition --------
# The image is passed explicitly so the endpoint runs exactly the one resolved above.
tf_model = TensorFlowModel(
    entry_point="inference.py",
    source_dir="deploy_bmi/code",
    model_data=MODEL_DATA,
    image_uri=image_uri,
    framework_version=TF_VERSION,
    role=ROLE,
    sagemaker_session=session,
)

# -------- deployment --------
# Startup health check is allowed up to 1200 seconds.
predictor = tf_model.deploy(
    endpoint_name=ENDPOINT_NAME,
    instance_type=INSTANCE_TYPE,
    initial_instance_count=1,
    container_startup_health_check_timeout=1200,
)

print("‚úÖ Deployed:", ENDPOINT_NAME)
print("‚úÖ Image:", image_uri)


----