Cell 1: Load libraries and dataset from S3

In [1]:
# Cell 1: Load libraries and dataset from S3

import boto3        # import boto3 to connect to AWS S3
import pandas as pd # import pandas for data handling

bucket = "ai-bmi-predictor"                 # name of the S3 bucket
key = "test-data/eff_testingA.csv"         # path to the CSV file inside the bucket

s3 = boto3.client("s3")                    # create an S3 client using your AWS credentials
obj = s3.get_object(Bucket=bucket, Key=key)  # download the S3 object that contains the CSV file

data = pd.read_csv(obj["Body"])           # read the CSV content from the S3 object body into a DataFrame

data.head()                               # display the first few rows to confirm the data loaded correctly


Unnamed: 0,photo_id,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,hip,leg-length,shoulder-breadth,shoulder-to-crotch,thigh,waist,wrist,gender,height_cm,weight_kg
0,e5ae8fe5bbdf611a1e8d06e66e849bdf,0.073159,0.085775,-0.133776,0.881202,0.214236,0.016104,-0.180302,-0.100713,-0.117249,...,106.77469,83.279744,39.922305,70.005128,55.945992,98.25039,20.187082,male,180.0,94.6
1,605a5fd09058c48156b0ef518b63b2de,0.092031,-0.066016,-0.145132,0.687441,0.186508,-0.075221,-0.093846,-0.03584,0.033903,...,102.481633,84.876529,39.974203,73.591637,55.397032,88.003618,17.715785,male,188.9,86.75
2,909c9277309e13ee014e347603aba620,0.057046,-0.051366,-0.148253,0.675916,0.209973,-0.073485,-0.072783,-0.059395,0.00837,...,99.342301,82.275874,36.059983,66.440526,53.742692,82.100598,17.086464,male,179.7,73.85
3,bef6a68bc8dd475c124f6de2413385d3,-0.018792,0.016435,-0.148091,0.464433,0.242849,-0.106556,0.001489,-0.083478,0.096048,...,101.770144,76.081842,34.071748,62.218026,52.396573,83.999124,16.299751,female,166.95,69.05
4,6d7ed4bc4a17546447efed0ca6e2ff11,0.084419,0.065945,-0.153379,0.635377,0.285274,-0.056372,-0.139008,-0.120711,-0.002466,...,94.707063,81.328892,36.834735,64.426273,49.895157,86.020117,16.531431,male,173.2,65.55


Cell 2: Compute BMI

In [2]:
# Cell 2: Compute BMI (using original height and weight) and store it in the dataset

data["BMI"] = data["weight_kg"] / ((data["height_cm"] / 100) ** 2)  # calculate BMI and save as a new column
data["BMI"].describe()                                              # quickly inspect BMI statistics

count    1684.000000
mean       24.056672
std         4.147934
min        16.019762
25%        21.851282
50%        23.221269
75%        25.538605
max        37.510454
Name: BMI, dtype: float64

Cell 3: Categorical encoding for 'gender' feature

In [3]:
# Cell 3: Categorical encoding for 'gender' feature

data["gender"] = data["gender"].astype("category")  # convert 'gender' column to categorical type
data["gender"] = data["gender"].cat.codes           # replace categories with numeric codes

Cell 4: Min-max scaling (range -1 to 1) for body measurements and height

In [4]:
# Cell 4: Min-max scaling (range -1 to 1) for body measurements and height

from sklearn.preprocessing import MinMaxScaler  # import the MinMaxScaler for normalization

cols_to_scale_targets = [
    "ankle", "arm-length", "bicep", "calf", "chest", "forearm", "hip",
    "leg-length", "shoulder-breadth", "shoulder-to-crotch", "thigh",
    "waist", "wrist", "weight_kg"
]                                               # list of target columns to scale (body measurements and weight)

height_col = ["height_cm"]                      # list containing the name of the height column (input only)

scaler_targets = MinMaxScaler(feature_range=(-1, 1))  # scaler for target body measurements and weight
scaler_height = MinMaxScaler(feature_range=(-1, 1))   # scaler for height_cm (input feature)

data[cols_to_scale_targets] = scaler_targets.fit_transform(  # fit & transform target columns with scaler_targets
    data[cols_to_scale_targets]
)

data[height_col] = scaler_height.fit_transform(             # fit & transform height column with scaler_height
    data[height_col]
)


Cell 5: Create X (independent features) and Y (multi-target outputs)

In [5]:
# Cell 5: Create X (independent features) and Y (multi-target outputs)

target_cols = [
    "ankle", "arm-length", "bicep", "calf", "chest", "forearm", "hip",
    "leg-length", "shoulder-breadth", "shoulder-to-crotch", "thigh",
    "waist", "wrist", "weight_kg"
]                                                # list of target columns for multi-target regression

Y = data[target_cols]                            # select target columns as Y (scaled values)
print("Selected target columns:", target_cols)   # print which columns are used as targets
print("Shape of Y (samples, targets):", Y.shape) # print the shape of Y to confirm dimensions

drop_cols = ["photo_id", "subject_id"] + target_cols + ["BMI"]  # columns to drop when building X (IDs, targets, BMI helper column)

print("Columns to drop for X:\n", drop_cols)     # show which columns will be removed from data to form X

X = data.drop(columns=drop_cols)                 # drop unwanted columns to create feature matrix X

print("\nShape of X (samples, independent features):", X.shape)  # print shape of X
# print("\nColumns in X:\n", X.columns.tolist())   # optional: print all feature names in X


Selected target columns: ['ankle', 'arm-length', 'bicep', 'calf', 'chest', 'forearm', 'hip', 'leg-length', 'shoulder-breadth', 'shoulder-to-crotch', 'thigh', 'waist', 'wrist', 'weight_kg']
Shape of Y (samples, targets): (1684, 14)
Columns to drop for X:
 ['photo_id', 'subject_id', 'ankle', 'arm-length', 'bicep', 'calf', 'chest', 'forearm', 'hip', 'leg-length', 'shoulder-breadth', 'shoulder-to-crotch', 'thigh', 'waist', 'wrist', 'weight_kg', 'BMI']

Shape of X (samples, independent features): (1684, 5122)


Cell 6: Load trained Keras model from S3

In [6]:
# Cell 6: Load trained Keras model from S3 (in-memory)

import io                                        # import io for in-memory byte streams
import h5py                                      # import h5py to open HDF5 file objects
import tensorflow as tf                          # import tensorflow to load the Keras model

bucket_name = "ai-bmi-predictor"                 # S3 bucket name for the trained model
model_key  = "trained-models/efficientnet-models/eff_ann_version3.h5"  # path of model file in S3

print("Creating S3 client...")                   # status message to show progress
s3 = boto3.client("s3")                          # create a new S3 client (safe even if one already exists)

print(f"Reading model bytes from s3://{bucket_name}/{model_key}")  # show which model file is being read
obj = s3.get_object(Bucket=bucket_name, Key=model_key)             # download the model file as an S3 object
model_bytes = obj["Body"].read()                                   # read the S3 object body as raw bytes

byte_stream = io.BytesIO(model_bytes)           # wrap the raw bytes in an in-memory binary stream

print("Opening HDF5 file from memory...")        # status message before loading HDF5
with h5py.File(byte_stream, "r") as h5file:      # open the binary stream as an HDF5 file
    best_model = tf.keras.models.load_model(h5file)  # load the Keras model from the HDF5 file

print("Model loaded successfully from S3 (in-memory)!")  # confirmation message


2025-12-09 09:14:09.683566: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-09 09:14:09.696877: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-12-09 09:14:09.716750: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-12-09 09:14:09.716776: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-12-09 09:14:09.728962: I tensorflow/core/platform/cpu_feature_gua

Creating S3 client...
Reading model bytes from s3://ai-bmi-predictor/trained-models/efficientnet-models/eff_ann_version3.h5
Opening HDF5 file from memory...


2025-12-09 09:14:11.749176: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-12-09 09:14:11.795990: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-12-09 09:14:11.797067: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Model loaded successfully from S3 (in-memory)!


Cell 7: Pick one random record within a BMI range, predict targets, and compare with actual values

In [7]:
# Cell 7: Pick one random record within a BMI range, predict targets, and compare with actual values

bmi_min = 18.0                                    # minimum BMI value for selection
bmi_max = 20.0                                    # maximum BMI value for selection

bmi_filtered = data[(data["BMI"] >= bmi_min) &    # filter rows where BMI is greater than or equal to bmi_min
                    (data["BMI"] <= bmi_max)]     # and BMI is less than or equal to bmi_max

print("Number of records in BMI range:", len(bmi_filtered))  # show how many records match the BMI condition

if bmi_filtered.empty:                            # check if there are no rows in the requested BMI range
    raise ValueError("No records found in the BMI range 18–20. Please adjust the range or check the data.")  # raise an error if none

sample_row = bmi_filtered.sample(n=1, random_state=None)  # randomly pick a single row from the filtered DataFrame

sample_index = sample_row.index[0]               # get the index of the selected row

X_sample = X.loc[[sample_index]]                 # extract the corresponding feature row from X (as a DataFrame)
Y_sample_scaled = Y.loc[[sample_index]]          # extract the corresponding scaled targets row from Y

y_pred_scaled = best_model.predict(X_sample)     # use the trained model to predict scaled target values for the selected record

y_actual = scaler_targets.inverse_transform(     # inverse-transform scaled actual targets back to original units
    Y_sample_scaled.values
)

y_pred = scaler_targets.inverse_transform(       # inverse-transform scaled predicted targets back to original units
    y_pred_scaled
)

actual_df = pd.DataFrame(y_actual, columns=target_cols).T   # create a DataFrame for actual values (transpose for nicer layout)
actual_df.columns = ["Actual"]                              # rename the single column to 'Actual'

pred_df = pd.DataFrame(y_pred, columns=target_cols).T       # create a DataFrame for predicted values (transpose for nicer layout)
pred_df.columns = ["Predicted"]                             # rename the single column to 'Predicted'

result_df = pd.concat([actual_df, pred_df], axis=1)         # combine actual and predicted values side-by-side

print(f"Selected sample index: {sample_index}")             # show the index of the selected sample
print(f"BMI of selected sample: {data.loc[sample_index, 'BMI']:.2f}")  # print the BMI of the selected record
print("\nActual vs Predicted values for the selected record:\n")      # header for clarity
print(result_df)                                             # print the table with actual and predicted values


Number of records in BMI range: 90


2025-12-09 09:14:17.677126: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:191] failed to create cublas handle: the resource allocation failed
2025-12-09 09:14:17.677159: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:194] Failure to initialize cublas may be due to OOM (cublas needs some free memory when you initialize it, and your deep-learning framework may have preallocated more than its fair share), or may be because this binary was not built with support for the GPU in your machine.
2025-12-09 09:14:17.677179: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INTERNAL: No blas support for stream
	 [[{{node sequential/dense/MatMul}}]]


InternalError: Graph execution error:

Detected at node sequential/dense/MatMul defined at (most recent call last):
  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/runpy.py", line 86, in _run_code

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 758, in start

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 211, in start

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/utils.py", line 71, in preserve_context

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 614, in shell_main

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 471, in dispatch_shell

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 366, in execute_request

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 827, in execute_request

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 458, in do_execute

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 663, in run_cell

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2974, in run_cell

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3029, in _run_cell

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3256, in run_cell_async

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3472, in run_ast_nodes

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3552, in run_code

  File "/tmp/ipykernel_18902/1126383453.py", line 21, in <cell line: 21>

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 2650, in predict

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 2436, in predict_function

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 2421, in step_function

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 2409, in run_step

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 2377, in predict_step

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 553, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 558, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 588, in __call__

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 553, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 558, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filezetbjpti.py", line 34, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/sequential.py", line 394, in call

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/sequential.py", line 397, in call

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/functional.py", line 514, in call

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/functional.py", line 661, in _run_internal_graph

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/functional.py", line 663, in _run_internal_graph

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/functional.py", line 663, in _run_internal_graph

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/functional.py", line 663, in _run_internal_graph

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/functional.py", line 671, in _run_internal_graph

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 553, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/training.py", line 558, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/base_layer.py", line 1047, in __call__

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/engine/base_layer.py", line 1136, in __call__

  File "/tmp/__autograph_generated_filezetbjpti.py", line 34, in error_handler

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/layers/core/dense.py", line 212, in call

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/layers/core/dense.py", line 218, in call

  File "/home/ec2-user/anaconda3/envs/tensorflow2_p310/lib/python3.10/site-packages/tf_keras/src/layers/core/dense.py", line 241, in call

No blas support for stream
	 [[{{node sequential/dense/MatMul}}]] [Op:__inference_predict_function_411]