1. Download the Sapiens 2B body-part segmentation model (TorchScript checkpoint)

In [None]:
# Fetch the TorchScript checkpoint from the Hugging Face Space and store it in the
# notebook's working directory; -O sets the local output file name.
!wget -O sapiens_2b_goliath_best_goliath_mIoU_8179_epoch_181_torchscript.pt2 \
  "https://huggingface.co/spaces/fashn-ai/sapiens-body-part-segmentation/resolve/main/assets/checkpoints/sapiens_2b_goliath_best_goliath_mIoU_8179_epoch_181_torchscript.pt2"

2. Upload the model to the S3 bucket

In [None]:
from pathlib import Path

import boto3

# Local file (already downloaded into the notebook directory by the wget cell)
local_file = "sapiens_2b_goliath_best_goliath_mIoU_8179_epoch_181_torchscript.pt2"

# `wget -O` creates/truncates the output file before any data arrives, so a failed
# download leaves a zero-byte file behind. Verify the checkpoint before uploading;
# otherwise an empty object would be pushed to S3 and reported as a success.
local_path = Path(local_file)
if not local_path.is_file():
    raise FileNotFoundError(f"Model file not found: {local_file}")
if local_path.stat().st_size == 0:
    raise ValueError(f"Model file is empty (download probably failed): {local_file}")

# S3 info
bucket_name = "ai-bmi-predictor"
s3_key = f"image-segmentation/sapiens/{local_file}"  # the key prefix acts as the 'folders'

# Upload
s3 = boto3.client("s3")
s3.upload_file(local_file, bucket_name, s3_key)

print("Uploaded to:", f"s3://{bucket_name}/{s3_key}")


In [1]:
import cv2                      # OpenCV for loading and saving images
import torch                    # PyTorch for running the TorchScript model
import numpy as np              # NumPy for array operations
from torchvision import transforms  # For image preprocessing
import torch.nn.functional as F     # For resizing (interpolation)

# ----------------- CONFIG -----------------

# File locations: TorchScript checkpoint, image to segment, and where the mask is written.
model_path = "sapiens_2b_goliath_best_goliath_mIoU_8179_epoch_181_torchscript.pt2"
input_image_path = "man.png"
output_mask_path = "man_mask_bw.png"

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# ----------------- PREPROCESSOR -----------------

print("Creating preprocessing pipeline...")
# The steps run in order on an RGB NumPy image and produce a normalized tensor
# with a leading batch axis, i.e. shape (1, C, H, W).
preprocess = transforms.Compose(
    [
        transforms.ToPILImage(),              # NumPy array -> PIL image
        transforms.Resize(size=(1024, 768)),  # (height, width) fed to the model
        transforms.ToTensor(),                # PIL image -> float tensor in [0, 1]
        transforms.Normalize(                 # per-channel ImageNet statistics
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        transforms.Lambda(lambda img: img.unsqueeze(0)),  # prepend the batch axis
    ]
)
print("Preprocessing pipeline ready.")

# ----------------- LOAD MODEL -----------------

print("Step 1/7: Loading TorchScript model (this can take a while)...")
# map_location remaps the saved tensors onto the selected device while loading.
# Without it, torch.jit.load tries to restore them onto the device they were saved
# from, which raises on a CPU-only machine if the checkpoint was exported from a GPU,
# defeating the CPU fallback chosen in the config section.
model = torch.jit.load(model_path, map_location=device)
model = model.eval().to(device)  # eval mode; .to(device) kept as a harmless no-op guard
print("Model loaded and moved to device.")

# ----------------- LOAD IMAGE -----------------

print("Step 2/7: Loading input image...")
img_bgr = cv2.imread(input_image_path)  # OpenCV loads pixels in BGR channel order
# cv2.imread signals failure by returning None instead of raising. Fail loudly and
# report the path that was actually configured (an assert would be stripped when
# Python runs with -O, and the old message hard-coded the file name).
if img_bgr is None:
    raise FileNotFoundError(f"Could not load input image: {input_image_path}")

# Remember the original size so the prediction can be scaled back to it later.
orig_h, orig_w = img_bgr.shape[:2]
print(f"Input image size: {orig_w}x{orig_h}")

# The preprocessing pipeline expects RGB ordering.
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
print("Input image converted to RGB.")

# ----------------- PREPARE TENSOR -----------------

print("Step 3/7: Preprocessing image for the model...")
# Run the preprocessing pipeline and place the result on the model's device.
input_tensor = preprocess(img_rgb).to(device)
print(f"Preprocessed tensor shape: {tuple(input_tensor.shape)}")

# ----------------- RUN MODEL -----------------

print("Step 4/7: Running model inference (this is usually the slowest step)...")
# inference_mode switches off autograd tracking for the forward pass.
with torch.inference_mode():
    output = model(input_tensor)
print("Model inference completed.")

# ----------------- POSTPROCESS: GET SEGMENTATION MAP -----------------

print("Step 5/7: Postprocessing model output...")
# First element of the model output, brought back to the CPU.
logits_small = output[0].cpu()

print("Resizing logits back to original image size...")
# F.interpolate works on batched input, so a batch axis is added with [None]
# and dropped again with [0] once the logits are at the original (H, W).
logits = F.interpolate(
    logits_small[None],
    size=(orig_h, orig_w),
    mode="bilinear",
)[0]

# Per-pixel class index: the channel holding the largest logit.
segmentation_map = torch.argmax(logits, dim=0)
segmentation_map_np = segmentation_map.numpy().astype(np.uint8)
print("Segmentation map created.")

# ----------------- CREATE BINARY BODY MASK -----------------

print("Step 6/7: Creating binary body mask...")
# Class index 0 is treated as background; every other class counts as body.
body_mask = np.not_equal(segmentation_map_np, 0).astype(np.uint8)
# Stretch {0, 1} to {0, 255} so the mask renders as pure black and white.
body_mask_bw = np.multiply(body_mask, 255).astype(np.uint8)
print("Binary body mask created.")

# ----------------- SAVE RESULT -----------------

print("Step 7/7: Saving black-and-white mask image...")
# cv2.imwrite reports failure through its boolean return value rather than by
# raising, so check it; otherwise a failed write would still print "Done!".
if not cv2.imwrite(output_mask_path, body_mask_bw):
    raise OSError(f"Could not write mask image to: {output_mask_path}")
print(f"Done! Mask saved to: {output_mask_path}")


  import pynvml  # type: ignore[import]


Using device: cuda
Creating preprocessing pipeline...
Preprocessing pipeline ready.
Step 1/7: Loading TorchScript model (this can take a while)...
Model loaded and moved to device.
Step 2/7: Loading input image...
Input image size: 413x627
Input image converted to RGB.
Step 3/7: Preprocessing image for the model...
Preprocessed tensor shape: (1, 3, 1024, 768)
Step 4/7: Running model inference (this is usually the slowest step)...
Model inference completed.
Step 5/7: Postprocessing model output...
Resizing logits back to original image size...
Segmentation map created.
Step 6/7: Creating binary body mask...
Binary body mask created.
Step 7/7: Saving black-and-white mask image...
Done! Mask saved to: man_mask_bw.png
