# MLExample: Meal Recommendation End-to-End Pipeline

This notebook demonstrates how to:
1. Generate and upload meal recommendation training data to S3
2. Train a model using SageMaker
3. Deploy a serverless inference endpoint on SageMaker
4. Deploy an API Gateway with Lambda proxy integration


In [4]:
# Central configuration for every stage of the ML pipeline.
# The values are kept as notebook globals AND exported as environment
# variables, because later cells use a mix of both access styles.
import os

# S3 and SageMaker parameters
# NOTE(review): S3 bucket naming rules only permit lowercase letters, digits,
# dots and hyphens -- confirm this bucket name actually exists as spelled.
MEAL_DATA_BUCKET = "AWS-ML-Example-SDI-bucket123"
SAGEMAKER_ROLE = "arn:aws:iam::054116116033:role/ml-example-sagemaker-execution-role"
LAMBDA_EXEC_ROLE_ARN = "arn:aws:iam::054116116033:role/ml-example-lambda-execution-role"
REGION = "us-west-2"
TRAIN_DATA_KEY = "meal_data.csv"
MODEL_OUTPUT_PREFIX = "model/"
ENDPOINT_NAME = "meal-recommender-serverless-endpoint"
API_GATEWAY_NAME = "MealRecommenderAPI"
LAMBDA_FUNCTION_NAME = "MealRecommenderProxy"

# Set environment variables for use in other cells
os.environ["MEAL_DATA_BUCKET"] = MEAL_DATA_BUCKET
os.environ["SAGEMAKER_ROLE"] = SAGEMAKER_ROLE
os.environ["LAMBDA_EXEC_ROLE_ARN"] = LAMBDA_EXEC_ROLE_ARN
os.environ["AWS_DEFAULT_REGION"] = REGION
os.environ["AWS_PROFILE"] = "rjawsprofile"

# The Lambda / API Gateway deployment cell looks these names up with
# os.environ.get(); without the exports below it would receive None for the
# region, function name, endpoint name and API name.
os.environ["AWS_REGION"] = REGION
os.environ["LAMBDA_FUNCTION_NAME"] = LAMBDA_FUNCTION_NAME
os.environ["SAGEMAKER_ENDPOINT"] = ENDPOINT_NAME
os.environ["API_GATEWAY_NAME"] = API_GATEWAY_NAME

## 1. Generate and Upload Training Data to S3

Generate a synthetic meal recommendation dataset and upload it to an S3 bucket for use in SageMaker training.

In [None]:
import random
import csv
import uuid
from datetime import datetime, timedelta
import boto3
import os

# Select the named AWS credentials profile before any boto3 client is built.
os.environ["AWS_PROFILE"] = "rjawsprofile"

# Dishes available for each cuisine background.
meal_options = {
    "Indian": ["Chana Masala", "Paneer Tikka", "Masala Dosa", "Biryani", "Dal Makhani"],
    "American": ["Cheeseburger", "Grilled Chicken Salad", "Mac and Cheese", "BBQ Ribs", "Pancakes"],
    "British": ["Fish and Chips", "Shepherd's Pie", "Full English Breakfast", "Bangers and Mash", "Roast Beef"],
    "Chinese": ["Kung Pao Chicken", "Sweet and Sour Pork", "Fried Rice", "Spring Rolls", "Mapo Tofu"],
    "Mexican": ["Tacos", "Burrito Bowl", "Enchiladas", "Quesadillas", "Chilaquiles"]
}

# Categorical value pools each synthetic record samples from.
backgrounds = list(meal_options.keys())
times_of_day = ["Morning", "Noon", "Evening", "Night"]
health_levels = ["Low", "Medium", "High"]

# Household sizes and their sampling weights (smaller households are more likely).
family_sizes = [1, 2, 3, 4, 5, 6]
family_size_weights = [0.3, 0.25, 0.2, 0.15, 0.07, 0.03]

# Build 100,000 synthetic rows; the meal is always drawn from the dishes of
# the sampled background.
rows = []
for _ in range(100_000):
    background = random.choice(backgrounds)
    meal = random.choice(meal_options[background])
    time_of_day = random.choice(times_of_day)
    (family_size,) = random.choices(family_sizes, weights=family_size_weights)
    health = random.choice(health_levels)
    record_id = str(uuid.uuid4())
    # Creation date is today minus a random offset of 0..365 days.
    days_back = random.randint(0, 365)
    created_on = datetime.now() - timedelta(days=days_back)
    record = dict(
        id=record_id,
        background=background,
        meal=meal,
        time_of_day=time_of_day,
        family_size=family_size,
        health_consciousness=health,
        created_at=created_on.strftime("%Y-%m-%d"),
    )
    rows.append(record)

# Write the dataset to a local CSV; the header is taken from the first record.
csv_path = "meal_data.csv"
with open(csv_path, "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=list(rows[0]))
    writer.writeheader()
    writer.writerows(rows)

print(f"Generated {len(rows)} meal records in {csv_path}")

# Upload to S3 (skipped with a hint when no bucket is configured)
bucket = os.environ.get("MEAL_DATA_BUCKET")
s3_key = "meal_data.csv"
if not bucket:
    print("Set the MEAL_DATA_BUCKET environment variable to upload to S3.")
else:
    s3 = boto3.client('s3')
    s3.upload_file(csv_path, bucket, s3_key)
    print(f"Uploaded {csv_path} to s3://{bucket}/{s3_key}")


## 2. Train Model Using SageMaker

Use the SageMaker Python SDK to launch a training job with the generated meal data in S3. The training script will build a meal recommendation model and save the artifact to S3.

In [None]:
import sagemaker
from sagemaker.sklearn.estimator import SKLearn
import boto3
import os

# SageMaker session used for the training job.
sess = sagemaker.Session()

# S3 locations: the training CSV going in, the model artifact prefix coming out.
input_s3 = "s3://{}/{}".format(MEAL_DATA_BUCKET, TRAIN_DATA_KEY)
output_s3 = "s3://{}/{}".format(MEAL_DATA_BUCKET, MODEL_OUTPUT_PREFIX)

# Scikit-learn estimator settings; src/train.py is the training entry point.
estimator_settings = dict(
    entry_point="src/train.py",
    role=SAGEMAKER_ROLE,
    instance_type="ml.m5.large",
    framework_version="0.23-1",
    sagemaker_session=sess,
    output_path=output_s3,
    hyperparameters={},
)
sklearn_estimator = SKLearn(**estimator_settings)

# Launch training with the uploaded CSV bound to the "train" channel.
training_channels = {"train": input_s3}
sklearn_estimator.fit(training_channels)
print(f"Model trained and saved to {output_s3}")


## 3. Set Up SageMaker Serverless Inference Endpoint

Deploy the trained model as a serverless inference endpoint using the SageMaker Python SDK.

In [None]:
from sagemaker.serverless import ServerlessInferenceConfig
from sagemaker.sklearn.model import SKLearnModel

# S3 URI of the model artifact produced by the training job.
model_artifact = sklearn_estimator.model_data

# Wrap the artifact with the inference entry point so it can be hosted.
sklearn_model = SKLearnModel(
    model_data=model_artifact,
    role=SAGEMAKER_ROLE,
    entry_point="src/inference.py",
    framework_version="0.23-1",
    sagemaker_session=sess
)

# Serverless endpoints are sized by memory and concurrency rather than by an
# instance type/count. The previous call passed only instance_type, which is
# neither a valid instance-backed deployment (no initial_instance_count) nor
# a serverless one.
serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=2048,
    max_concurrency=5,
)

predictor = sklearn_model.deploy(
    endpoint_name=ENDPOINT_NAME,
    serverless_inference_config=serverless_config,
    wait=True  # block until the endpoint has been created
)
print(f"SageMaker serverless inference endpoint '{ENDPOINT_NAME}' deployed.")


## 4. Deploy API Gateway with Lambda Proxy Integration

Create an AWS Lambda function that invokes the SageMaker endpoint, and set up an API Gateway HTTP API with Lambda proxy integration to expose the inference endpoint as a public API.

In [None]:
import boto3
import zipfile
import os

# Resolve deployment settings. An environment variable wins when it is set
# and non-empty; otherwise the value from the notebook parameter cell is kept.
# (Blindly assigning os.environ.get(...) replaced these names with None
# whenever a variable had not been exported.)
REGION = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") or REGION
LAMBDA_FUNCTION_NAME = os.environ.get("LAMBDA_FUNCTION_NAME") or LAMBDA_FUNCTION_NAME
LAMBDA_EXEC_ROLE_ARN = os.environ.get("LAMBDA_EXEC_ROLE_ARN") or LAMBDA_EXEC_ROLE_ARN
ENDPOINT_NAME = os.environ.get("SAGEMAKER_ENDPOINT") or ENDPOINT_NAME
API_GATEWAY_NAME = os.environ.get("API_GATEWAY_NAME") or API_GATEWAY_NAME

lambda_client = boto3.client('lambda', region_name=REGION)
apigw_client = boto3.client('apigatewayv2', region_name=REGION)

# Package Lambda function
with open("api/lambda_proxy.py", "rb") as f_in:
    handler_source = f_in.read()

# ZipFile.writestr() with a plain name stores the entry with mode 0600, which
# the Lambda runtime user cannot read. Store it as 0644 explicitly.
handler_entry = zipfile.ZipInfo("lambda_proxy.py")
handler_entry.external_attr = 0o644 << 16
with zipfile.ZipFile("lambda_proxy.zip", "w") as zf:
    zf.writestr(handler_entry, handler_source)

# Create Lambda function
with open("lambda_proxy.zip", "rb") as f:
    response = lambda_client.create_function(
        FunctionName=LAMBDA_FUNCTION_NAME,
        Runtime="python3.9",
        Role=LAMBDA_EXEC_ROLE_ARN,
        Handler="lambda_proxy.lambda_handler",
        Code={"ZipFile": f.read()},
        # The handler receives the endpoint name through its environment.
        Environment={"Variables": {"SAGEMAKER_ENDPOINT": ENDPOINT_NAME}},
        Timeout=30,
        MemorySize=256,
        Publish=True
    )
    lambda_arn = response["FunctionArn"]
    print(f"Created Lambda: {lambda_arn}")

# Create API Gateway HTTP API (quick create: Target wires up the Lambda
# integration together with a default route and stage).
api_response = apigw_client.create_api(
    Name=API_GATEWAY_NAME,
    ProtocolType="HTTP",
    Target=lambda_arn
)

# Creating the API through the SDK does not grant invoke rights, so allow this
# API to call the function. Partition, region and account id are taken from
# the function ARN: arn:<partition>:lambda:<region>:<account>:function:<name>
_, arn_partition, _, arn_region, arn_account, *_ = lambda_arn.split(":")
api_id = api_response["ApiId"]
lambda_client.add_permission(
    FunctionName=lambda_arn,
    StatementId=f"apigateway-invoke-{api_id}",
    Action="lambda:InvokeFunction",
    Principal="apigateway.amazonaws.com",
    SourceArn=f"arn:{arn_partition}:execute-api:{arn_region}:{arn_account}:{api_id}/*",
)
print(f"API Gateway endpoint: {api_response['ApiEndpoint']}")


In [None]:
# Build a synthetic meal dataset and push it to S3, driven by the notebook
# parameters TRAIN_DATA_KEY, MEAL_DATA_BUCKET and REGION.
import random
import csv
import uuid
from datetime import datetime, timedelta
import boto3
import os

# Cuisine background -> dishes that can be recommended for it.
meal_options = {
    "Indian": ["Chana Masala", "Paneer Tikka", "Masala Dosa", "Biryani", "Dal Makhani"],
    "American": ["Cheeseburger", "Grilled Chicken Salad", "Mac and Cheese", "BBQ Ribs", "Pancakes"],
    "British": ["Fish and Chips", "Shepherd's Pie", "Full English Breakfast", "Bangers and Mash", "Roast Beef"],
    "Chinese": ["Kung Pao Chicken", "Sweet and Sour Pork", "Fried Rice", "Spring Rolls", "Mapo Tofu"],
    "Mexican": ["Tacos", "Burrito Bowl", "Enchiladas", "Quesadillas", "Chilaquiles"]
}

backgrounds = list(meal_options.keys())
times_of_day = ["Morning", "Noon", "Evening", "Night"]
health_levels = ["Low", "Medium", "High"]

# Family-size distribution, skewed towards small households.
size_choices = [1, 2, 3, 4, 5, 6]
size_weights = [0.3, 0.25, 0.2, 0.15, 0.07, 0.03]

rows = []
for _ in range(100_000):
    # Sample the categorical features; the dish depends on the background.
    background = random.choice(backgrounds)
    meal = random.choice(meal_options[background])
    time_of_day = random.choice(times_of_day)
    family_size = random.choices(size_choices, weights=size_weights)[0]
    health = random.choice(health_levels)

    record = {"id": str(uuid.uuid4())}
    record["background"] = background
    record["meal"] = meal
    record["time_of_day"] = time_of_day
    record["family_size"] = family_size
    record["health_consciousness"] = health
    # Date somewhere within the past year (0..365 days before now).
    age = timedelta(days=random.randint(0, 365))
    record["created_at"] = (datetime.now() - age).strftime("%Y-%m-%d")
    rows.append(record)

# The local file name doubles as the S3 object key.
csv_path = TRAIN_DATA_KEY
with open(csv_path, "w", newline="") as csvfile:
    header = list(rows[0])
    writer = csv.DictWriter(csvfile, fieldnames=header)
    writer.writeheader()
    writer.writerows(rows)

print(f"Generated {len(rows)} meal records in {csv_path}")

# Upload to S3
bucket = MEAL_DATA_BUCKET
s3_key = TRAIN_DATA_KEY
if not bucket:
    print("Set the MEAL_DATA_BUCKET environment variable to upload to S3.")
else:
    s3 = boto3.client('s3', region_name=REGION)
    s3.upload_file(csv_path, bucket, s3_key)
    print(f"Uploaded {csv_path} to s3://{bucket}/{s3_key}")
