In [6]:
import json, requests
from google.colab import userdata

# Read the Lambda function URL and the app token from Colab userdata
# so that neither value is hard-coded in the notebook.
FUNCTION_URL = userdata.get("FUNCTION_URL")
APP_TOKEN = userdata.get("APP_TOKEN")

# The Lambda's own timeout is 10 s; the 8 s client default leaves ~2 s for the Lambda to close the call.
def call_lambda(prompt: str, url=FUNCTION_URL, token=APP_TOKEN, timeout=8):
    """POST ``prompt`` as a JSON body to the Lambda endpoint.

    Args:
        prompt: Text sent under the ``"prompt"`` key of the JSON payload.
        url: Endpoint to call. Defaults to the value ``FUNCTION_URL`` had
            when this function was defined.
        token: Value sent in the ``x-app-token`` header. Defaults to the
            value ``APP_TOKEN`` had when this function was defined.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        The ``requests.Response`` when the request completes and
        ``raise_for_status()`` does not raise (i.e. no 4xx/5xx status).
        ``None`` when the request times out or fails in any other way
        reported by ``requests``; a warning is printed in that case.

    Raises:
        ValueError: If ``url`` or ``token`` is falsy.
    """
    if not (url and token):
        raise ValueError("❌ Missing FUNCTION_URL or APP_TOKEN in Colab userdata.")

    body = json.dumps({"prompt": prompt})
    request_headers = {
        "Content-Type": "application/json",
        # The Lambda compares this header against its own env variable.
        "x-app-token": token,
    }

    try:
        response = requests.post(
            url, headers=request_headers, data=body, timeout=timeout
        )
        # Turns 4xx/5xx responses into requests.exceptions.HTTPError,
        # which is handled by the RequestException branch below.
        response.raise_for_status()
    except requests.exceptions.Timeout:
        print("⚠️ Request to Lambda timed out.")
        return None
    except requests.exceptions.RequestException as err:
        print(f"⚠️ Request failed: {err}")
        return None
    return response

# Smoke test: send one prompt, then show the HTTP status, the raw body and,
# when the body parses as JSON, a pretty-printed version of it.
resp = call_lambda("Give me 2 bullets describing this serverless AI endpoint.")
# call_lambda signals failure by returning None. Compare against None
# explicitly, because a Response's truthiness reflects its HTTP status.
if resp is not None:
    print("Status:", resp.status_code)
    print("Raw body:", resp.text)
    try:
        # Parse before printing the header so a non-JSON body does not
        # leave a dangling "Pretty JSON:" line in the output.
        parsed = resp.json()
    except ValueError as exc:
        # Response.json() reports an unparsable body with a ValueError
        # subclass; say so instead of silently ignoring it.
        print(f"\n⚠️ Response body is not valid JSON: {exc}")
    else:
        print("\nPretty JSON:")
        print(json.dumps(parsed, ensure_ascii=False, indent=2))

Status: 200
Raw body: {"answer": "- Fully managed, on-demand AI inference endpoint that auto-scales with traffic and is billed per invocation, with no server provisioning or maintenance required.\n- Accepts JSON requests to run a hosted model or pipeline, returning AI results (predictions, completions, embeddings) and includes built-in security (auth), rate limiting, retries, and logging."}

Pretty JSON:
{
  "answer": "- Fully managed, on-demand AI inference endpoint that auto-scales with traffic and is billed per invocation, with no server provisioning or maintenance required.\n- Accepts JSON requests to run a hosted model or pipeline, returning AI results (predictions, completions, embeddings) and includes built-in security (auth), rate limiting, retries, and logging."
}
