In [1]:
import os

# SECURITY: AWS credentials must never be hardcoded in a notebook. The access
# key pair that used to be assigned here has been exposed and should be rotated.
# boto3 resolves credentials on its own from its standard lookup chain (the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables, the shared
# ~/.aws/credentials file, or an attached IAM role), so supply them through one
# of those mechanisms before starting the kernel instead of writing them here.

# Non-secret settings. setdefault() is used so that values already exported in
# the environment take precedence over these notebook defaults.
os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")  # e.g., "us-east-1"
os.environ.setdefault("DEFAULT_BUCKET", "mhacksforsid")  # optional convenience

# Optional tuning
os.environ.setdefault("MAX_BYTES_IN_MEMORY", "52428800")  # 50 MB
os.environ.setdefault("ALLOWED_EXTS", ".wav,.mp3,.flac,.m4a")
os.environ.setdefault("FRONTEND_ORIGIN", "http://localhost:5173")

In [2]:
import boto3, botocore
from pathlib import Path
import tempfile
from typing import Tuple, Optional

# Module-level S3 client used by the S3 helper functions in this cell. No
# credentials or region are passed explicitly, so boto3 resolves them itself.
s3 = boto3.client("s3")

def head_object(bucket: str, key: str) -> Tuple[int, Optional[str]]:
    """Look up an object's metadata without fetching its body.

    Returns ``(size_bytes, content_type)``. A response without
    ``ContentLength`` yields a size of ``0``; one without ``ContentType``
    yields ``None``. A ``ClientError`` from the S3 client propagates unchanged.
    """
    metadata = s3.head_object(Bucket=bucket, Key=key)
    return metadata.get("ContentLength", 0), metadata.get("ContentType")

def get_object_bytes(bucket: str, key: str) -> bytes:
    """Fetch an S3 object and return its whole body as a single bytes value."""
    return s3.get_object(Bucket=bucket, Key=key)["Body"].read()

def download_to_temp(bucket: str, key: str) -> Path:
    """Stream an S3 object into a fresh temporary file and return its path.

    Args:
        bucket: Name of the S3 bucket.
        key: Key of the object inside the bucket.

    Returns:
        Path of the temporary file holding the object's bytes. The caller is
        responsible for deleting it.

    Raises:
        Whatever ``s3.download_fileobj`` raises; the partially written
        temporary file is removed before the exception propagates.
    """
    # mkstemp gives every call its own uniquely named file, so concurrent
    # downloads of the same bucket/key cannot overwrite or delete each other's
    # data.
    fd, tmp_name = tempfile.mkstemp(prefix="s3_", suffix="_obj")
    tmp_path = Path(tmp_name)
    try:
        # fdopen takes ownership of the descriptor returned by mkstemp, so the
        # with-block closes it and nothing is leaked.
        with os.fdopen(fd, "wb") as f:
            s3.download_fileobj(bucket, key, f)
    except BaseException:
        # Do not leave a partial download behind.
        tmp_path.unlink(missing_ok=True)
        raise
    return tmp_path


In [3]:
from typing import Dict, Any, Optional

class SpeechProcessor:
    """
    Demo stand-in for the real processor.

    Swap it out with: from speech import SpeechProcessor
    The replacement should expose the same method signatures so it can be
    dropped in without touching the callers.
    """
    def __init__(self, **options):
        # Keyword options are stored as given and echoed back in every result.
        self.options = options

    def process_bytes(self, data: bytes) -> Dict[str, Any]:
        """Return a demo report for an in-memory payload (its length and the options)."""
        report: Dict[str, Any] = {"summary": "Processed from bytes"}
        report["length_bytes"] = len(data)
        report["options"] = self.options
        return report

    def process_file(self, path: str) -> Dict[str, Any]:
        """Return a demo report for a file on disk (its path, size and the options)."""
        import os
        return dict(
            summary="Processed from file",
            file_path=path,
            length_bytes=os.stat(path).st_size,
            options=self.options,
        )

In [4]:
import json, time, uuid, os, traceback
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, field_validator
from typing import Optional, Dict, Any, List

# Config: every setting is read from the environment, with a built-in fallback.
DEFAULT_BUCKET = os.environ.get("DEFAULT_BUCKET", "")
MAX_BYTES_IN_MEMORY = int(os.environ.get("MAX_BYTES_IN_MEMORY", "52428800"))  # 50MB
# Comma-separated list -> set of trimmed, lower-cased extensions; blanks dropped.
ALLOWED_EXTS = {
    ext
    for ext in (
        raw.strip().lower()
        for raw in os.environ.get("ALLOWED_EXTS", ".wav,.mp3,.flac,.m4a").split(",")
    )
    if ext
}
FRONTEND_ORIGIN = os.environ.get("FRONTEND_ORIGIN", "http://localhost:5173")

def is_allowed_key(key: str) -> bool:
    """Tell whether ``key`` ends, case-insensitively, with one of ALLOWED_EXTS."""
    # str.endswith accepts a tuple of suffixes and is False for an empty tuple.
    return key.lower().endswith(tuple(ALLOWED_EXTS))

# Pydantic models
# Request body accepted by POST /process: which S3 object to process, plus
# optional keyword options handed to the speech processor.
class ProcessRequest(BaseModel):
    bucket: Optional[str] = Field(
        default=None,
        description="S3 bucket name (optional if DEFAULT_BUCKET is set)",
    )
    key: str = Field(
        ...,
        description="S3 object key (e.g., audio/sample.wav)",
    )
    options: Optional[Dict[str, Any]] = Field(default_factory=dict)

    @field_validator("key")
    @classmethod
    def validate_key(cls, value: str) -> str:
        """Reject blank keys, keys starting with "/", and keys containing ".."."""
        is_blank = value.strip() == ""
        if is_blank or value.startswith("/") or ".." in value:
            raise ValueError("Invalid S3 key")
        return value

# Response body of POST /process (declared as that route's response_model).
class ProcessResult(BaseModel):
    request_id: str  # identifier generated for the request
    input: Dict[str, Any]  # the bucket, key and options that were processed
    result: Dict[str, Any]  # whatever the speech processor returned
    meta: Dict[str, Any]  # content type, byte counts and timing figures

# Error envelope: a request id plus a free-form error dict.
# NOTE(review): not referenced by any route in the visible cells; the handlers
# raise HTTPException with an inline detail dict instead. Confirm whether this
# model is still needed.
class ErrorPayload(BaseModel):
    request_id: str
    error: Dict[str, Any]

# App: the FastAPI instance, with CORS opened to the configured frontend origin
# plus bare localhost / 127.0.0.1. Credentials are not allowed cross-origin.
app = FastAPI(title="Speech API (Notebook)")

_cors_origins = [FRONTEND_ORIGIN, "http://localhost", "http://127.0.0.1"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Liveness probe: always responds with {"ok": true}.
@app.get("/health")
def health():
    return dict(ok=True)

def _http_error(status_code: int, request_id: str, code: str, message: str) -> HTTPException:
    """Build an HTTPException whose detail carries request_id, code and message."""
    return HTTPException(
        status_code=status_code,
        detail={"request_id": request_id, "code": code, "message": message},
    )


# POST /process: fetch one audio object from S3, run SpeechProcessor on it and
# return the result together with size and timing metadata.
#
# Request:  ProcessRequest (bucket is optional when DEFAULT_BUCKET is set).
# Returns:  dict with request_id, input, result and meta (the ProcessResult shape).
# Raises HTTPException with a {"request_id", "code", "message"} detail:
#   400 BAD_INPUT               no bucket given and no default configured
#   415 UNSUPPORTED_MEDIA_TYPE  key does not end with an allowed extension
#   404 S3_NOT_FOUND / 403 S3_FORBIDDEN / 502 <S3 error code>  HEAD failed
#   502 S3_DOWNLOAD_FAILED      the download raised
#   422 PROCESSING_FAILED       the speech processor raised
# (Kept as comments rather than a docstring so the generated OpenAPI
# description of the route is unchanged.)
@app.post("/process", response_model=ProcessResult)
def process(req: ProcessRequest):
    request_id = str(uuid.uuid4())
    # perf_counter() is monotonic: elapsed times cannot go negative or jump if
    # the wall clock is adjusted while the request is running.
    t0 = time.perf_counter()

    bucket = req.bucket or DEFAULT_BUCKET
    if not bucket:
        raise _http_error(400, request_id, "BAD_INPUT", "Bucket not provided and DEFAULT_BUCKET not set")

    if not is_allowed_key(req.key):
        raise _http_error(
            415,
            request_id,
            "UNSUPPORTED_MEDIA_TYPE",
            f"Key must end with one of: {sorted(ALLOWED_EXTS)}",
        )

    # Head object to validate and detect size/type
    try:
        size, ctype = head_object(bucket, req.key)
    except botocore.exceptions.ClientError as e:
        status = e.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500)
        code = e.response.get("Error", {}).get("Code", "S3_ERROR")
        if status == 404:
            raise _http_error(404, request_id, "S3_NOT_FOUND", "Object not found") from e
        if status == 403:
            raise _http_error(403, request_id, "S3_FORBIDDEN", "Access denied") from e
        # Anything else is reported as an upstream failure, passing on S3's own code.
        raise _http_error(502, request_id, code, "S3 error") from e

    # Download: small objects are held in memory, larger ones are spooled to a
    # temp file. Exactly one of `data` / `tmp_path` is set afterwards.
    data = None
    tmp_path = None
    t_dl0 = time.perf_counter()
    try:
        if size <= MAX_BYTES_IN_MEMORY:
            data = get_object_bytes(bucket, req.key)
            downloaded = len(data)
        else:
            tmp_path = download_to_temp(bucket, req.key)
            downloaded = size
    except Exception as e:
        raise _http_error(502, request_id, "S3_DOWNLOAD_FAILED", str(e)) from e
    t_dl1 = time.perf_counter()

    # Compute (call your speech processor)
    t_cmp0 = time.perf_counter()
    try:
        sp = SpeechProcessor(**(req.options or {}))
        if tmp_path is None:
            result = sp.process_bytes(data)
        else:
            result = sp.process_file(str(tmp_path))
    except Exception as e:
        traceback.print_exc()
        raise _http_error(422, request_id, "PROCESSING_FAILED", str(e)) from e
    finally:
        # Best-effort cleanup of the temp file: a failed delete must never
        # replace the real outcome of the request.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass  # already gone, nothing to clean up
            except OSError:
                traceback.print_exc()  # surface the leaked file, keep the response
    t_cmp1 = time.perf_counter()

    return {
        "request_id": request_id,
        "input": {"bucket": bucket, "key": req.key, "options": req.options or {}},
        "result": result,
        "meta": {
            "content_type": ctype,
            "size_bytes": size,
            "downloaded_bytes": downloaded,
            "download_ms": int((t_dl1 - t_dl0) * 1000),
            "compute_ms": int((t_cmp1 - t_cmp0) * 1000),
            "total_ms": int((time.perf_counter() - t0) * 1000),
        },
    }