# In [None]:
import json

import boto3
import redis
from fastapi import FastAPI, Request
from pydantic import BaseModel
from transformers import pipeline

# ASGI application object; route handlers below are registered on it via
# its decorators.
app = FastAPI()

# Redis client pointed at a server on localhost:6379. The /ask handler uses
# it as a short-lived answer cache keyed by the normalized question text.
r = redis.Redis(host='localhost', port=6379)

# SageMaker runtime client, used by the /ask handler to invoke the agent
# endpoints. No region or credentials are passed here, so boto3 resolves
# them from its default configuration chain.
runtime = boto3.client('sagemaker-runtime')

# Intent classifier: a transformers text-classification pipeline, built once
# at import time and reused for every request.
# NOTE(review): "bert-sdsu-intent" is presumably a local model directory or
# a model-hub id — confirm where it is expected to resolve from.
classifier = pipeline("text-classification", model="bert-sdsu-intent")

class Query(BaseModel):
    """Request body accepted by the POST /ask route."""

    # Free-form question text supplied by the client; the handler lower-cases
    # and strips it before using it as cache key and model input.
    question: str

@app.post("/ask")
async def ask_question(query: Query):
    """Answer a question, serving it from the Redis cache when possible.

    The question is lower-cased and stripped; that normalized text is used
    both as the cache key and as the model input. On a cache miss the intent
    classifier selects a SageMaker endpoint, the endpoint is invoked, and
    its raw response body is cached for 15 minutes.

    Args:
        query: Request body carrying the user's question.

    Returns:
        A dict with ``source`` (``"cache"`` or ``"model"``) and ``answer``
        (the UTF-8 decoded answer text).
    """
    # NOTE(review): the Redis, classifier and SageMaker calls below are all
    # synchronous, so they block the event loop while this coroutine runs.
    # Consider a threadpool (or a plain `def` route) if throughput matters.
    question = query.question.lower().strip()

    # 1. Check cache.
    # A single GET replaces the former EXISTS + GET pair: the key could
    # expire between the two calls, in which case GET returned None and
    # `.decode` raised AttributeError.
    cached = r.get(question)
    if cached is not None:
        return {"source": "cache", "answer": cached.decode("utf-8")}

    # 2. Predict intent (label of the top-ranked classification result).
    intent = classifier(question)[0]["label"]

    # 3. Route to SageMaker; intents without a dedicated agent fall back to
    # the default endpoint.
    endpoint_map = {
        "Admissions": "AdmissionsAgentEndpoint",
        "Courses": "CoursesAgentEndpoint",
    }
    endpoint = endpoint_map.get(intent, "DefaultAgentEndpoint")

    # Serialize with json.dumps rather than string interpolation so quotes,
    # backslashes and control characters in the user's text are escaped and
    # cannot break or inject into the JSON payload. ensure_ascii=False keeps
    # the payload identical to the previous format for ordinary input.
    payload = json.dumps({"inputs": question}, ensure_ascii=False)

    response = runtime.invoke_endpoint(
        EndpointName=endpoint,
        ContentType="application/json",
        Body=payload,
    )

    answer = response["Body"].read().decode("utf-8")
    r.setex(question, 900, answer)  # Cache for 15 mins

    return {"source": "model", "answer": answer}