In [None]:
# ✅ Step 1: Install dependencies (run only once in Colab or your environment)
!pip install -q transformers accelerate fastapi uvicorn pyngrok torch "python-multipart"

# ✅ Step 2: Import libraries
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from pyngrok import ngrok
import torch
import uvicorn
import nest_asyncio
import getpass

# ✅ Step 3: Get ngrok token
# getpass keeps the token out of the notebook output. Strip surrounding
# whitespace because copy-pasted tokens often carry a trailing space/newline
# that would otherwise be handed to ngrok verbatim.
NGROK_TOKEN = getpass.getpass("🔐 Enter your ngrok auth token: ").strip()
ngrok.set_auth_token(NGROK_TOKEN)

# ✅ Step 4: Load model and tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use half precision only when a CUDA GPU is present. On a CPU-only host
# float16 inference is very slow or unsupported for some ops, so fall back
# to float32 there.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=model_dtype,
)

# ✅ Step 5: Create FastAPI app
app = FastAPI(title="Children's Story Generator")

# ✅ Step 6: Enable CORS for all origins
# Credentials are disabled on purpose: wildcard origins/methods/headers must
# not be combined with allow_credentials=True, and this API uses no cookies
# or auth headers. To enable credentials, list explicit origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace "*" with your frontend origin to restrict access
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ✅ Step 7: Define request schema
# JSON body accepted by the POST /generate endpoint.
class StoryRequest(BaseModel):
    # Theme the story is built around; it is interpolated into the model prompt.
    theme: str

# ✅ Step 8: Define the story generation endpoint
@app.post("/generate")
def generate_story(request: StoryRequest):
    """Generate a short children's story about the requested theme.

    Args:
        request: Request body carrying the story ``theme``.

    Returns:
        A dict with the whitespace-stripped ``theme`` and the generated
        ``story`` text.

    Raises:
        HTTPException: 422 when the theme is empty after stripping.
    """
    # Imported locally so this block does not depend on a file-level import.
    from fastapi import HTTPException

    theme = request.theme.strip()
    if not theme:
        # Avoid spending a full generation on a prompt with no theme.
        raise HTTPException(status_code=422, detail="theme must not be empty")

    prompt = f"""
    Write a children's story for ages 5 to 10 based on the theme: "{theme}".
    Use simple language and a colorful, engaging tone.
    The story must include:
    - A clear setting (e.g. forest, school, beach)
    - A main character (e.g. animal, child, toy)
    - A small adventure or challenge
    - A meaningful message related to the theme
    Keep the story under 600 words.

    Begin the story directly. Do not repeat the instructions or theme.

    Example:
    In the forest, a small rabbit named Bunnicula was playing in the leaves...
    Now write a new story:
    """

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=800,  # Room for a full story
        temperature=0.85,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The returned sequence is the prompt tokens followed by the new tokens.
    # Slice the prompt off by length rather than splitting the decoded text on
    # a marker string, which breaks if the theme or the model repeats it.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = output[0][prompt_length:]
    story = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    return {"theme": theme, "story": story}


# ✅ Step 9: Start FastAPI with ngrok tunnel
if __name__ == "__main__":
    port = 8000

    # Lets uvicorn start its event loop when one is already running
    # (e.g. inside a notebook kernel).
    nest_asyncio.apply()

    # ngrok.connect() returns a tunnel object; the URL lives on .public_url.
    # Formatting the object itself prints its repr instead of a usable link.
    tunnel = ngrok.connect(port)
    public_url = tunnel.public_url
    print(f"🚀 Public API is live at: {public_url}/generate")

    try:
        uvicorn.run(app, host="0.0.0.0", port=port)
    finally:
        # Close the tunnel when the server stops so it does not stay exposed.
        ngrok.disconnect(public_url)
