In [None]:
# --- CELL 1: SETUP AND LOGINS ---
# This cell installs all libraries and securely logs you in.

!pip install "transformers[torch]" fastapi uvicorn pyngrok nest-asyncio -q

from huggingface_hub import login
from pyngrok import conf
from google.colab import userdata

# Read both tokens from Colab Secrets and hand them to the Hugging Face hub
# client and to pyngrok's default configuration.  Failures are reported but
# not re-raised, so the cell itself always finishes (best-effort setup).
try:
    # 1. Get HF_TOKEN from Colab Secrets and log in
    HF_TOKEN = userdata.get('HF_TOKEN')
    login(token=HF_TOKEN)
    print("âœ… Successfully logged into Hugging Face!")

    # 2. Get NGROK_TOKEN from Colab Secrets and configure ngrok
    NGROK_TOKEN = userdata.get('NGROK_TOKEN')
    conf.get_default().auth_token = NGROK_TOKEN
    print("âœ… Successfully configured ngrok!")

except Exception as e:
    print("ðŸš¨ Error loading secrets.")
    # Show the real cause: a missing secret, a rejected token and a network
    # failure all land here and need different remedies.
    print(f"Details: {type(e).__name__}: {e}")
    print("Did you add 'HF_TOKEN' and 'NGROK_TOKEN' to your Colab Secrets (the 'key' icon)?")

âœ… Successfully logged into Hugging Face!
âœ… Successfully configured ngrok!


In [None]:
# --- CELL 2: YOUR MAIN APPLICATION ---

# --- 1. IMPORTS ---
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import uvicorn
import asyncio
import nest_asyncio
from fastapi import FastAPI
from pydantic import BaseModel
from pyngrok import ngrok
# (No need to import 'conf' or 'userdata' here)

# --- 2. LOAD THE MEDGEMMA MODEL ---
# Executed once when the cell runs.  `tokenizer` and `model` are module-level
# globals; get_triage_from_text() below reads them directly instead of taking
# them as parameters.
print("Loading MedGemma Model...")
model_id = "google/medgemma-4b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # Load the weights as bfloat16 tensors.
    torch_dtype=torch.bfloat16,
    device_map="auto", # This tells it to use the Colab GPU
)
print("MedGemma Model Loaded.")

# --- 3. CREATE YOUR TRIAGE LOGIC FUNCTION ---
def get_triage_from_text(raw_text: str):
    """
    Runs the MedGemma model and forces it to output a triage level.

    Args:
        raw_text: Free-text symptom description supplied by the user.  It is
            interpolated verbatim into the prompt.

    Returns:
        One of "ER", "Urgent Care", "Consultation" or "Self-Care".  When the
        model's reply names more than one level, the most severe one wins.
        When no level is recognised the function falls back to "Self-Care".

    Relies on the module-level `tokenizer` and `model` globals.
    """
    import re  # local import so the function does not depend on file-level imports

    prompt = f"""
    You are a medical triage assistant. A user has provided their symptoms.
    Classify the severity into one of four levels: ER, Urgent Care, Consultation, or Self-Care.
    User Symptoms: "{raw_text}"
    Respond with *only* the single triage level string.
    Triage Level:
    """
    messages = [{"role": "user", "content": prompt}]

    # add_generation_prompt=True appends the "start of model turn" marker so
    # the model answers the user instead of continuing the user's message.
    # return_dict=True also yields the attention mask, which generate() needs
    # to be passed explicitly.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Greedy decoding: a classifier should give the same answer for the same
    # input on every call.
    outputs = model.generate(**inputs, max_new_tokens=10, do_sample=False)

    # Decode only the newly generated tokens, not the echoed prompt.
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    # Match whole words only.  A bare substring test for "ER" also fires on
    # words such as "FEVER" or "HOWEVER".  "ER" stays case-sensitive so the
    # filler word "er" is not mistaken for it; "emergency" is accepted as a
    # spelled-out synonym.  Checks run from most to least severe.
    if re.search(r"\bER\b", response_text) or re.search(r"\bemergency\b", response_text, re.IGNORECASE):
        return "ER"
    if re.search(r"\burgent[\s-]*care\b", response_text, re.IGNORECASE):
        return "Urgent Care"
    if re.search(r"\bconsultation\b", response_text, re.IGNORECASE):
        return "Consultation"
    # NOTE(review): unrecognised output falls back to the *least* severe level
    # (unchanged from the original contract) -- confirm this default is
    # acceptable for the consumers of this API.
    return "Self-Care"

# --- 4. CREATE THE API ---
# The FastAPI application object; the route decorators below register
# their handlers on it and uvicorn serves it.
app = FastAPI()

# Request body schema for POST /triage: a JSON object with a single string
# field, `symptoms`, which run_triage() forwards to the triage function.
class TriageRequest(BaseModel):
    symptoms: str

@app.get("/")
def read_root():
    # Health-check endpoint: returns a fixed status payload so a caller can
    # confirm the API is reachable.
    status_payload = {"status": "Triage Model API is running"}
    return status_payload

@app.post("/triage")
def run_triage(request: TriageRequest):
    # Classify the submitted symptoms and wrap the result in the response
    # shape used by this API.  "predicted_disease" is a fixed placeholder;
    # only "triage_level" is computed.
    return {
        "predicted_disease": "N/A (Triage by MedGemma)",
        "triage_level": get_triage_from_text(request.symptoms),
    }

# --- 5. START THE SERVER AND CREATE THE PUBLIC URL ---
# We need to import threading and time
import threading
import time

SERVER_PORT = 8000
STARTUP_TIMEOUT_S = 30   # upper bound on how long we wait for uvicorn to come up
SHUTDOWN_TIMEOUT_S = 10  # upper bound on how long we wait for an old server to stop

# Re-running this cell must not leave the previous server holding the port:
# the new server would fail with "[Errno 98] address already in use" while
# the tunnel silently kept pointing at the stale server (and its old code).
_previous_server = globals().get("server")
_previous_thread = globals().get("server_thread")
if (
    isinstance(_previous_server, uvicorn.Server)
    and isinstance(_previous_thread, threading.Thread)
    and _previous_thread.is_alive()
):
    print("Stopping the server left over from a previous run of this cell...")
    _previous_server.should_exit = True
    _previous_thread.join(timeout=SHUTDOWN_TIMEOUT_S)

# Build the server object explicitly (rather than calling uvicorn.run) so we
# can check whether start-up succeeded and ask it to stop later.
server = uvicorn.Server(uvicorn.Config(app, host="0.0.0.0", port=SERVER_PORT))

# We must run the uvicorn server in a separate thread
# so it doesn't block the main thread or conflict with Colab's event loop.

def run_server():
    # This is a blocking call, so it's perfect for a thread
    server.run()

print("Starting Uvicorn server in a background thread...")
# daemon=True: a failed start-up must not keep the interpreter alive.
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

# Wait until uvicorn reports that it has started, instead of sleeping for a
# fixed time.  If the thread dies first (e.g. the port could not be bound),
# stop waiting immediately.
print("Waiting for the server to boot...")
_deadline = time.monotonic() + STARTUP_TIMEOUT_S
while not server.started and server_thread.is_alive() and time.monotonic() < _deadline:
    time.sleep(0.1)

if not server.started:
    # Do not open a tunnel to a server that is not ours / not running.
    server.should_exit = True
    print(
        f"Error: the server did not start on port {SERVER_PORT}. "
        "Another process may already be bound to it; restart the Colab "
        "runtime and run the cells again."
    )
else:
    # Now that the server is running, connect ngrok to it.
    try:
        public_url = ngrok.connect(SERVER_PORT)
        print("---" * 20)
        print(f"--- Your Triage API is LIVE at: {public_url} ---")
        print("--- Give this URL to Person C! ---")
        print("---" * 20)
        print("The API is live. This Colab cell must remain running.")

        # Keep the main thread alive (this will block forever, which is correct)
        server_thread.join()

    except KeyboardInterrupt:
        # Interrupting the cell should also stop the server, so the port is
        # free for the next run.
        print("Interrupted - stopping the server...")
        server.should_exit = True
        server_thread.join(timeout=SHUTDOWN_TIMEOUT_S)

    except Exception as e:
        print(f"ðŸš¨ Error starting ngrok: {e}")

Loading MedGemma Model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



MedGemma Model Loaded.
Starting Uvicorn server in a background thread...
Waiting 10 seconds for the server to boot...


INFO:     Started server process [6389]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): [errno 98] address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


------------------------------------------------------------
--- Your Triage API is LIVE at: NgrokTunnel: "https://unfoldable-nonfeeble-arlo.ngrok-free.dev" -> "http://localhost:8000" ---
--- Give this URL to Person C! ---
------------------------------------------------------------
The API is live. This Colab cell must remain running.
