# FastAPI app

## Upload this notebook and qlora_lora_tinyllama.zip to MyDrive

## Open this notebook in colab

## Mount Google drive

In [1]:
!pip install -q fastapi uvicorn[standard] pyngrok

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m517.7/517.7 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m456.8/456.8 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
[?25h

## Steps to get ngrok access key

Create an ngrok account (free) → https://dashboard.ngrok.com

Copy your auth token.

In [2]:
from pyngrok import ngrok

NGROK_AUTH_TOKEN = "Replace with your ngrok key"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)



## Run server

### Adjust path of `qlora_lora_tinyllama.zip` if needed

In [3]:
!unzip -o "/content/drive/MyDrive/qlora_lora_tinyllama.zip"

Archive:  /content/drive/MyDrive/qlora_lora_tinyllama.zip
   creating: content/qlora_lora_tinyllama/
  inflating: content/qlora_lora_tinyllama/tokenizer_config.json  
   creating: content/qlora_lora_tinyllama/checkpoint-137/
  inflating: content/qlora_lora_tinyllama/checkpoint-137/tokenizer_config.json  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/training_args.bin  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/scheduler.pt  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/trainer_state.json  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/chat_template.jinja  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/tokenizer.model  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/scaler.pt  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/tokenizer.json  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/adapter_config.json  
  inflating: content/qlora_lora_tinyllama/checkpoint-137/rng_state.pth  
  inflating: conte

### Write server file

In [4]:
%%writefile main.py
from fastapi import FastAPI, Depends, HTTPException, Header
import sqlite3
from typing import Optional
import torch

app = FastAPI(title="Recipe API")

DB_PATH = "auth.db"

def get_db():
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    return conn

def init_db():
    conn = get_db()
    cur = conn.cursor()

    cur.execute("""
        CREATE TABLE IF NOT EXISTS api_keys (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT UNIQUE NOT NULL,
            password TEXT NOT NULL,
            api_key TEXT NOT NULL
        );
    """)

    cur.execute("""
        INSERT OR IGNORE INTO api_keys (username, password, api_key)
        VALUES ('admin', 'admin123', 'mysecretkey123');
    """)

    conn.commit()
    conn.close()

init_db()

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

MODEL_PATH = "/content/content/qlora_lora_tinyllama"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=True, legacy=False)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    dtype=torch.float16,
    device_map="auto"
)

model = PeftModel.from_pretrained(model, MODEL_PATH)
model.eval()


def verify_api_key(x_api_key: Optional[str] = Header(None)):
    if x_api_key is None:
        raise HTTPException(status_code=401, detail="Missing API key")

    conn = get_db()
    cur = conn.cursor()
    cur.execute("SELECT * FROM api_keys WHERE api_key = ?", (x_api_key,))
    user = cur.fetchone()

    if not user:
        raise HTTPException(status_code=401, detail="Invalid API key")

    return user["username"]

@app.post("/login")
def login(username: str, password: str):
    conn = get_db()
    cur = conn.cursor()
    cur.execute(
        "SELECT api_key FROM api_keys WHERE username = ? AND password = ?",
        (username, password)
    )
    row = cur.fetchone()

    if not row:
        raise HTTPException(status_code=401, detail="Invalid username or password")

    return {"api_key": row["api_key"]}


@app.get("/recipe")
def get_recipe(
    ingredients: str,
    username: str = Depends(verify_api_key)
):
    """
    Example: /recipe?ingredients=Egg,Onions
    """

    # Build the structured cooking-assistant prompt
    prompt = (
        "<<system>> You are a skilled cooking assistant.\n"
        f"<<user>> Suggest a clean, well-formatted recipe using these ingredients: {ingredients}\n"
        "<<assistant>>"
    )

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
        )

    recipe = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    return {
        "message": "Here is your recipe!",
        "user": username,
        "ingredients": ingredients,
        "recipe": recipe,
    }


Writing main.py


### Run server in background

In [5]:
import subprocess
process = subprocess.Popen(
    ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
)

## Get publically accessible url

### Note since code is running on cpu: it takes time to load model and come live

In [6]:
public_url = ngrok.connect(8000)
public_url

<NgrokTunnel: "https://vaunted-elodia-blowfly.ngrok-free.dev" -> "http://localhost:8000">

## Sample requests

### you can also try with `http://localhost:8000/` if work for you

In [None]:
# Replace base url address with ngrok tunnel url
!curl -X POST "https://vaunted-elodia-blowfly.ngrok-free.dev/login?username=admin&password=admin123"

In [None]:
# Replace base url address with ngrok tunnel url
!curl -G "https://vaunted-elodia-blowfly.ngrok-free.dev/recipe" \
    -H "x-api-key: mysecretkey123" \
    --data-urlencode "ingredients=Egg,Onions"

## Kill server proceess

In [8]:
ngrok.kill()
process.terminate()