# Colab FastAPI + Ollama + YOLO

This notebook provisions the FastAPI project, installs Ollama/YOLO, and exposes the API via ngrok.
Run the cells top-to-bottom whenever you need a fresh Colab runtime.

## Before you run
1. Upload this repository (or sync from Git) into `/content/colab_fastapi`.
2. (Optional) Add your ngrok authtoken to the Colab secrets manager and set `NGROK_AUTHTOKEN`.
3. Decide which Ollama + YOLO models you want to use; defaults are `phi3` and `yolov8n.pt`.

In [None]:
import os
from pathlib import Path

# Fixed location where the repository must be uploaded before running the notebook.
PROJECT_ROOT = Path('/content/colab_fastapi').resolve()

# Fail fast with an actionable message when the project is missing; otherwise
# make the project directory the working directory so relative paths resolve
# against it.
if PROJECT_ROOT.exists():
    os.chdir(PROJECT_ROOT)
    print('Working directory:', PROJECT_ROOT)
else:
    raise RuntimeError('Upload the project into /content/colab_fastapi before continuing.')

In [None]:
import os


def mask_secret(value):
    """Return a display-safe placeholder for a secret.

    Notebook output is stored with the notebook and is easily shared, so the
    secret itself must never be printed.

    Args:
        value: The secret string, or ``None`` when it is not configured.

    Returns:
        ``'<unset>'`` when *value* is ``None`` or empty, otherwise
        ``'<set, hidden>'``.
    """
    if not value:
        return '<unset>'
    return '<set, hidden>'


# Apply defaults only for variables that are not already set in the
# environment, so values exported before this cell take precedence.
os.environ.setdefault('FASTAPI_PORT', '8000')
os.environ.setdefault('OLLAMA_MODEL', 'phi3')
os.environ.setdefault('YOLO_MODEL', 'yolov8n.pt')
os.environ.setdefault('YOLO_CONFIDENCE', '0.35')

# Echo the effective configuration for quick verification.
print('FASTAPI_PORT =', os.environ['FASTAPI_PORT'])
print('OLLAMA_MODEL =', os.environ['OLLAMA_MODEL'])
print('YOLO_MODEL =', os.environ['YOLO_MODEL'])
print('YOLO_CONFIDENCE =', os.environ['YOLO_CONFIDENCE'])
# Report only whether the token is configured; never print its value.
print('NGROK_AUTHTOKEN =', mask_secret(os.environ.get('NGROK_AUTHTOKEN')))

In [None]:
%%bash
# Install the system tools and Python packages the project needs.
cd /content/colab_fastapi

# Refresh the apt package index, then install curl and git.
sudo apt-get -y update
sudo apt-get -y install curl git

# Upgrade pip itself before installing the project's requirements.
pip install -U pip
pip install -r requirements.txt

In [None]:
%%bash
# Run the project's Ollama install script from the project directory.
cd /content/colab_fastapi
chmod +x scripts/install_ollama.sh

# Keep any values already present in the environment; otherwise fall back to
# the defaults. Both are exported so the install script can read them.
OLLAMA_MODEL="${OLLAMA_MODEL:-phi3}"
OLLAMA_PORT="${OLLAMA_PORT:-11434}"
export OLLAMA_MODEL OLLAMA_PORT

scripts/install_ollama.sh

In [None]:
import asyncio
import os
import threading
import time

import uvicorn

FASTAPI_PORT = int(os.environ['FASTAPI_PORT'])

# Maximum number of seconds to wait for uvicorn to report a completed startup.
SERVER_STARTUP_TIMEOUT = 60.0

# Only treat the server as running when its thread is still alive; a thread
# left over from a failed or stopped run must not block a restart.
if 'SERVER_THREAD' in globals() and SERVER_THREAD.is_alive():
    print('FastAPI server already running.')
else:
    config = uvicorn.Config('app.main:app', host='0.0.0.0', port=FASTAPI_PORT, log_level='info')
    server = uvicorn.Server(config)

    def _run_server():
        """Serve the app on a dedicated event loop owned by the background thread."""
        asyncio.run(server.serve())

    # Daemon thread so the server never keeps the runtime alive on its own.
    SERVER_THREAD = threading.Thread(target=_run_server, daemon=True)
    SERVER_THREAD.start()

    # Poll until uvicorn reports startup, the thread dies (startup failure),
    # or the timeout elapses, instead of sleeping a fixed time and hoping.
    _deadline = time.monotonic() + SERVER_STARTUP_TIMEOUT
    while (
        not server.started
        and SERVER_THREAD.is_alive()
        and time.monotonic() < _deadline
    ):
        time.sleep(0.1)

    if not server.started:
        raise RuntimeError(
            f'FastAPI server failed to start on port {FASTAPI_PORT} '
            f'within {SERVER_STARTUP_TIMEOUT:.0f}s; check the uvicorn log output above.'
        )
    print(f'FastAPI server started on port {FASTAPI_PORT}.')

In [None]:
import os

from scripts.start_ngrok import start_ngrok

# Open a tunnel to the port the FastAPI server listens on. The authtoken is
# optional: None is passed through when NGROK_AUTHTOKEN is not set.
public_url = start_ngrok(
    port=int(os.environ['FASTAPI_PORT']),
    authtoken=os.environ.get('NGROK_AUTHTOKEN'),
)
# Last expression of the cell, so the notebook displays the value.
public_url

In [None]:
import asyncio
import base64
import concurrent.futures
import io
import os

import cv2
import httpx
import numpy as np

FASTAPI_ROOT = f"http://127.0.0.1:{os.environ['FASTAPI_PORT']}"


async def run_smoke_tests():
    """Exercise the local Ollama and YOLO endpoints once each.

    Posts a text prompt to ``/ollama/generate`` and a synthetic JPEG to
    ``/yolo/detect``, printing each JSON response.

    Raises:
        httpx.HTTPStatusError: If either endpoint returns an error status.
        RuntimeError: If the synthetic image cannot be JPEG-encoded.
    """
    # Generous timeout: the requests may take a long time to complete.
    async with httpx.AsyncClient(timeout=120) as client:
        gen_payload = {
            'prompt': 'Say hello from the Colab FastAPI service.',
            'model': os.environ.get('OLLAMA_MODEL'),
        }
        gen_resp = await client.post(f'{FASTAPI_ROOT}/ollama/generate', json=gen_payload)
        gen_resp.raise_for_status()
        print('Ollama /generate →', gen_resp.json())

        # Black 320x320 canvas with white text: enough to drive the endpoint.
        dummy = np.zeros((320, 320, 3), dtype=np.uint8)
        cv2.putText(dummy, 'COLAB', (30, 170), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)
        success, buffer = cv2.imencode('.jpg', dummy)
        if not success:
            raise RuntimeError('Failed to encode dummy image.')
        files = {'file': ('dummy.jpg', buffer.tobytes(), 'image/jpeg')}
        detect_resp = await client.post(f'{FASTAPI_ROOT}/yolo/detect', files=files)
        detect_resp.raise_for_status()
        print('YOLO /detect →', detect_resp.json())


def _run_blocking(coro):
    """Run *coro* to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when the calling thread already
    has a running event loop, which is the case inside Jupyter/Colab kernels.
    In that situation the coroutine is executed on a fresh loop in a worker
    thread; otherwise ``asyncio.run`` is used directly.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread (plain script): run directly.
        return asyncio.run(coro)
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


_run_blocking(run_smoke_tests())

### Cleanup
Run the cell below when you need to stop the server and ngrok tunnel.

In [None]:
import asyncio

if 'server' in globals():
    # The server runs on its own event loop in a background thread. Setting
    # should_exit asks uvicorn's serve() loop to shut down from within that
    # thread; running server.shutdown() on a separate loop here would not
    # make serve() return.
    server.should_exit = True
    print('FastAPI server shutdown requested.')
if 'SERVER_THREAD' in globals():
    SERVER_THREAD.join(timeout=5)
    if SERVER_THREAD.is_alive():
        print('Server thread is still running after 5s; re-run this cell to check again.')
    else:
        print('Server thread joined.')
        # Forget the finished thread so the start cell can launch a new server.
        del SERVER_THREAD

from pyngrok import ngrok
# Terminate the ngrok process, which closes any open tunnels.
ngrok.kill()
print('ngrok tunnel closed.')