In [21]:
# Environment check
import sys
from pathlib import Path

python_version = sys.version_info
print(f"Python version: {python_version.major}.{python_version.minor}.{python_version.micro}")
print(f"Environment: {sys.executable}")

if python_version >= (3,12):
    print("Python version compatible")
else:
    print("Need Python 3.12+")
    exit()

Python version: 3.12.12
Environment: /Users/surekha/Documents/projects/RAG/research-assistant/.venv/bin/python
Python version compatible


In [22]:
# Check Docker
import subprocess

try:
    result = subprocess.run(
        ["docker", "--version"], capture_output=True, text=True, timeout=5
    )
    if result.returncode == 0:
        print(f"✓ Docker: {result.stdout}")
    else:
        print("✗ Docker: Not working")
        exit()
except:
    print("✗ Docker: Not found")
    exit()

✓ Docker: Docker version 28.3.3, build 980b856



In [23]:
# Check Docker Compose
try:
    result = subprocess.run(
        ["docker", "compose", "version"], capture_output=True, text=True, timeout=5
    )
    if result.returncode == 0:
        print(f"✓ Docker Compose: {result.stdout.split()[3]}")
    else:
        print("✗ Docker Compose: Not working")
        exit()
except:
    print("✗ Docker Compose: Not found")
    exit()

✓ Docker Compose: v2.39.2-desktop.1


In [24]:
# Check UV Package Manager
try:
    result = subprocess.run(
        ["uv", "--version"], capture_output=True, text=True, timeout=5
    )
    if result.returncode == 0:
        print(f"✓ UV: {result.stdout.strip()}")
        print("\n✓ All required software ready!")
    else:
        print("✗ UV: Not working")
        exit()
except:
    print("✗ UV: Not found")
    exit()

✓ UV: uv 0.9.8 (85c5d3228 2025-11-07)

✓ All required software ready!


#### Start Services

In [25]:
# Check Docker Running
try:
    result = subprocess.run(["docker", "info"], capture_output=True, timeout=5)
    if result.returncode == 0:
        print("✓ Docker is running")
    else:
        print("✗ Docker not running - start Docker Desktop")
        exit()
except:
    print("✗ Docker daemon not accessible")
    exit()

✓ Docker is running


In [26]:
# Find Project Root
current_dir = Path.cwd()
print(current_dir)

if current_dir.name == "notebooks":
    project_root = current_dir.parent
elif (current_dir / "compose.yml").exists():
    project_root = current_dir
else:
    project_root = None

if project_root and (project_root / "compose.yml").exists():
    print(f"✓ Project root: {project_root}")
else:
    print("✗ Missing compose.yml - check directory")
    exit()

/Users/surekha/Documents/projects/RAG/research-assistant/notebooks
✓ Project root: /Users/surekha/Documents/projects/RAG/research-assistant


In [27]:
# Check Current Containers
import json

try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=10,
    )

    if result.returncode == 0 and result.stdout.strip():
        print("Current containers:")
        for line in result.stdout.strip().split("\n"):
            if line.strip():
                try:
                    container = json.loads(line)
                    service = container.get("Service", "unknown")
                    state = container.get("State", "unknown")
                    print(f"  • {service}: {state}")
                except:
                    pass
    else:
        print("No containers running")

except Exception as e:
    print("Could not check containers")

Current containers:
  • airflow: running
  • api: running
  • clickhouse: running
  • opensearch-dashboards: running
  • langfuse: running
  • langfuse-postgres: running
  • ollama: running
  • opensearch: running
  • postgres: running
  • redis: running


In [28]:
# Service Health Check
EXPECTED_SERVICES = {
    "api": "FastAPI REST API server",
    "postgres": "PostgreSQL database",
    "opensearch": "OpenSearch search engine",
    "opensearch-dashboards": "OpenSearch web dashboard",
    "ollama": "Local LLM inference server",
    "airflow": "Workflow automation (optional - may be off)",
}

try:
    result = subprocess.run(
        ["docker", "compose", "ps", "--format", "json"],
        cwd=str(project_root),
        capture_output=True,
        text=True,
        timeout=15,
    )

    if result.returncode == 0:
        print("SERVICE STATUS")
        print("=" * 70)
        print(f"{'Service':<20} {'State':<15} {'Status':<15} {'Notes'}")
        print("-" * 70)
    else:
        print("Could not get service status")
        exit()

except Exception as e:
    print(f"Error checking services: {e}")
    exit()

# Parse Service Status
found_services = set()
service_states = {}

if result.stdout.strip():
    for line in result.stdout.strip().split("\n"):
        if line.strip():
            try:
                container = json.loads(line)
                service = container.get("Service", "unknown")
                state = container.get("State", "unknown")
                health = container.get("Health", "no check")

                found_services.add(service)
                service_states[service] = {"state": state, "health": health}

                if state == "running" and health in ["healthy", "no check"]:
                    indicator = "✓"
                    notes = "Ready"
                elif state == "running" and health == "unhealthy":
                    indicator = "⚠"
                    notes = "Starting up..."
                elif state == "exited":
                    indicator = "✗"
                    notes = "Failed to start"
                else:
                    indicator = "?"
                    notes = f"Status: {state}"

                print(f"{indicator} {service:<18} {state:<14} {health:<14} {notes}")

            except json.JSONDecodeError:
                pass

SERVICE STATUS
Service              State           Status          Notes
----------------------------------------------------------------------
✓ airflow            running        healthy        Ready
✓ api                running        healthy        Ready
✓ clickhouse         running        healthy        Ready
✓ opensearch-dashboards running        healthy        Ready
⚠ langfuse           running        unhealthy      Starting up...
✓ langfuse-postgres  running        healthy        Ready
✓ ollama             running        healthy        Ready
✓ opensearch         running        healthy        Ready
✓ postgres           running        healthy        Ready
✓ redis              running        healthy        Ready


In [29]:
# Check Missing Services
missing_services = set(EXPECTED_SERVICES.keys()) - found_services

if missing_services:
    print("\nMISSING SERVICES:")
    print("-" * 70)
    for service in missing_services:
        description = EXPECTED_SERVICES[service]
        if service == "airflow":
            print(f"⚠ {service:<18} not running    {'(Optional)':<14} {description}")
        else:
            print(f"✗ {service:<18} not running    {'Required':<14} {description}")

failed_services = [
    s
    for s, info in service_states.items()
    if info["state"] in ["exited", "restarting"] or info["health"] == "unhealthy"
]

if failed_services:
    print(f"\nTROUBLESHOOTING:")
    for service in failed_services:
        print(f"   docker compose logs {service}")
elif missing_services and "airflow" not in missing_services:
    print(f"\nACTION NEEDED:")
    print("Start missing services: docker compose up -d")


TROUBLESHOOTING:
   docker compose logs langfuse


In [30]:
# Test FastAPI Health
import requests

try:
    response = requests.get("http://localhost:8000/health", timeout=5)
    if response.status_code == 200:
        data = response.json()
        print("✓ FastAPI is responding")
        print(f"Status: {data.get('status', 'unknown')}")
    else:
        print(f"⚠ API returned status: {response.status_code}")
except requests.exceptions.ConnectionError:
    print("✗ API not responding - wait 1-2 minutes")
except Exception as e:
    print(f"✗ API test error: {e}")

⚠ API returned status: 404


In [31]:
# Test 1: Check PostgreSQL Connection (Basic)
# Let's verify PostgreSQL is accepting connections


def test_postgres_connection():
    """Test PostgreSQL connection using simple socket check."""
    import socket

    try:
        # Test if PostgreSQL port is open
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(3)
        result = sock.connect_ex(("localhost", 5432))
        sock.close()

        if result == 0:
            print("✓ PostgreSQL is accepting connections on port 5432!")
            return True
        else:
            print("✗ PostgreSQL port is not accessible")
            return False

    except Exception as e:
        print(f"✗ Could not test PostgreSQL: {e}")
        return False


postgres_available = test_postgres_connection()

if postgres_available:
    print("\n  Database Connection Details:")
    print("• Host: localhost")
    print("• Port: 5432")
    print("• Database: rag_db")
    print("• Username: rag_user")
    print("• Password: rag_password")

    print("\n  Recommended GUI Tools:")
    print("• DBeaver (Free): https://dbeaver.io/download/")
    print("• pgAdmin: https://www.pgadmin.org/download/")

✓ PostgreSQL is accepting connections on port 5432!

  Database Connection Details:
• Host: localhost
• Port: 5432
• Database: rag_db
• Username: rag_user
• Password: rag_password

  Recommended GUI Tools:
• DBeaver (Free): https://dbeaver.io/download/
• pgAdmin: https://www.pgadmin.org/download/


In [32]:
# Test PostgreSQL Connection
try:
    import psycopg2

    conn = psycopg2.connect(
        host="localhost",
        port=5432,
        database="rag_db",
        user="rag_user",
        password="rag_password",
    )

    print("✓ PostgreSQL connected")
    cursor = conn.cursor()

except ImportError:
    print("⚠ psycopg2 not installed - basic connection only")
    exit()
except Exception as e:
    print(f"✗ Database connection failed: {e}")
    exit()

✓ PostgreSQL connected


In [33]:
# Check Database Tables
cursor.execute(
    """
    SELECT table_name 
    FROM information_schema.tables 
    WHERE table_schema = 'public'
    ORDER BY table_name;
"""
)

all_tables = cursor.fetchall()

app_tables = []
airflow_tables = []

for (table_name,) in all_tables:
    if table_name in ["papers", "users", "embeddings"]:
        app_tables.append(table_name)
    else:
        airflow_tables.append(table_name)

print(f"Found {len(all_tables)} total tables")
print(f"Application tables: {len(app_tables)}")
print(f"Airflow tables: {len(airflow_tables)}")

for table in app_tables:
    print(f"  • {table}")

if not app_tables:
    print("  No application tables yet (expected in Week 1)")

cursor.close()
conn.close()

Found 48 total tables
Application tables: 0
Airflow tables: 48
  No application tables yet (expected in Week 1)
