# proto_loc Platform Validation

This notebook validates that all platform components are working correctly before loading NYC taxi data.

In [None]:
# Test 1: Check DuckDB databases
import duckdb
import os
from pathlib import Path

print("🔍 Testing DuckDB databases...")

# Logical database name -> DuckDB file path, relative to the notebook's
# working directory. Later cells reuse this mapping.
db_paths = {
    'raw': '02_duck_db/01_raw/raw.duckdb',
    'dev': '02_duck_db/02_dev/dev.duckdb',
    'prod': '02_duck_db/03_prod/prod.duckdb'
}

for name, path in db_paths.items():
    if not Path(path).exists():
        print(f"❌ {name}: Database not found")
        continue
    try:
        # Read-only is enough for an inspection query and matches the
        # read-only connections used by the concurrency check.
        conn = duckdb.connect(path, read_only=True)
        try:
            # NOTE(review): confirm the installed DuckDB accepts SHOW SCHEMAS;
            # information_schema.schemata is the documented alternative.
            schemas = conn.execute("SHOW SCHEMAS").fetchall()
        finally:
            # Always release the file handle, even when the query fails.
            conn.close()
        print(f"✅ {name}: {len(schemas)} schemas")
    except Exception as e:
        # Report the failure and keep validating the remaining databases.
        print(f"❌ {name}: {e}")

In [None]:
# Test 2: Validate dbt project
import subprocess
import os

print("🔍 Testing dbt project...")

try:
    # Run dbt inside the project directory via `cwd` instead of os.chdir, so
    # the kernel's working directory is never changed and a failure or
    # interrupt cannot leave the notebook stranded in '04_dbt'.
    result = subprocess.run(
        ['dbt', 'debug'],
        cwd='04_dbt',
        capture_output=True,
        text=True,
        timeout=30,
    )
    if result.returncode == 0:
        print("✅ dbt project configured correctly")
    else:
        # Diagnostics may arrive on stdout rather than stderr, so fall back
        # to stdout when stderr is empty.
        details = result.stderr.strip() or result.stdout.strip()
        print(f"❌ dbt issues: {details}")
except Exception as e:
    # Covers a missing dbt executable, a missing project directory and the
    # 30-second timeout.
    print(f"❌ dbt test failed: {e}")

In [None]:
# Test 3: Check Cube.js configuration
import json
import os

print("🔍 Testing Cube.js configuration...")

# Path of the Cube.js configuration file, relative to the working directory.
cube_config = '05_cube_dev/cube.js'

# Pick the status line from the file's presence, then emit it once.
cube_status = (
    "✅ Cube.js configuration found"
    if Path(cube_config).exists()
    else "❌ Cube.js configuration missing"
)
print(cube_status)

In [None]:
# Test 4: Validate environment
print("🔍 Checking environment...")

# Presence check only: the file's contents are not read here.
env_file = Path('.env')
if not env_file.exists():
    print("⚠️  .env file missing (copy from .env.example)")
else:
    print("✅ .env file found")

# os.getenv reads the kernel's process environment; nothing in this cell
# loads .env into it, so the key must already be exported.
openai_key = os.getenv('OPENAI_API_KEY')
if not openai_key:
    print("⚠️  OpenAI API key not set")
else:
    print("✅ OpenAI API key configured")

In [None]:
# Test 5: Quick platform health check
# Closing banner and follow-up instructions. It is printed unconditionally
# and does not depend on the outcome of any check.
summary_lines = (
    "🎉 Platform validation complete!",
    "\nNext steps:",
    "1. Run: python init_duckdb.py",
    "2. Run: docker-compose up",
    "3. Open services at their respective ports",
    "4. Load NYC taxi data and begin development!",
)
for line in summary_lines:
    print(line)

In [None]:
# Test 6: Network Connectivity
import socket

print("🔍 Testing network connectivity...")

# Service name -> TCP port probed on localhost.
services = {
    'dagster': 3000,
    'cube': 4000,
    'superset': 8088
}

for service, port in services.items():
    try:
        # Only a successful TCP handshake is checked; the context manager
        # closes the socket as soon as the connection is established.
        with socket.create_connection(('localhost', port), timeout=5):
            pass
        print(f"✅ {service} reachable on port {port}")
    except OSError as e:
        # OSError covers refused connections, timeouts and resolution
        # errors, while still letting KeyboardInterrupt stop the cell.
        print(f"❌ {service} not reachable on port {port}: {e}")

In [None]:
# Test 7: DuckDB Concurrency Simulation
import concurrent.futures

print("🔍 Simulating DuckDB concurrency...")

def read_query(db_path):
    """Count the rows of information_schema.tables in one DuckDB file.

    Opens a read-only connection to ``db_path``, runs the count query and
    returns the single row from ``fetchone()``. Errors raised while
    connecting or querying propagate to the caller.
    """
    conn = duckdb.connect(db_path, read_only=True)
    try:
        return conn.execute("SELECT COUNT(*) FROM information_schema.tables").fetchone()
    finally:
        # Close the connection even if the query raises, so a failed read
        # does not leave the database file open.
        conn.close()

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
    # Map each future back to its database name so every result or failure
    # can be attributed to a specific file.
    futures = {
        executor.submit(read_query, path): name
        for name, path in db_paths.items()
    }
    for future in concurrent.futures.as_completed(futures):
        name = futures[future]
        try:
            print(f"✅ Concurrent read successful ({name}): {future.result()}")
        except Exception as e:
            # future.result() re-raises whatever read_query raised in the worker.
            print(f"❌ Concurrency issue ({name}): {e}")

In [None]:
# Test 8: PandasAI LLM Configuration
from pandasai import SmartDataframe
from pandasai.llm import OpenAI, Anthropic, GoogleGemini
import pandas as pd
import os

print("🔍 Testing PandasAI LLM configuration...")

# Provider name -> (LLM class, environment variable holding its API key).
llm_providers = {
    'openai': (OpenAI, 'OPENAI_API_KEY'),
    'anthropic': (Anthropic, 'ANTHROPIC_API_KEY'),
    'gemini': (GoogleGemini, 'GEMINI_API_KEY'),
}

preferred = os.getenv('PREFERRED_LLM', 'openai')

# Try the preferred provider first, then fall back to OpenAI. Record which
# provider was actually selected so the report below names the right one.
llm = None
active_provider = None
for candidate in (preferred, 'openai'):
    if candidate not in llm_providers:
        continue
    llm_class, key_var = llm_providers[candidate]
    api_key = os.getenv(key_var)
    if api_key:
        llm = llm_class(api_token=api_key)
        active_provider = candidate
        break

if llm:
    if active_provider != preferred:
        print(f"⚠️  Preferred LLM '{preferred}' not configured - falling back to {active_provider}")
    try:
        # Minimal smoke test: one tiny frame and one question.
        df = pd.DataFrame({'test': [1, 2, 3]})
        sdf = SmartDataframe(df, config={"llm": llm})
        result = sdf.chat("What is the sum?")
        print(f"✅ PandasAI working with {active_provider}: {result}")
    except Exception as e:
        # Report the failure instead of aborting the notebook, as the other
        # checks do.
        print(f"❌ PandasAI test failed with {active_provider}: {e}")
else:
    print("❌ No LLM configured - check .env keys")