# 01. Setup and Connection

This notebook covers:
- Verifying database connection
- Checking service availability (Ollama; a Langfuse check is not included in this notebook yet)
- Exploring database schema
- Verifying sample data

## 1. Import Libraries

In [None]:
import sys
# Extend the module search path before the `src` import below so that package can resolve.
# NOTE(review): '/workspace' is presumably the project root inside the dev container — confirm.
sys.path.append('/workspace')

import os
import requests
from src.utils.db_utils import DatabaseConnection, get_database_context
import pandas as pd

# Reaching this line means every import above succeeded
print("✓ Libraries imported successfully")

## 2. Check Environment Variables

In [None]:
# Snapshot the environment variables this notebook reports on.
# Unset variables are stored (and printed) as None.
env_vars = {
    name: os.getenv(name)
    for name in ('POSTGRES_HOST', 'POSTGRES_PORT', 'POSTGRES_DB', 'OLLAMA_HOST')
}

print("Environment Variables:")
for key, value in env_vars.items():
    print("  " + key + ": " + str(value))

## 3. Test Database Connection

In [None]:
# Create the connection helper that the later cells call through `db`
db = DatabaseConnection()

# Smoke test: obtain one connection and close it straight away.
# Any failure is reported instead of raised so the notebook keeps running.
try:
    conn = db.get_connection()
    conn.close()
except Exception as e:
    print(f"✗ Database connection failed: {e}")
else:
    print("✓ Database connection successful!")

## 4. Explore Database Schema

In [None]:
# Fetch the table list once (later cells reuse `tables`) and print one bullet per entry
tables = db.get_all_tables()
print(f"Found {len(tables)} tables:")
for table in tables:
    print("  -", table)

In [None]:
# For every table: print a banner, render its schema, then report its row count
banner_rule = '=' * 60

for table in tables:
    print("\n" + banner_rule)
    print(f"Table: {table}")
    print(banner_rule)

    schema = db.get_table_schema(table)
    display(schema)

    # NOTE(review): the table name is interpolated directly into the SQL text.
    # Presumably safe because the names come from db.get_all_tables() rather than
    # user input — confirm; names that need quoting would break this query.
    count_rows = db.execute_query(f"SELECT COUNT(*) as count FROM {table}")
    count = count_rows[0]['count']
    print(f"Row count: {count}")

## 5. View Sample Data

In [None]:
# Show a three-row sample from each of the main tables
main_tables = ['departments', 'employees', 'customers', 'projects', 'sales']
sample_rule = '=' * 60

for table in main_tables:
    print("\n" + sample_rule)
    print("Sample data from: " + table)
    print(sample_rule)

    df = db.get_table_sample(table, limit=3)
    display(df)

## 6. Check Ollama Service

In [None]:
# Check if Ollama is running and list the models it reports.
#
# OLLAMA_HOST is normalised before use:
#   - an unset, empty or whitespace-only value falls back to the local default
#     (os.getenv's default argument alone would keep an empty string),
#   - a trailing slash is dropped so the URL never becomes ".../​/api/tags",
#   - a bare "host:port" value gets an explicit http:// scheme, because
#     requests refuses URLs that have no scheme.
ollama_host = (os.getenv('OLLAMA_HOST') or '').strip().rstrip('/') or 'http://localhost:11434'
if '://' not in ollama_host:
    ollama_host = f"http://{ollama_host}"

try:
    response = requests.get(f"{ollama_host}/api/tags", timeout=5)
    if response.status_code == 200:
        # "models" may be absent or null in the payload; treat both as "no models"
        models = response.json().get('models') or []
        print(f"✓ Ollama is running at {ollama_host}")
        print(f"Available models: {len(models)}")
        for model in models:
            print(f"  - {model.get('name')}")
        if not models:
            # The pull hint only helps when the service is up but has nothing installed
            print("To install a model, run in terminal: docker exec -it text2sql-ollama ollama pull llama2")
    else:
        print(f"✗ Ollama responded with status {response.status_code}")
except Exception as e:
    # Deliberately broad: this is a best-effort health check and the notebook
    # should continue to the next section even when Ollama is unavailable.
    print(f"✗ Cannot connect to Ollama: {e}")
    print(f"Check that the Ollama service is running and reachable at {ollama_host}")

## 7. Get Database Context for LLM

In [None]:
# Get complete database context that will be used for text2sql.
# get_database_context() is imported from src.utils.db_utils; the result is printed
# unchanged so it can be inspected here.
# NOTE(review): presumably a textual schema description — confirm against db_utils.
context = get_database_context()
print(context)

## 8. Verify pgvector Extension

In [None]:
# Look for a 'vector' row in pg_extension; an empty result means the extension is absent
query = "SELECT * FROM pg_extension WHERE extname = 'vector'"
result = db.execute_query(query)

if not result:
    print("✗ pgvector extension not found")
else:
    print("✓ pgvector extension is installed")
    # Fall back to 'unknown' when the row carries no extversion key
    installed_version = result[0].get('extversion', 'unknown')
    print(f"Version: {installed_version}")

## Summary

You should have verified:
- ✓ Database connection
- ✓ Database schema and sample data
- ✓ Ollama service availability
- ✓ pgvector extension

Next: Move to `02_embedding_and_rag.ipynb` to work with embeddings and RAG.