# USCCB Parish Extraction - Google Colab Setup

This notebook sets up the environment for parish extraction in Google Colab.

## Required Secrets
Add these to your Colab Secrets (🔑 icon in the left sidebar) and enable notebook access for each one:
- `SUPABASE_URL` - Your Supabase project URL
- `SUPABASE_KEY` - Your Supabase API key
- `GENAI_API_KEY_USCCB` - Your Google AI API key

In [None]:
# Cell 1: Clone Repository and Install Dependencies
import os
import sys

# Clone the repository (only if not already cloned)
if not os.path.exists('/content/usccb-parish-extraction'):
    !git clone https://github.com/tomknightatl/usccb-parish-extraction.git
    print("✅ Repository cloned")
else:
    print("✅ Repository already exists")
    os.chdir('/content/usccb-parish-extraction')
    !git pull  # Update to latest version
    print("✅ Repository updated")

# Change to repo directory
os.chdir('/content/usccb-parish-extraction')

# Install dependencies
!pip install -r requirements.txt
print("✅ Dependencies installed")

In [None]:
# Cell 2: Setup Python Paths
import sys
import os

# Root of the cloned repository. It is put on the import path so the later
# cells can import the `config` and `src` packages from it.
repo_path = '/content/usccb-parish-extraction'

# Append only when missing, so re-running this cell never adds a duplicate entry
if repo_path not in sys.path:
    sys.path.append(repo_path)

# Make the repository the current working directory
os.chdir(repo_path)

# Report the resulting state
print("✅ Working directory: {}".format(os.getcwd()))
print("✅ Python path configured")

In [None]:
# Cell 3: Setup Configuration with Colab Secrets
from google.colab import userdata
from config.settings import setup_environment, set_config

# Build the configuration from the three Colab secrets and register it as the
# global config. If any step raises, `config` is set to None and instructions
# for adding the secrets are printed instead of a traceback.
try:
    config = setup_environment(
        supabase_url=userdata.get('SUPABASE_URL'),
        supabase_key=userdata.get('SUPABASE_KEY'),
        genai_api_key=userdata.get('GENAI_API_KEY_USCCB'),
        max_dioceses=5  # Default for demos
    )

    # Register the configuration through the project's global setter
    set_config(config)

    print("✅ Configuration setup complete")

except Exception as exc:
    print(f"❌ Configuration error: {exc}")
    # Emit the help text one entry per line
    print(
        "\nPlease ensure you have added the required secrets to Colab:",
        "1. Click the 🔑 icon in the left sidebar",
        "2. Add secrets: SUPABASE_URL, SUPABASE_KEY, GENAI_API_KEY_USCCB",
        "3. Enable notebook access for each secret",
        sep="\n",
    )
    # Later cells test `config` for truthiness to decide whether to proceed
    config = None

In [None]:
# Cell 4: Verify Installation
try:
    # Import each core module once; an ImportError here means the checkout or
    # the Python path is not set up correctly.
    from src.models import (
        Parish,
        ExtractionResult,
        SiteType,
        Diocese,
    )
    from src.utils.webdriver import (
        setup_driver,
        clean_text,
    )
    from src.utils.ai_analysis import (
        analyze_with_ai,
        detect_site_type,
    )
    from src.utils.database import save_parishes_to_database
    from src.extractors import get_extractor
    from src.pipeline import ParishExtractionPipeline

    print("✅ All core modules imported successfully")

    # Summarize the configuration created in the configuration cell
    if not config:
        print("⚠️ Configuration incomplete - some features may not work")
    else:
        db_status = 'Connected' if config.supabase else 'Not connected'
        print(f"✅ Database: {db_status}")
        ai_status = 'Configured' if config.genai_enabled else 'Mock mode'
        print(f"✅ AI: {ai_status}")
        print(f"✅ Max dioceses: {config.max_dioceses}")
        print("\n🚀 Ready to run parish extraction!")

except ImportError as err:
    print(f"❌ Import error: {err}")
    print("Please check that all files are in the correct location")
    print("You may need to restart the runtime and run the setup again")

In [None]:
# Cell 5: Quick System Test (Optional)
# Exercises the WebDriver, the AI analysis helper and (when configured) the
# database once each. Any failure is reported as a warning rather than raised.
if not config:
    print("⚠️ Skipping system test - configuration incomplete")
else:
    print("🧪 Running quick system test...")

    try:
        # WebDriver: create a driver, then shut it down again straight away
        driver = setup_driver()
        print("✅ WebDriver setup successful")
        driver.quit()

        # AI analysis: run one sample call and show the score it reports
        # (falls back to 'N/A' when the result has no 'score' entry)
        result = analyze_with_ai("parish directory test", "parish_directory")
        score = result.get('score', 'N/A')
        print(f"✅ AI analysis working (score: {score})")

        # Database: a one-row query is enough to prove the connection works
        if not config.supabase:
            print("⚠️ Database not configured - will run in test mode")
        else:
            dioceses_query = config.supabase.table('Dioceses').select('Name').limit(1)
            response = dioceses_query.execute()
            print("✅ Database connection verified")

        print("\n🎉 All systems ready! You can now run the other notebooks.")

    except Exception as exc:
        print(f"⚠️ Test warning: {exc}")
        print("System may still work, but with limited functionality")