In [None]:
# Import required libraries
import sys
import os

# IMPORTANT: set these BEFORE loading .env or importing anything from backend.
# load_dotenv() is called below with its default override=False, so values
# already present in os.environ are kept and take precedence over the .env file.
os.environ['DB_HOST'] = 'localhost'
os.environ['DB_PORT'] = '5433'  # Docker exposes PostgreSQL on 5433

# Now load .env for other credentials (user, password, db name)
from dotenv import load_dotenv
load_dotenv()

# Add parent directory to path ('..' is resolved against the kernel's working
# directory). Guarded so re-running this cell does not pile up duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

# Now import backend modules (DatabaseManager will use the env vars we just set)
from backend.database import get_session
from backend.models import Document
import pandas as pd

# Echo the effective connection settings; the password is deliberately not printed.
print("✅ Configured for local connection:")
print(f"   Host: {os.getenv('DB_HOST')}")
print(f"   Port: {os.getenv('DB_PORT')}")
print(f"   Database: {os.getenv('POSTGRES_DB')}")
print(f"   User: {os.getenv('POSTGRES_USER')}")

In [None]:
# Connect to database and load documents into DataFrame

# Columns of the summary frame, in display order. Each one is read from the
# attribute of the same name on Document.
DOC_COLUMNS = [
    'doc_id', 'title', 'date', 'salience', 'salience_bool',
    'raw_text', 'summary', 'source', 'url',
]
RAW_TEXT_PREVIEW_CHARS = 200  # only the start of raw_text is kept in the frame

with get_session() as session:
    # Query all documents
    documents = session.query(Document).all()

    # Copy plain values out of the ORM objects while the session is still open
    doc_data = []
    for doc in documents:
        row = {column: getattr(doc, column) for column in DOC_COLUMNS}
        # Missing or empty text becomes None; otherwise keep a short preview
        row['raw_text'] = row['raw_text'][:RAW_TEXT_PREVIEW_CHARS] if row['raw_text'] else None
        doc_data.append(row)

# Passing columns= keeps the expected schema even when the table is empty,
# so later column access does not fail with KeyError on a column-less frame.
df = pd.DataFrame(doc_data, columns=DOC_COLUMNS)

print(f"Loaded {len(df)} documents")
df.head()

In [None]:
# Check data types and null values: df.info() prints each column's dtype and
# non-null count, which shows where values are missing.
df.info()

In [None]:
# View a sample document with full details
if not df.empty:
    sample_doc_id = df['doc_id'].iloc[0]
    # pandas can hand back a NumPy scalar (e.g. numpy.int64) here; some DB
    # drivers (psycopg2, for one) cannot bind those as query parameters.
    # .item() converts to the plain Python equivalent; str ids are untouched.
    if hasattr(sample_doc_id, 'item'):
        sample_doc_id = sample_doc_id.item()

    with get_session() as session:
        # Re-fetch inside a live session so the related collections printed
        # below can be read from the ORM object.
        doc = session.get(Document, sample_doc_id)

        if doc is None:
            # The row may have been removed since the DataFrame was built
            print(f"Document {sample_doc_id!r} not found in the database")
        else:
            print(f"Document ID: {doc.doc_id}")
            print(f"Title: {doc.title}")
            print(f"Date: {doc.date}")
            print(f"Salience: {doc.salience}")
            print(f"Salience Bool: {doc.salience_bool}")
            print(f"\nCategories: {[cat.category for cat in doc.categories]}")
            print(f"Subcategories: {[sub.subcategory for sub in doc.subcategories]}")
            print(f"Initiating Countries: {[country.initiating_country for country in doc.initiating_countries]}")
            print(f"Recipient Countries: {[country.recipient_country for country in doc.recipient_countries]}")
            print(f"Projects: {[proj.project for proj in doc.projects]}")
            print(f"\nSummary: {doc.summary}")
            print(f"\nRaw Text (first 500 chars):\n{doc.raw_text[:500] if doc.raw_text else 'N/A'}")