# ContentPulse Lakebase Setup
Create a Databricks Lakebase catalog and synced tables for the ContentPulse dashboard.


In [1]:
# Evict any cached `contentpulse_config` module entry so a stale copy cannot be
# picked up later in this session.
# NOTE(review): nothing is re-executed in this cell; the configuration notebook
# is run by the `%run ./contentpulse_config` cell that follows.
import sys
import importlib

# pop() with a default is a no-op when the module was never cached.
sys.modules.pop('contentpulse_config', None)

print("\n‚úÖ Variables forcefully reloaded from contentpulse_config.ipynb")



‚úÖ Variables forcefully reloaded from contentpulse_config.ipynb


In [2]:
%run ./contentpulse_config


üì∞ ContentPulse Project Configuration
   Project: ContentPulse_Publishing_Analytics
   User: kunal.gaurav@databricks.com
   Volume: /Volumes/kunal/publishing/publishing_data
   Table: kunal.publishing.content_engagement_events
üóÑÔ∏è  Lakebase Configuration
   Instance: kunal-gaurav-lakebase-instance
   Catalog: pg_contentpulse_kunal-gaurav
   Synced Table: kunal.publishing.content_engagement_synced
‚úÖ Catalog 'kunal' ready
‚úÖ Schema 'kunal.publishing' ready
‚úÖ Volume '/Volumes/kunal/publishing/publishing_data' ready
‚úÖ Table 'kunal.publishing.content_engagement_events' ready with Change Data Feed enabled


In [4]:
%pip install databricks-sdk

# Databricks SDK entry point plus the `database` service dataclasses that the
# setup cells below pass to `w.database.*` calls (instance, catalog, synced
# table, pipeline spec and scheduling policy).
# NOTE(review): `DatabaseTable` is not referenced in the cells visible here.
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.database import (
    DatabaseInstance,
    DatabaseCatalog,
    DatabaseTable,
    SyncedDatabaseTable,
    SyncedTableSpec,
    NewPipelineSpec,
    SyncedTableSchedulingPolicy
)
# NOTE(review): `time` is not referenced in the cells visible here.
import time

print("‚úÖ Databricks SDK imported")


Note: you may need to restart the kernel to use updated packages.
‚úÖ Databricks SDK imported


In [5]:
# Build the workspace client used by every later cell (constructed with no
# explicit arguments).
w = WorkspaceClient()

# Report which workspace host and which user the client resolved to.
print("‚úÖ Connected to workspace: {}".format(w.config.host))
print("   User: {}".format(w.current_user.me().user_name))


‚úÖ Connected to workspace: https://e2-demo-field-eng.cloud.databricks.com
   User: kunal.gaurav@databricks.com


In [6]:
# Echo the configuration values this notebook depends on, one per line, so a
# wrong setting is visible before any Lakebase resource is touched.
print("‚úÖ ContentPulse Configuration loaded:")
print("   Project: {}".format(PROJECT_NAME))
print("   Instance: {}".format(INSTANCE_NAME))
print("   Capacity: {}".format(INSTANCE_CAPACITY))
print("   Source Table: {}".format(FULL_TABLE_NAME))
print("   Synced Table: {}".format(SYNCED_TABLE_NAME))
print("   Lakebase Catalog: {}".format(LAKEBASE_CATALOG_NAME))


‚úÖ ContentPulse Configuration loaded:
   Project: ContentPulse_Publishing_Analytics
   Instance: kunal-gaurav-lakebase-instance
   Capacity: CU_1
   Source Table: kunal.publishing.content_engagement_events
   Synced Table: kunal.publishing.content_engagement_synced
   Lakebase Catalog: pg_contentpulse_kunal-gaurav


## Step 1: Reuse or Create the Lakebase Instance


In [7]:
# Check if database instance exists
def get_database_instance_by_name(instance_name):
    """Return the database instance named ``instance_name``, or ``None``.

    Walks the instances listed by the workspace client ``w`` and returns the
    first one whose ``name`` equals ``instance_name``. Iteration stops at the
    first match.
    """
    matching_instances = (
        candidate
        for candidate in w.database.list_database_instances()
        if candidate.name == instance_name
    )
    return next(matching_instances, None)

# Look the instance up by name first, so an existing one is reused instead of
# a new one being requested.
existing_instance = get_database_instance_by_name(INSTANCE_NAME)

if not existing_instance:
    # Nothing with this name was found: request a new instance.
    print(f"üîÑ Creating Database Instance: {INSTANCE_NAME}...")
    print(f"   Capacity: {INSTANCE_CAPACITY}")
    print("   This may take 5-10 minutes...")

    new_instance_request = DatabaseInstance(
        name=INSTANCE_NAME,
        capacity=INSTANCE_CAPACITY
    )
    instance = w.database.create_database_instance_and_wait(
        database_instance=new_instance_request
    )

    print("‚úÖ Database Instance created successfully!")
    print(f"   Instance Name: {instance.name}")
    print(f"   Instance UID: {instance.uid}")
    print(f"   Capacity: {instance.capacity}")
    print(f"   State: {instance.state}")
else:
    # An instance with this name is already present: report it and reuse it.
    print(f"‚úÖ Database Instance '{INSTANCE_NAME}' already exists")
    print(f"   Instance ID: {existing_instance.uid}")
    print(f"   Capacity: {existing_instance.capacity}")
    print(f"   State: {existing_instance.state}")
    print("   üìù Reusing existing instance for ContentPulse")
    instance = existing_instance

# Later cells refer to the instance through this name.
LAKEBASE_INSTANCE_NAME = instance.name
print(f"\nüíæ Instance Name: {LAKEBASE_INSTANCE_NAME}")


‚úÖ Database Instance 'kunal-gaurav-lakebase-instance' already exists
   Instance ID: f60d62f1-e44a-43c7-813f-58138e0552fd
   Capacity: CU_1
   State: DatabaseInstanceState.AVAILABLE
   üìù Reusing existing instance for ContentPulse

üíæ Instance Name: kunal-gaurav-lakebase-instance


## Step 2: Create ContentPulse Database Catalog


In [8]:
# Create the ContentPulse database catalog, or reuse it when it already exists.
# Either way `db_catalog` ends up holding the catalog object, so re-running
# this cell gives the same result as the first run.
print(f"üîÑ Setting up Database Catalog: {LAKEBASE_CATALOG_NAME}...")

try:
    # The catalog and the database it maps to share the same name.
    db_catalog = w.database.create_database_catalog(
        catalog=DatabaseCatalog(
            name=LAKEBASE_CATALOG_NAME,
            database_instance_name=LAKEBASE_INSTANCE_NAME,
            database_name=LAKEBASE_CATALOG_NAME,
            create_database_if_not_exists=True
        )
    )
    print("‚úÖ Database Catalog created!")

except Exception as e:
    # The error is recognised by its message text; anything that is not an
    # "already exists" failure is reported and re-raised unchanged.
    if "already exists" not in str(e).lower():
        print(f"‚ùå Unexpected error: {e}")
        raise

    print(f"‚úÖ Database Catalog '{LAKEBASE_CATALOG_NAME}' already exists")
    print("   Using existing catalog")
    # Fetch the existing catalog instead of leaving db_catalog as None, so the
    # details printed below reflect what is actually registered.
    db_catalog = w.database.get_database_catalog(name=LAKEBASE_CATALOG_NAME)

print(f"   Catalog Name: {db_catalog.name}")
print(f"   Database Name: {db_catalog.database_name}")

print(f"\nüíæ Lakebase Catalog: {LAKEBASE_CATALOG_NAME}")
print(f"   Instance: {LAKEBASE_INSTANCE_NAME}")


üîÑ Setting up Database Catalog: pg_contentpulse_kunal-gaurav...
‚úÖ Database Catalog created!
   Catalog Name: pg_contentpulse_kunal-gaurav
   Database Name: pg_contentpulse_kunal-gaurav

üíæ Lakebase Catalog: pg_contentpulse_kunal-gaurav
   Instance: kunal-gaurav-lakebase-instance


## Step 3: Create Synced Table (Delta → PostgreSQL)


In [9]:
# Create the synced table that copies the source Delta table into the Lakebase
# (PostgreSQL) database. Re-running the cell reuses an existing synced table;
# any other failure is reported and re-raised so the notebook stops instead of
# continuing without a synced table.
print(f"üîÑ Creating Synced Database Table: {SYNCED_TABLE_NAME}...")
print(f"   Source Table: {FULL_TABLE_NAME}")

# Single source of truth for the key columns: used by the spec and the summary.
PRIMARY_KEY_COLUMNS = ["event_id", "timestamp"]

try:
    # Catalog/schema handed to the sync pipeline for its own storage.
    # NOTE(review): storage_catalog is set to the Lakebase catalog name;
    # confirm the pipeline can store its data there, or whether a standard
    # Unity Catalog catalog is required.
    pipeline_spec = NewPipelineSpec(
        storage_catalog=LAKEBASE_CATALOG_NAME,
        storage_schema=SCHEMA_NAME
    )

    synced_spec = SyncedTableSpec(
        source_table_full_name=FULL_TABLE_NAME,  # Source Delta table
        primary_key_columns=PRIMARY_KEY_COLUMNS,
        create_database_objects_if_missing=True,  # Auto-create PG objects
        scheduling_policy=SyncedTableSchedulingPolicy.CONTINUOUS,
        new_pipeline_spec=pipeline_spec
    )

    synced_table = w.database.create_synced_database_table(
        synced_table=SyncedDatabaseTable(
            name=SYNCED_TABLE_NAME,  # UC catalog.schema.table name
            database_instance_name=LAKEBASE_INSTANCE_NAME,
            logical_database_name=LAKEBASE_CATALOG_NAME,
            spec=synced_spec
        )
    )

    print("‚úÖ Synced Table created!")
    print(f"   Table Name: {synced_table.name}")
    print(f"   Source: {FULL_TABLE_NAME}")
    print(f"   Target Instance: {LAKEBASE_INSTANCE_NAME}")
    print("   Scheduling: CONTINUOUS")
    print(f"   Primary Keys: {', '.join(PRIMARY_KEY_COLUMNS)}")
except Exception as e:
    # Same convention as the catalog cell: "already exists" means reuse,
    # everything else is a real failure.
    if "already exists" not in str(e).lower():
        print(f"‚ö†Ô∏è  Error: {e}")
        print(f"   Make sure source table '{FULL_TABLE_NAME}' exists and has data")
        raise

    print(f"‚úÖ Synced Table '{SYNCED_TABLE_NAME}' already exists")
    print("   Using existing synced table")
    # Keep `synced_table` defined on this path too.
    synced_table = w.database.get_synced_database_table(name=SYNCED_TABLE_NAME)
    print(f"   Table Name: {synced_table.name}")


üîÑ Creating Synced Database Table: kunal.publishing.content_engagement_synced...
   Source Table: kunal.publishing.content_engagement_events
‚úÖ Synced Table created!
   Table Name: kunal.publishing.content_engagement_synced
   Source: kunal.publishing.content_engagement_events
   Target Instance: kunal-gaurav-lakebase-instance
   Scheduling: CONTINUOUS
   Primary Keys: event_id, timestamp


## Step 4: Generate Connection Details for Dashboard


In [None]:
# Re-read the instance and request a database credential for it, then collect
# the values a PostgreSQL client needs to connect.
instance = w.database.get_database_instance(name=LAKEBASE_INSTANCE_NAME)
credential = w.database.generate_database_credential(
    instance_names=[LAKEBASE_INSTANCE_NAME]
)

host = instance.read_write_dns
port = 5432
dbname = LAKEBASE_CATALOG_NAME
user = DB_USER_EMAIL
# The credential token is kept in `password`; it is not echoed below.
password = credential.token

# Summary banner: a title framed by rules, then one "label: value" line each.
summary_rule = "=" * 70
print(summary_rule)
print("üìù CONNECTION DETAILS FOR CONTENTPULSE DASHBOARD")
print(summary_rule)
for detail_label, detail_value in (
    ("Host", host),
    ("Port", port),
    ("Database", dbname),
    ("User", user),
    ("Instance Name", LAKEBASE_INSTANCE_NAME),
):
    print(f"{detail_label}: {detail_value}")
print(summary_rule)


## ✅ ContentPulse Lakebase Setup Complete!

Your ContentPulse data is now syncing from Delta Lake to PostgreSQL for real-time dashboard access.
