In [None]:
# provider-notebooks/Module 3 - Delta Sharing Architectural Patterns and Best Practices/Includes/Demo-Setup-3.1.ipynb

In [0]:
%run ./_common

In [0]:
# Demo configuration, exposed to both Python and SQL (per the original note).
# The dict literal keeps insertion order, so the resulting list holds the same
# (name, value) tuples in the same order as a hand-written list of pairs.
config_values = list(
    {
        "catalog": "global_analytics",
        "schema": "transactions",
        "source_table": "transactions",
        "replica_table": "transactions_r2_replica",
        "r2_bucket": "global-analytics-replication",
        "r2_path": "transactions/",
    }.items()
)

# create_da_objects is defined outside this cell (presumably by ./_common);
# later cells read the configured names as attributes of DA.
DA = create_da_objects(config_values)

In [0]:
%sql
-- Create the provider catalog when it does not exist yet, then make it the
-- current catalog for the statements that follow.
-- NOTE(review): DA.catalog is presumably a SQL variable published by
-- create_da_objects - confirm in ./_common.
CREATE CATALOG IF NOT EXISTS IDENTIFIER(DA.catalog)
COMMENT 'Provider catalog for cross-cloud replication demo';
USE CATALOG IDENTIFIER(DA.catalog);

In [0]:
%sql
-- Create the demo schema when it does not exist yet, then make it the current
-- schema for the statements that follow.
-- NOTE(review): DA.schema is presumably a SQL variable published by
-- create_da_objects - confirm in ./_common.
CREATE SCHEMA IF NOT EXISTS IDENTIFIER(DA.schema)
COMMENT 'Schema containing transaction data for R2 replication';
USE SCHEMA IDENTIFIER(DA.schema);

In [0]:
# Hand the configured (name, value) pairs to the display helper, which is
# defined outside this cell (presumably in ./_common).
display_config_values(config_values)

In [None]:
# Best-effort teardown of objects left behind by a previous run, so the setup
# below starts from a clean slate. A failing statement is reported and skipped
# rather than raised.
print("Cleaning up existing resources...")

# Order matters for readability of the log: tables first, then the external
# location, then the storage credential.
cleanup_commands = [
    f"DROP TABLE IF EXISTS {DA.catalog}.{DA.schema}.{table_name}"
    for table_name in (DA.source_table, DA.replica_table)
] + [
    "DROP EXTERNAL LOCATION IF EXISTS r2_location",
    "DROP STORAGE CREDENTIAL IF EXISTS r2_credential",
]

for statement in cleanup_commands:
    try:
        spark.sql(statement)
        print(f"✓ {statement}")
    except Exception as err:
        # Deliberately non-fatal: log the failure and move on to the next drop.
        print(f"Note: {statement} - {str(err)}")

print("Cleanup completed.")

In [None]:
%sql
-- Create (or replace) the source transaction table. This cell only defines the
-- table; the sample rows are added by the INSERT cell that follows.
-- The %sql magic is required because the notebook's other cells run as Python.
-- The three-part table name is assembled from the DA.* values and resolved
-- through IDENTIFIER().
CREATE OR REPLACE TABLE IDENTIFIER(CONCAT(DA.catalog, '.', DA.schema, '.', DA.source_table)) (
  transaction_id STRING,
  customer_id STRING,
  product_category STRING,
  amount DECIMAL(10,2),
  transaction_date DATE,
  region STRING,
  created_at TIMESTAMP
)
PARTITIONED BY (transaction_date)
TBLPROPERTIES (
  -- Change Data Feed is switched on for this table.
  'delta.enableChangeDataFeed' = 'true',
  'delta.autoOptimize.optimizeWrite' = 'true'
)
COMMENT 'Source transaction table with Change Data Feed enabled for R2 replication'

In [None]:
%sql
-- Insert the ten initial sample rows for the demo.
-- The %sql magic is required because the notebook's other cells run as Python.
-- Column order: transaction_id, customer_id, product_category, amount,
-- transaction_date, region, created_at.
INSERT INTO IDENTIFIER(CONCAT(DA.catalog, '.', DA.schema, '.', DA.source_table))
VALUES
  ('txn_001', 'cust_001', 'Electronics', 299.99, '2024-10-01', 'North America', '2024-10-01T10:00:00'),
  ('txn_002', 'cust_002', 'Clothing', 79.50, '2024-10-01', 'Europe', '2024-10-01T10:15:00'),
  ('txn_003', 'cust_003', 'Books', 24.99, '2024-10-01', 'Asia Pacific', '2024-10-01T10:30:00'),
  ('txn_004', 'cust_004', 'Home & Garden', 149.00, '2024-10-02', 'Latin America', '2024-10-02T09:00:00'),
  ('txn_005', 'cust_005', 'Sports', 89.99, '2024-10-02', 'North America', '2024-10-02T09:15:00'),
  ('txn_006', 'cust_006', 'Electronics', 599.99, '2024-10-02', 'Europe', '2024-10-02T14:00:00'),
  ('txn_007', 'cust_007', 'Clothing', 159.50, '2024-10-03', 'Asia Pacific', '2024-10-03T08:00:00'),
  ('txn_008', 'cust_008', 'Books', 45.99, '2024-10-03', 'North America', '2024-10-03T11:00:00'),
  ('txn_009', 'cust_009', 'Sports', 199.99, '2024-10-03', 'Europe', '2024-10-03T16:30:00'),
  ('txn_010', 'cust_010', 'Home & Garden', 299.00, '2024-10-04', 'Asia Pacific', '2024-10-04T13:45:00')

In [None]:
%sql
-- Verify the setup is complete: one summary row with the row count, the
-- earliest and latest transaction_date, and the rounded sum of amount.
-- The %sql magic is required because the notebook's other cells run as Python.
SELECT
  'Source Table Setup' as status,
  COUNT(*) as record_count,
  MIN(transaction_date) as earliest_date,
  MAX(transaction_date) as latest_date,
  ROUND(SUM(amount), 2) as total_amount
FROM IDENTIFIER(CONCAT(DA.catalog, '.', DA.schema, '.', DA.source_table))

In [None]:
# Completion banner: echo the names this setup used, framed by 80-column rules.
# print() joins its arguments with a single space, which yields the same text
# as interpolating the values into one string.
divider = "=" * 80

print(divider)
print("🚀 Demo Setup 3.1 Complete!")
print(divider)
print("✓ Catalog:", DA.catalog)
print("✓ Schema:", DA.schema)
print("✓ Source Table:", DA.source_table, "(with Change Data Feed enabled)")
print("✓ R2 Replica Table:", DA.replica_table, "(ready to be created)")
print("✓ R2 Bucket:", DA.r2_bucket)
print("✓ R2 Path:", DA.r2_path)
print("")
print("📊 Sample data loaded - ready for cross-cloud replication demo!")
print("🔄 Environment is clean and idempotent for repeated demo runs")
print(divider)