# Lakebase Data Querying & Analysis

This notebook demonstrates how to query and analyze data from your Lakebase database.

In [None]:
import logging
import os
import uuid

import pandas as pd
import psycopg2
from databricks.sdk import WorkspaceClient

# Create this notebook's logger, then configure root logging at INFO level
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

## Configuration

In [None]:
# Configuration
# Each setting can be overridden with an environment variable so the notebook
# can be pointed at a different instance, profile or user without editing code.
# When the variable is unset, the original demo value is used.
INSTANCE_NAME = os.environ.get("LAKEBASE_INSTANCE_NAME", "rb-demo-lakebase")  # database instance looked up via the SDK
PROFILE = os.environ.get("LAKEBASE_PROFILE", "az-demo")  # profile passed to WorkspaceClient
USER_NAME = os.environ.get("LAKEBASE_USER_NAME", "rohit.bhagwat@databricks.com")  # user passed to psycopg2.connect

In [None]:
# Build the workspace client from the configured profile and look up the instance
w = WorkspaceClient(profile=PROFILE)
instance = w.database.get_database_instance(name=INSTANCE_NAME)

# Request a database credential for that instance; each request carries a
# freshly generated UUID as its request_id. The resulting token is used as the
# PostgreSQL password in the connection cell.
cred = w.database.generate_database_credential(
    instance_names=[INSTANCE_NAME],
    request_id=str(uuid.uuid4()),
)

print(f"✅ Connected to instance: {instance.name}")

In [None]:
# Connect to PostgreSQL
# The host comes from the instance's read/write DNS name and the password is the
# generated credential token; sslmode="require" refuses a non-TLS connection.
conn = psycopg2.connect(
    host=instance.read_write_dns,
    dbname="databricks_postgres",
    user=USER_NAME,
    password=cred.token,
    sslmode="require"
)

# The queries in this notebook are read-only SELECTs. With psycopg2's default
# (autocommit off) the first query opens a transaction that stays open across
# cells, and one failed query leaves it aborted so every later cell errors until
# a rollback. Autocommit makes each statement independent.
conn.autocommit = True

print("✅ Connected to PostgreSQL database")

## Basic Data Exploration

In [None]:
# Fetch every row of the coffee_shops table
with conn.cursor() as cur:
    cur.execute("SELECT * FROM coffee_operations.coffee_shops")
    rows = cur.fetchall()

print(f"📊 Found {len(rows)} coffee shops")
print("\nFirst 3 shops:")
# Columns are addressed by position because the query is SELECT *.
# NOTE(review): presumably positions 1-3 are name, city and state/province —
# confirm against the table definition.
for position, shop in enumerate(rows[:3], start=1):
    print(f"  {position}. {shop[1]} - {shop[2]}, {shop[3]}")

## Geographic Analysis

In [None]:
# Analyze shops by country: shop count and average seating capacity per
# country, largest count first
with conn.cursor() as cur:
    cur.execute("""
        SELECT country, COUNT(*) as shop_count, AVG(seating_capacity) as avg_capacity
        FROM coffee_operations.coffee_shops
        GROUP BY country
        ORDER BY shop_count DESC
    """)

    country_stats = cur.fetchall()

print("🌍 Shops by Country:")
for country, count, avg_cap in country_stats:
    # AVG() is NULL (None in Python) when no shop in the group has a
    # seating_capacity; formatting None with ".0f" would raise TypeError.
    seats = "n/a" if avg_cap is None else f"{avg_cap:.0f}"
    print(f"   {country}: {count} shops, avg {seats} seats")

## Premium vs Standard Locations

In [None]:
# Compare premium vs standard locations: count plus average/min/max seating
# capacity per is_premium_location value, premium group first
with conn.cursor() as cur:
    cur.execute("""
        SELECT 
            is_premium_location,
            COUNT(*) as shop_count,
            AVG(seating_capacity) as avg_capacity,
            MIN(seating_capacity) as min_capacity,
            MAX(seating_capacity) as max_capacity
        FROM coffee_operations.coffee_shops
        GROUP BY is_premium_location
        ORDER BY is_premium_location DESC
    """)

    premium_stats = cur.fetchall()

print("⭐ Premium vs Standard Analysis:")
for is_premium, count, avg_cap, min_cap, max_cap in premium_stats:
    # NOTE(review): a NULL is_premium_location group would also be labelled
    # "Standard" here — confirm the column is NOT NULL.
    type_label = "Premium" if is_premium else "Standard"
    print(f"\n{type_label} Locations ({count} shops):")
    if avg_cap is None:
        # The aggregates are NULL (None) when no shop in the group has a
        # seating_capacity; formatting None with ".1f" would raise TypeError.
        print("   Average capacity: n/a")
        print("   Capacity range: n/a")
    else:
        print(f"   Average capacity: {avg_cap:.1f} seats")
        print(f"   Capacity range: {min_cap} - {max_cap} seats")

## Time Zone Distribution

In [None]:
# Group shops by time zone: shop count plus an alphabetical, comma-separated
# list of shop names per zone, largest zone first
with conn.cursor() as cur:
    cur.execute("""
        SELECT 
            time_zone,
            COUNT(*) as shop_count,
            STRING_AGG(shop_name, ', ' ORDER BY shop_name) as shop_names
        FROM coffee_operations.coffee_shops
        GROUP BY time_zone
        ORDER BY shop_count DESC
    """)

    tz_stats = cur.fetchall()

print("🕐 Shops by Time Zone:")
for zone, total, shop_list in tz_stats:
    # Header line for the zone, then its shop names indented underneath
    print(f"\n{zone} ({total} shops):")
    print(f"   {shop_list}")

## Load Data into Pandas for Advanced Analysis

In [None]:
# Pull the listed coffee_shops columns into a pandas DataFrame, ordered by shop name
query = """
    SELECT shop_id, shop_name, city, state_province, country, 
           time_zone, latitude, longitude, seating_capacity, 
           is_premium_location, is_active, created_at
    FROM coffee_operations.coffee_shops
    ORDER BY shop_name
"""

# NOTE(review): pandas documents `con` as a SQLAlchemy connectable or sqlite3
# connection; passing the raw psycopg2 connection may emit a UserWarning —
# confirm this is acceptable for the demo.
df = pd.read_sql_query(sql=query, con=conn)
print(f"📊 Loaded {df.shape[0]} rows into DataFrame")

# The last expression is rendered as the cell output: the first five rows
df.head(5)

In [None]:
# Descriptive statistics for the loaded DataFrame
print("📈 Summary Statistics:")
print(df.describe())

# Distinct-value counts for the three location columns, computed in one pass
distinct = df[["country", "city", "time_zone"]].nunique()
print("\n🌍 Geographic Coverage:")
print(f"   Countries: {distinct['country']}")
print(f"   Cities: {distinct['city']}")
print(f"   Time Zones: {distinct['time_zone']}")

In [None]:
# Close the PostgreSQL connection opened by the connect cell. Any query cell
# run after this needs the connect cell to be re-run first.
conn.close()
print("\n✅ Query session complete!")

## What You've Accomplished

✅ **Connected** to Lakebase PostgreSQL database  
✅ **Explored** coffee shop data across multiple countries  
✅ **Analyzed** premium vs standard location patterns  
✅ **Examined** geographic and time zone distributions  
✅ **Loaded** data into pandas for advanced analytics  

This demonstrates how Lakebase provides both operational database capabilities and analytical querying power in one unified platform!