# Create a Genie Space

Let's create a Genie space together! I'll need some information from you:

## 1. Purpose & Audience
* **What business questions should this space answer?** (e.g., sales analytics, customer insights, inventory tracking)
* **Who will use this space?** (analysts, executives, data scientists, etc.)
* **What data domain does this cover?** (sales, finance, operations, marketing, etc.)

## 2. Data Sources
* **Which Unity Catalog tables should be included?** Please provide full paths like `catalog.schema.table`
* If you're not sure which tables to include, I can help you explore the tables available to you.

## 3. Configuration
* **Serverless SQL Warehouse ID** (required for Genie spaces)
* **Workspace location** where you want to create the space (e.g., `/Users/your.email@company.com/genie`)
* **Title** for the space
* **Description** (brief summary of the space's purpose)

---

Please provide the information above, and I'll help you create the Genie space!

In [0]:
import requests

# Get workspace URL and authentication token from the notebook context
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
workspace_url = notebook_context.apiUrl().get()
token = notebook_context.apiToken().get()

print(f"Workspace URL: {workspace_url}\n")

# List SQL warehouses. An explicit timeout is passed because requests applies
# none by default, so an unreachable workspace would otherwise hang the cell.
# On timeout or connection failure requests raises and the cell stops here.
response = requests.get(
    f"{workspace_url}/api/2.0/sql/warehouses",
    headers={"Authorization": f"Bearer {token}"},
    timeout=30,
)

if response.status_code == 200:
    warehouses = response.json().get("warehouses", [])
    # Keep only the warehouses that report serverless compute as enabled.
    serverless_warehouses = [wh for wh in warehouses if wh.get("enable_serverless_compute", False)]

    if serverless_warehouses:
        print(f"Found {len(serverless_warehouses)} serverless SQL warehouse(s):\n")
        for wh in serverless_warehouses:
            # .get() with a fallback so a record missing one field cannot
            # abort the whole listing with a KeyError.
            print(f"  Name: {wh.get('name', 'N/A')}")
            print(f"  ID: {wh.get('id', 'N/A')}")
            print(f"  State: {wh.get('state', 'N/A')}")
            print(f"  Size: {wh.get('cluster_size', 'N/A')}")
            print("-" * 60)
    else:
        print("⚠️ No serverless SQL warehouses found.")
        print("\nNote: Genie spaces require a serverless SQL warehouse.")
        print("You may need to create one in the SQL Warehouses UI.")
else:
    # Any non-200 status: surface the status code and raw body for debugging.
    print(f"❌ Failed to list warehouses: {response.status_code}")
    print(f"Error: {response.text}")

In [0]:
# List the tables in the users.sean_zhang schema with SHOW TABLES and hand the
# resulting DataFrame to display() so the available table names can be reviewed.
tables_df = spark.sql("SHOW TABLES IN users.sean_zhang")
display(tables_df)

In [0]:
# Print the column layout of each vaccine table/view that is a candidate data
# source. Every table is described independently so one failing DESCRIBE does
# not stop the remaining ones from being shown.
vaccine_tables = [
    "dim_product",
    "dim_site",
    "fact_country_supply",
    "fact_current_inventory",
    "fact_production_shipment",
    "v_vaccine_availability",
    "v_country_coverage",
    "v_inventory_health",
    "v_manufacturing_status",
]

separator = "=" * 60

for table in vaccine_tables:
    qualified_name = f"users.sean_zhang.{table}"

    # Header banner: blank line, rule, fully qualified table name, rule.
    print("\n" + separator)
    print(f"Table: {qualified_name}")
    print(separator)

    try:
        df = spark.sql(f"DESCRIBE TABLE {qualified_name}")
        display(df)
    except Exception as err:
        # Best-effort: report the failure and continue with the next table.
        print(f"Error: {err}")

In [0]:
import json
import requests
import secrets

# Create the "Vaccine Analytics" Genie space: build the serialized space
# configuration (sample questions, data sources, instructions, example SQL)
# and POST it to the workspace's Genie spaces endpoint.

# Get authentication credentials from the notebook context
workspace_url = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiUrl().get()
token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()

# Deployment-specific settings. Both values are placeholders and must be
# replaced before running this cell; the request is sent with whatever is here.
warehouse_id = "<your-warehouse-id>"  # Shared Endpoint
parent_path = "/Users/<your-email>@<your-domain>.com"

# Define the business questions with their corresponding SQL queries.
# Each "sql" value is a list of line fragments, as sent in the payload below.
questions_and_sqls = [
    {
        "question": "Which countries have the lowest vaccine availability and highest stockout rates?",
        "sql": [
            "SELECT\n",
            "  country_name,\n",
            "  antigen,\n",
            "  vaccine_availability_pct,\n",
            "  avg_stockout_days,\n",
            "  availability_status\n",
            "FROM users.sean_zhang.v_vaccine_availability\n",
            "WHERE vaccine_availability_pct < 90\n",
            "ORDER BY vaccine_availability_pct ASC\n",
            "LIMIT 10"
        ]
    },
    {
        "question": "What is the immunization coverage gap by country and antigen?",
        "sql": [
            "SELECT\n",
            "  country_name,\n",
            "  antigen,\n",
            "  avg_coverage_pct,\n",
            "  target_coverage_pct,\n",
            "  coverage_gap_pct,\n",
            "  gap_severity\n",
            "FROM users.sean_zhang.v_country_coverage\n",
            "WHERE coverage_gap_pct > 0\n",
            "ORDER BY coverage_gap_pct DESC"
        ]
    },
    {
        "question": "Which products and sites have inventory stockout risks?",
        "sql": [
            "SELECT\n",
            "  product,\n",
            "  site,\n",
            "  avg_days_of_supply,\n",
            "  stockout_risk_pct,\n",
            "  inventory_status\n",
            "FROM users.sean_zhang.v_inventory_health\n",
            "WHERE inventory_status IN ('CRITICAL', 'LOW')\n",
            "ORDER BY stockout_risk_pct DESC"
        ]
    },
    {
        "question": "What is the manufacturing performance by product and site?",
        "sql": [
            "SELECT\n",
            "  product,\n",
            "  site,\n",
            "  avg_yield_pct,\n",
            "  otif_pct,\n",
            "  avg_lead_time_days,\n",
            "  excursion_rate_pct,\n",
            "  yield_status,\n",
            "  otif_status\n",
            "FROM users.sean_zhang.v_manufacturing_status\n",
            "ORDER BY otif_pct DESC"
        ]
    },
    {
        "question": "Show me vaccine supply pricing and volume by country and supplier",
        "sql": [
            "SELECT\n",
            "  country_name,\n",
            "  antigen,\n",
            "  supplier,\n",
            "  channel,\n",
            "  price_per_dose,\n",
            "  volume_doses,\n",
            "  actual_coverage_pct,\n",
            "  has_stockout\n",
            "FROM users.sean_zhang.fact_country_supply\n",
            "ORDER BY volume_doses DESC"
        ]
    }
]

# Generate unique 32-character hex IDs, one per question and one per example
# query. Sizing them from questions_and_sqls keeps the ID lists in step with
# the question list if entries are added or removed.
# The ID lists are sorted alphabetically (required by API).
question_ids = sorted(secrets.token_hex(16) for _ in questions_and_sqls)
example_sql_ids = sorted(secrets.token_hex(16) for _ in questions_and_sqls)
instruction_id = secrets.token_hex(16)

# Build configuration with sample questions and example SQL queries
config = {
    "version": 2,
    "config": {
        # IDs are already sorted, so pairing them in order keeps this list
        # ordered by id.
        "sample_questions": [
            {"id": question_id, "question": [entry["question"]]}
            for question_id, entry in zip(question_ids, questions_and_sqls)
        ]
    },
    "data_sources": {
        "tables": sorted([
            {"identifier": "users.sean_zhang.dim_product"},
            {"identifier": "users.sean_zhang.dim_site"},
            {"identifier": "users.sean_zhang.fact_country_supply"},
            {"identifier": "users.sean_zhang.fact_current_inventory"},
            {"identifier": "users.sean_zhang.fact_production_shipment"},
            {"identifier": "users.sean_zhang.v_vaccine_availability"},
            {"identifier": "users.sean_zhang.v_country_coverage"},
            {"identifier": "users.sean_zhang.v_inventory_health"},
            {"identifier": "users.sean_zhang.v_manufacturing_status"}
        ], key=lambda x: x["identifier"])
    },
    "instructions": {
        "text_instructions": [
            {
                "id": instruction_id,
                "content": [
                    "This space analyzes global vaccine supply chain data including availability, coverage, inventory, and manufacturing performance.",
                    "Coverage gap = target_coverage_pct - actual_coverage_pct. Higher gaps indicate underserved populations.",
                    "OTIF (On-Time In-Full) measures delivery performance. Target is >95%.",
                    "Vaccine availability is measured as percentage of days with stock available (target: >95%).",
                    "Days of supply <30 indicates potential stockout risk."
                ]
            }
        ],
        "example_question_sqls": sorted([
            {
                "id": example_sql_id,
                "question": [entry["question"]],
                "sql": entry["sql"]
            }
            for example_sql_id, entry in zip(example_sql_ids, questions_and_sqls)
        ], key=lambda x: x["id"])
    }
}

# Create the Genie space. The configuration travels as a JSON string in
# "serialized_space". An explicit timeout is passed because requests applies
# none by default; on timeout or connection failure requests raises.
response = requests.post(
    f"{workspace_url}/api/2.0/genie/spaces",
    headers={"Authorization": f"Bearer {token}"},
    json={
        "serialized_space": json.dumps(config),
        "warehouse_id": warehouse_id,
        "parent_path": parent_path,
        "title": "Vaccine Analytics",
        "description": "Analyze global vaccine supply chain performance, availability, coverage gaps, inventory health, and manufacturing metrics"
    },
    timeout=60,
)

# Handle response
if response.status_code == 200:
    space_data = response.json()
    space_id = space_data.get("space_id")

    # Report counts from the configuration that was actually sent rather than
    # hard-coded numbers, so the summary cannot drift from the payload.
    sample_question_count = len(config["config"]["sample_questions"])
    example_sql_count = len(config["instructions"]["example_question_sqls"])
    table_count = len(config["data_sources"]["tables"])

    print("✅ Successfully created Vaccine Analytics Genie space!")
    print(f"\nSpace ID: {space_id}")
    print("\nAccess your space here:")
    print(f"{workspace_url}/genie/rooms/{space_id}")
    print("\n📊 Configured with:")
    print(f"  • {sample_question_count} sample questions")
    print(f"  • {example_sql_count} example SQL queries")
    print(f"  • {table_count} vaccine supply chain tables")
    print("  • Domain-specific instructions")
else:
    # Any non-200 status: surface the status code and raw body for debugging.
    print(f"❌ Failed to create space: {response.status_code}")
    print("\nError details:")
    print(response.text)