In [0]:
!pip install --upgrade zia-sdk-python[databricks] 
#%restart_python

In [0]:
%python

# Base 
import datetime

# Import data handling dependencies 
from pyspark.sql import SparkSession

# Import zia-sdk dependencies
from neurolabszia import Zia, NLIRResult 
from neurolabszia.utils import to_spark_dataframe

# 1. Read the Neurolabs API key from the Databricks secret store.
#    The secret scope "neurolabs-api" with key "api_key" must already exist.
try:
    api_key = dbutils.secrets.get(scope="neurolabs-api", key="api_key")
except Exception as secret_error:
    # Chain the original exception so the underlying cause stays in the traceback.
    raise RuntimeError(
        "Failed to retrieve API key from Databricks secrets. "
        "Make sure the secret scope and key are set up."
    ) from secret_error

async def get_paginated_results(
    client: Zia, task_uuid: str, batch_size: int = 10, max_offset: int = 800
) -> list[NLIRResult]:
    """
    Get all results from a task using pagination.

    Batches of ``batch_size`` results are requested at offsets 0, batch_size,
    2 * batch_size, ... Pagination stops as soon as one of these happens:

    * the API returns an empty batch,
    * the API returns fewer than ``batch_size`` results (last page), or
    * the offset of the batch just fetched is >= ``max_offset``.

    Args:
        client: Zia client instance
        task_uuid: The UUID of the task
        batch_size: Number of results to fetch per request (default: 10).
            Must be at least 1.
        max_offset: Offset at which pagination stops (default: 800). The batch
            starting at the first offset >= max_offset is still fetched and
            included, so slightly more than max_offset results can be returned.
    Returns:
        List of all NLIRResult objects
    Raises:
        ValueError: If batch_size is less than 1.
    """
    # A non-positive batch size would never advance the offset (or would move
    # it backwards), so the loop below could spin forever.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    all_results = []
    offset = 0

    print(f"🔍 Fetching paginated results for task: {task_uuid}")
    print(f"📦 Batch size: {batch_size}")

    while True:
        print(f"\n📄 Fetching batch at offset {offset}...")

        # Get a batch of results
        batch = await client.result_management.get_task_results(
            task_uuid=task_uuid, limit=batch_size, offset=offset
        )

        if not batch:
            print(f"✅ No more results found at offset {offset}")
            break

        print(f"✅ Retrieved {len(batch)} results")
        all_results.extend(batch)

        # If we got fewer results than requested, we've reached the end
        if len(batch) < batch_size:
            print(f"✅ Reached end of results (got {len(batch)} < {batch_size})")
            break

        # Stop at the configured cap; report it as such rather than claiming
        # the task ran out of results.
        if offset >= max_offset:
            print(f"✅ Reached max_offset {max_offset} at offset {offset}; stopping pagination")
            break

        offset += batch_size

    print(f"\n🎉 Total results retrieved: {len(all_results)}")
    return all_results


# Databricks redacts secrets by default, print the first and last 4 characters of the API key 
#print(f"API Key: {api_key[:4]}{'*' * (len(api_key) - 8)}{api_key[-4:]}")

#def load_json(path: str | Path):
#    p = Path(path)
#    with p.open("r", encoding="utf-8") as f:
#        return json.load(f)
# results_path = [f"/Volumes/catalog_integration/neurolabs_catalog/ir_results/ab_results20_{i}.json" for i in range(0,7)]

In [0]:
# Run `get_paginated_results` when you want to get new data via Neurolabs API
# task-uuid = "xyz"
# client = Zia(api_key)

# Optional: to try a key before storing it in Databricks secrets, set it here.
# When left as None, the `api_key` already retrieved from Databricks secrets
# earlier in the notebook is kept, instead of being overwritten by a
# placeholder string on every run. Never commit a real key to the notebook.
api_key_override = None  # e.g. "your-api-key"
if api_key_override:
    api_key = api_key_override

# UUID of the task whose results should be fetched.
task_uuid = "your-task-uuid"

async def get_all():
    """Fetch every result for the configured task using a single Zia client.

    Reads the module-level ``api_key`` and ``task_uuid``, opens one ``Zia``
    client as an async context manager and pages through the task's results
    with ``get_paginated_results`` using a batch size of 20.

    Returns:
        The list returned by ``get_paginated_results``, or an empty list if
        fetching raised an exception (the error is printed, not re-raised).
    """

    print("🚀 Zia SDK - Paginated Results Example")
    print("=" * 60)
    all_results = []
    # Initialize client once and reuse it
    async with Zia(api_key) as client:
        # Example 1: Get all results with pagination
        print("\n1️⃣ Getting all paginated results:")
        try:
            all_results = await get_paginated_results(client, task_uuid, batch_size=20)
            print(f"✅ Successfully retrieved {len(all_results)} total results")
        except Exception as e:
            # Best-effort example: report the failure and fall through with
            # whatever `all_results` currently holds (the initial empty list).
            print(f"❌ Error getting all results: {e}")

    # The closing banner must be printed before returning; placed after the
    # `return` it was unreachable.
    print("\n" + "=" * 60)
    print("🎉 Paginated results example complete!")
    return all_results


#all_results = []
#for results in results_path: 
#    data = load_json(results)
    # Parse into our NLB
    # results = [NLIRResult.model_validate(result) for result in data["items"]]
    # all_results.extend(results)

#print(f"Total results retrieved: {len(all_results)}")

In [0]:
# Fetch the results for the configured task; get_all() returns a list
# (empty if fetching raised an error, which it prints instead of re-raising).
all_results = await get_all()

In [0]:
# Display how many results were retrieved.
len(all_results)

In [0]:
# Import the module (as the top-of-notebook import does) rather than
# `from datetime import datetime`, which would rebind the name `datetime`
# from the module to the class for every cell run afterwards.
import datetime

# Convert IR Results into Spark Dataframe and Upload to Unity Catalog

print("Execution started at:", datetime.datetime.now())

# Guard: the fetch step returns an empty list when the API call fails. The
# write below uses mode("overwrite"), so refuse to continue with no data
# rather than replace the existing table contents with nothing.
if not all_results:
    raise ValueError(
        "No IR results to ingest: `all_results` is empty. "
        "Refusing to overwrite the Delta table with no data."
    )

# 1. Create Spark session
spark = SparkSession.builder.appName("NLIRResultsIngestion").getOrCreate()

# 2. Catalog, schema & table names (the schema is created below if missing)
catalog_name = "catalog_integration"
schema_name = "neurolabs_ir_results_examples"
table_name = "ir_results_customer_account_name"
# Single source of truth for the fully qualified name used for both the write
# and the success message.
full_table_name = f"{catalog_name}.{schema_name}.{table_name}"

# 3. Convert NLIRResults -> Spark Dataframe
#pdf = ir_results_to_dataframe(all_results)
#ir_results_schema = get_spark_schema_from_dataframe(pdf)
df_spark = to_spark_dataframe(all_results, spark)
# df_spark.head(2)

# 4. Write to Databricks Delta table (replaces any existing table contents)
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog_name}.{schema_name}")

df_spark.write.format("delta").mode("overwrite").saveAsTable(full_table_name)

# Report the fully qualified table name that was actually written to.
print(f"Successfully wrote {df_spark.count()} records to table {full_table_name}.")