# Index Restoration Utility

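This notebook restores the pre-configured Azure AI Search indexes (`hrdocs` and `healthdocs`) used throughout the LAB511 workshop. The indexes contain pre-processed documents with embeddings and semantic configurations, so you can focus on learning the Knowledge Base APIs rather than on data preparation.

Before running it, make sure `AZURE_SEARCH_SERVICE_ENDPOINT`, `AZURE_SEARCH_ADMIN_KEY`, and `AZURE_OPENAI_ENDPOINT` are set in your environment or in a `.env` file.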

In [5]:
import json
import os
import traceback

from azure.core.credentials import AzureKeyCredential
from azure.search.documents.aio import SearchClient
from azure.search.documents.indexes.aio import SearchIndexClient
from azure.search.documents.indexes.models import SearchIndex


async def restore_index(
    endpoint: str,
    index_name: str,
    index_file: str,
    records_file: str,
    azure_openai_endpoint: str,
    batch_size: int = 100,
    data_dir: str = r"../data/index-data",
):
    """Recreate a search index from an exported definition and reload its documents.

    The index definition is read from ``index_file`` (JSON) and the documents
    from ``records_file`` (JSON Lines, one document per line), both resolved
    relative to ``data_dir``. The definition is renamed to ``index_name`` and
    its first vectorizer is pointed at ``azure_openai_endpoint`` with the API
    key cleared before the index is created or updated.

    Args:
        endpoint: Azure AI Search service endpoint.
        index_name: Name given to the restored index.
        index_file: File name of the exported index definition.
        records_file: File name of the exported documents (JSONL).
        azure_openai_endpoint: Azure OpenAI endpoint written into the first
            vectorizer of the index definition.
        batch_size: Maximum number of documents sent per upload call. Values
            below 1 are treated as 1.
        data_dir: Directory containing ``index_file`` and ``records_file``.

    Returns:
        None. Progress is reported through ``print``.

    Notes:
        The admin key is read from the ``AZURE_SEARCH_ADMIN_KEY`` environment
        variable. Any exception is caught, logged and its traceback printed
        rather than propagated. Blank lines in the JSONL file are skipped
        silently; lines that are not valid JSON are skipped with a warning.
        Documents the service rejects are logged individually and excluded
        from the uploaded total.
    """
    log_message = print
    # A non-positive batch size cannot form a meaningful batch; fall back to 1.
    batch_size = max(1, batch_size)

    async def upload_batch(client, batch, label):
        """Upload one batch and return (succeeded, failed) document counts."""
        log_message(f"[{index_name}] Uploading {label} ({len(batch)} documents)...")
        results = await client.upload_documents(documents=batch)
        # Success is reported per document, so a call that returns normally
        # can still contain documents the service refused to index.
        failures = [r for r in (results or []) if not getattr(r, "succeeded", True)]
        for failure in failures:
            log_message(
                f"[{index_name}] Warning: Document {getattr(failure, 'key', None)!r} "
                f"was not indexed: {getattr(failure, 'error_message', None)}"
            )
        return len(batch) - len(failures), len(failures)

    try:
        log_message(f"[{index_name}] Starting index restoration...")

        # Create or update index
        credential = AzureKeyCredential(os.environ["AZURE_SEARCH_ADMIN_KEY"])
        async with SearchIndexClient(endpoint=endpoint, credential=credential) as client:
            index_file_path = os.path.join(data_dir, index_file)
            log_message(f"[{index_name}] Reading index definition from: {index_file_path}")

            with open(index_file_path, "r", encoding="utf-8") as in_file:
                index_data = json.load(in_file)

            index = SearchIndex.deserialize(index_data)
            index.name = index_name
            vectorizer_params = index.vector_search.vectorizers[0].parameters
            vectorizer_params.resource_url = azure_openai_endpoint
            # Use search service's system-assigned managed identity (no API key needed)
            vectorizer_params.api_key = None
            log_message(f"[{index_name}] Creating/updating index in Azure AI Search...")
            await client.create_or_update_index(index)
            log_message(f"[{index_name}] Index created/updated successfully")

        # Upload documents
        async with SearchClient(endpoint=endpoint, index_name=index_name, credential=credential) as client:
            records_file_path = os.path.join(data_dir, records_file)
            log_message(f"[{index_name}] Reading documents from: {records_file_path}")

            records = []
            total_uploaded = 0
            total_failed = 0
            batch_count = 0

            with open(records_file_path, "r", encoding="utf-8") as in_file:
                for line_num, line in enumerate(in_file, 1):
                    # Blank lines (e.g. a trailing newline) are not malformed records.
                    if not line.strip():
                        continue

                    # Only the parse is guarded, so upload errors are never
                    # mistaken for bad input lines.
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError as e:
                        log_message(f"[{index_name}] Warning: Skipping invalid JSON on line {line_num}: {e}")
                        continue

                    records.append(record)
                    if len(records) >= batch_size:
                        batch_count += 1
                        succeeded, failed = await upload_batch(client, records, f"batch #{batch_count}")
                        total_uploaded += succeeded
                        total_failed += failed
                        records = []

            # Upload any remaining documents
            if records:
                batch_count += 1
                succeeded, failed = await upload_batch(client, records, f"final batch #{batch_count}")
                total_uploaded += succeeded
                total_failed += failed

            log_message(f"[{index_name}] Total documents uploaded: {total_uploaded}")
            if total_failed:
                log_message(f"[{index_name}] Restoration completed with {total_failed} failed document(s)")
            else:
                log_message(f"[{index_name}] Restoration completed successfully!")

    except Exception as e:
        log_message(f"[{index_name}] Error during restoration: {e}")
        traceback.print_exc()

In [6]:
from dotenv import load_dotenv

# Load settings from a .env file; with override=True, values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Required service endpoints: a missing variable raises KeyError right here.
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]

# Optional chat model settings, each falling back to "gpt-4.1" when unset.
azure_openai_chatgpt_deployment = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "gpt-4.1")
azure_openai_chatgpt_model_name = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL_NAME", "gpt-4.1")

In [7]:
# Restore hrdocs index
print("\n--- Processing hrdocs index ---")
await restore_index(
    endpoint, 
    "hrdocs", 
    "index.json", 
    "hrdocs-exported.jsonl", 
    azure_openai_endpoint
)


--- Processing hrdocs index ---
[hrdocs] Starting index restoration...
[hrdocs] Reading index definition from: ../data/index-data/index.json
[hrdocs] Creating/updating index in Azure AI Search...
[hrdocs] Index created/updated successfully
[hrdocs] Reading documents from: ../data/index-data/hrdocs-exported.jsonl
[hrdocs] Uploading final batch #1 (50 documents)...
[hrdocs] Total documents uploaded: 50
[hrdocs] Restoration completed successfully!


In [8]:
# Restore healthdocs index
print("\n--- Processing healthdocs index ---")
await restore_index(
    endpoint, 
    "healthdocs", 
    "index.json", 
    "healthdocs-exported.jsonl", 
    azure_openai_endpoint 
)


--- Processing healthdocs index ---
[healthdocs] Starting index restoration...
[healthdocs] Reading index definition from: ../data/index-data/index.json
[healthdocs] Creating/updating index in Azure AI Search...
[healthdocs] Index created/updated successfully
[healthdocs] Reading documents from: ../data/index-data/healthdocs-exported.jsonl
[healthdocs] Uploading batch #1 (100 documents)...
[healthdocs] Uploading batch #2 (100 documents)...
[healthdocs] Uploading batch #3 (100 documents)...
[healthdocs] Uploading final batch #4 (34 documents)...
[healthdocs] Total documents uploaded: 334
[healthdocs] Restoration completed successfully!


## Next Steps

Once both indexes are restored successfully, you can proceed with the LAB511 workshop notebooks.

➡️ Start with [Part 1: Single Knowledge Source - HR Docs](part1-single-knowledge-source-hr-docs.ipynb) to begin your journey into building advanced knowledge bases with Azure AI Search!