In [1]:
import os
import asyncio
import aiohttp
import json
from dotenv import load_dotenv

load_dotenv()

class IndexSchemaChecker:
    def __init__(self):
        self.base_url = os.getenv("AZURE_AI_SEARCH_ENDPOINT")
        self.api_key = os.getenv("AZURE_AI_SEARCH_API_KEY")
        self.api_version = "2023-11-01"
        
        self.headers = {
            "Content-Type": "application/json",
            "api-key": self.api_key
        }
        
        print(f"Base URL: {self.base_url}")
        print(f"API Key: {self.api_key[:10]}..." if self.api_key else "No API Key")

    async def get_index_schema(self, index_name: str):
        """Get index schema information"""
        url = f"{self.base_url}/indexes/{index_name}?api-version={self.api_version}"
        
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=self.headers) as response:
                    if response.status == 200:
                        schema = await response.json()
                        return schema
                    else:
                        error_text = await response.text()
                        print(f"Error {response.status}: {error_text}")
                        return None
        except Exception as e:
            print(f"Exception getting schema for {index_name}: {e}")
            return None

    async def get_index_stats(self, index_name: str):
        """Get index statistics"""
        url = f"{self.base_url}/indexes/{index_name}/stats?api-version={self.api_version}"
        
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url, headers=self.headers) as response:
                    if response.status == 200:
                        stats = await response.json()
                        return stats
                    else:
                        error_text = await response.text()
                        print(f"Error getting stats {response.status}: {error_text}")
                        return None
        except Exception as e:
            print(f"Exception getting stats for {index_name}: {e}")
            return None

    async def test_search_sample(self, index_name: str):
        """Test search with select all fields"""
        url = f"{self.base_url}/indexes/{index_name}/docs/search.post.search?api-version={self.api_version}"
        
        payload = {
            "search": "*",
            "select": "*",
            "top": 1
        }
        
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(url, headers=self.headers, json=payload) as response:
                    if response.status == 200:
                        result = await response.json()
                        return result
                    else:
                        error_text = await response.text()
                        print(f"Error testing search {response.status}: {error_text}")
                        return None
        except Exception as e:
            print(f"Exception testing search for {index_name}: {e}")
            return None

    def print_schema_info(self, index_name: str, schema: dict):
        """Print formatted schema information"""
        print(f"\n{'='*60}")
        print(f"INDEX: {index_name}")
        print(f"{'='*60}")
        
        if not schema:
            print("‚ùå No schema data available")
            return
            
        # Basic info
        print(f"Name: {schema.get('name', 'N/A')}")
        print(f"Default Scoring Profile: {schema.get('defaultScoringProfile', 'N/A')}")
        
        # Fields
        fields = schema.get('fields', [])
        print(f"\nüìã FIELDS ({len(fields)} total):")
        print("-" * 80)
        print(f"{'Field Name':<25} {'Type':<15} {'Searchable':<10} {'Filterable':<10} {'Retrievable':<10}")
        print("-" * 80)
        
        for field in fields:
            name = field.get('name', '')
            field_type = field.get('type', '')
            searchable = '‚úÖ' if field.get('searchable', False) else '‚ùå'
            filterable = '‚úÖ' if field.get('filterable', False) else '‚ùå'
            retrievable = '‚úÖ' if field.get('retrievable', False) else '‚ùå'
            
            print(f"{name:<25} {field_type:<15} {searchable:<10} {filterable:<10} {retrievable:<10}")
        
        # Semantic configurations
        semantic_configs = schema.get('semantic', {}).get('configurations', [])
        if semantic_configs:
            print(f"\nüîç SEMANTIC CONFIGURATIONS:")
            for config in semantic_configs:
                print(f"  - {config.get('name', 'N/A')}")
        
        # Scoring profiles
        scoring_profiles = schema.get('scoringProfiles', [])
        if scoring_profiles:
            print(f"\nüìä SCORING PROFILES:")
            for profile in scoring_profiles:
                print(f"  - {profile.get('name', 'N/A')}")

    def print_stats_info(self, index_name: str, stats: dict):
        """Print formatted statistics information"""
        if not stats:
            print("‚ùå No stats data available")
            return
            
        print(f"\nüìà STATISTICS:")
        print(f"Document Count: {stats.get('documentCount', 'N/A'):,}")
        print(f"Storage Size: {stats.get('storageSize', 'N/A'):,} bytes")

    def print_sample_document(self, index_name: str, search_result: dict):
        """Print sample document fields"""
        if not search_result or not search_result.get('value'):
            print("‚ùå No sample document available")
            return
            
        sample_doc = search_result['value'][0]
        print(f"\nüìÑ SAMPLE DOCUMENT FIELDS:")
        print("-" * 40)
        
        for key, value in sample_doc.items():
            if key.startswith('@'):
                continue
            value_preview = str(value)[:100] + "..." if len(str(value)) > 100 else str(value)
            print(f"{key:<20}: {value_preview}")

# Function untuk run di notebook
async def check_indexes():
    checker = IndexSchemaChecker()
    
    # Index names to check
    indexes = [
        "title_index",
        "index_typea_cz_5000_co_500_prod_2"
    ]
    
    for index_name in indexes:
        print(f"\nüîç Checking index: {index_name}")
        
        # Get schema
        schema = await checker.get_index_schema(index_name)
        checker.print_schema_info(index_name, schema)
        
        # Get stats
        stats = await checker.get_index_stats(index_name)
        checker.print_stats_info(index_name, stats)
        
        # Get sample document
        sample = await checker.test_search_sample(index_name)
        checker.print_sample_document(index_name, sample)
        
        print("\n" + "="*60)

# Run di notebook cell
await check_indexes()

Base URL: https://chatbot-license-ai-search.search.windows.net
API Key: ibjuTxx5kG...

üîç Checking index: title_index

INDEX: title_index
Name: title_index
Default Scoring Profile: test

üìã FIELDS (6 total):
--------------------------------------------------------------------------------
Field Name                Type            Searchable Filterable Retrievable
--------------------------------------------------------------------------------
id                        Edm.String      ‚ùå          ‚ùå          ‚úÖ         
title                     Edm.String      ‚úÖ          ‚úÖ          ‚úÖ         
titleWithExtension        Edm.String      ‚úÖ          ‚úÖ          ‚úÖ         
filepath                  Edm.String      ‚úÖ          ‚úÖ          ‚úÖ         
titleVector               Collection(Edm.Single) ‚úÖ          ‚ùå          ‚ùå         
uniqueID                  Edm.String      ‚ùå          ‚úÖ          ‚úÖ         

üîç SEMANTIC CONFIGURATIONS:
  - test-all
  - content-s

In [2]:
import os
import asyncio
import aiohttp
import json
from dotenv import load_dotenv

load_dotenv()

async def get_index_json(index_name: str):
    """Get raw JSON from Azure AI Search index"""
    base_url = os.getenv("AZURE_AI_SEARCH_ENDPOINT")
    api_key = os.getenv("AZURE_AI_SEARCH_API_KEY")
    api_version = "2023-11-01"
    
    headers = {
        "Content-Type": "application/json",
        "api-key": api_key
    }
    
    # Get index schema
    schema_url = f"{base_url}/indexes/{index_name}?api-version={api_version}"
    
    # Get sample document
    search_url = f"{base_url}/indexes/{index_name}/docs/search.post.search?api-version={api_version}"
    search_payload = {
        "search": "*",
        "select": "*",
        "top": 1
    }
    
    async with aiohttp.ClientSession() as session:
        # Get schema
        print(f"üîç INDEX SCHEMA: {index_name}")
        print("="*50)
        async with session.get(schema_url, headers=headers) as response:
            if response.status == 200:
                schema_json = await response.json()
                print(json.dumps(schema_json, indent=2, ensure_ascii=False))
            else:
                print(f"‚ùå Error {response.status}: {await response.text()}")
        
        print(f"\nüìÑ SAMPLE DOCUMENT: {index_name}")
        print("="*50)
        # Get sample document
        async with session.post(search_url, headers=headers, json=search_payload) as response:
            if response.status == 200:
                sample_json = await response.json()
                print(json.dumps(sample_json, indent=2, ensure_ascii=False))
            else:
                print(f"‚ùå Error {response.status}: {await response.text()}")

# Check kedua index
async def check_all():
    indexes = [
        "title_index",
        "index_typea_cz_5000_co_500_prod_2"
    ]
    
    for index_name in indexes:
        await get_index_json(index_name)
        print("\n" + "="*80 + "\n")

# Run
await check_all()

üîç INDEX SCHEMA: title_index
{
  "@odata.context": "https://chatbot-license-ai-search.search.windows.net/$metadata#indexes/$entity",
  "@odata.etag": "\"0x8DE294B6C1484A4\"",
  "name": "title_index",
  "defaultScoringProfile": "test",
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "searchable": false,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "title",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "id.microsoft",
      "dimensions": null,
      "vectorSearchProfile": null,
      "s