# Azure AI Search Connection Test

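This notebook verifies the environment setup and connects to Azure AI Search using DefaultAzureCredential for secure authentication. It then lists the existing indexes, creates an index with document-level access control enabled, and uploads sample PDF documents together with their group permissions.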

In [None]:
# Import Required Libraries
import os
from dotenv import load_dotenv
from azure.search.documents.indexes import SearchIndexClient
from azure.identity import DefaultAzureCredential

In [None]:
# Load Environment Variables
# load_dotenv() is called before the settings below are read.
load_dotenv()

# Settings reported by this cell; either one is None when it is not set.
endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
tenant_id = os.environ.get("AZURE_TENANT_ID")

print(f"Search Endpoint: {endpoint}")
print(f"Tenant ID: {tenant_id}")

# Only the endpoint decides the status line; a missing or empty value counts as a failure.
if endpoint:
    print("Environment variables loaded successfully!")
else:
    print("Failed to load environment variables")

In [None]:
# Create Azure AI Search Client with DefaultAzureCredential
try:
    # Credential shared by every client in this notebook (e.g. backed by an Azure CLI login)
    credential = DefaultAzureCredential()

    # Index-management client bound to the endpoint read from the environment
    search_client = SearchIndexClient(endpoint=endpoint, credential=credential)
except Exception as e:
    # Any failure is reported rather than raised; search_client is left
    # unassigned in that case.
    print(f"Error creating search client: {e}")
else:
    print("Azure AI Search client created successfully using DefaultAzureCredential!")

In [None]:
# List Existing Indexes
try:
    # Ask the service for its indexes; the result is turned into a list further down
    indexes = search_client.list_indexes()

    print("Existing indexes in Azure AI Search:")
    print("-" * 40)

    index_list = list(indexes)
    if not index_list:
        print("No indexes found in this Azure AI Search service.")
    else:
        for index in index_list:
            print(f"Index Name: {index.name}")
            print(f"Fields Count: {len(index.fields)}")
            # Shows 'N/A' when the index object carries no document_count attribute
            print(f"Document Count: {getattr(index, 'document_count', 'N/A')}")
            print("-" * 40)

        print(f"\nTotal indexes found: {len(index_list)}")

except Exception as e:
    # Errors are reported with a permissions hint instead of being raised
    print(f"Error listing indexes: {e}")
    print("Make sure you have the necessary permissions on the Azure AI Search service.")

In [None]:
# Create Azure AI Search Index with Document-Level Access Control
from azure.search.documents.indexes.models import SearchField, SearchIndex, PermissionFilter, SearchIndexPermissionFilterOption

# Name of the index created here and referenced by later cells
index_name = "document-access-control-demo"

try:
    # Schema: a key, the text content, two storage-metadata fields and the
    # group-id collection that carries the permission filter.
    index_schema = SearchIndex(
        name=index_name,
        fields=[
            # Document key
            SearchField(name="doc_id", type="Edm.String", key=True, retrievable=True),
            # Full text content
            SearchField(name="content", type="Edm.String", retrievable=True, searchable=True),
            # File name (also sortable) and file path
            SearchField(
                name="metadata_storage_name",
                type="Edm.String",
                retrievable=True,
                searchable=True,
                sortable=True,
            ),
            SearchField(
                name="metadata_storage_path",
                type="Edm.String",
                retrievable=True,
                searchable=True,
            ),
            # Group ids used for group-based access control
            SearchField(
                name="groupIds",
                type="Collection(Edm.String)",
                retrievable=True,
                searchable=True,
                filterable=True,
                permission_filter=PermissionFilter.GROUP_IDS,
            ),
        ],
        # Turn on permission filtering for the whole index
        permission_filter_option=SearchIndexPermissionFilterOption.ENABLED,
    )

    # Send the schema to the service
    result = search_client.create_index(index=index_schema)
    print(f"✅ Index '{index_name}' created successfully with document-level access control enabled!")

    # Confirm creation by looking the name up in a fresh listing
    updated_indexes = list(search_client.list_indexes())
    created_index = next(
        filter(lambda candidate: candidate.name == index_name, updated_indexes),
        None,
    )

    if not created_index:
        print(f"❌ Could not find index '{index_name}' in the list")
    else:
        print(f"✅ Index '{index_name}' confirmed in search service")
        print(f"   Fields count: {len(created_index.fields)}")
        print(f"   Permission filter enabled: {created_index.permission_filter_option == SearchIndexPermissionFilterOption.ENABLED}")

except Exception as e:
    print(f"❌ Error creating index: {e}")
    # Extra hint when the error text indicates a name clash
    if "already exists" in str(e).lower():
        print(f"Index '{index_name}' already exists. Consider using a different name or deleting the existing index.")

In [None]:
# Hardcoded mapping of files to the groups associated with them.
# Each entry is a (filename, [group, ...]) tuple.

document_permissions = [
    (
        'Global_Sales_Report_2024.pdf',
        [
            '7490b58b-1ab8-43c1-aab1-de1929d66175',
            '70016de2-e232-4c31-8aec-111c31f42e3c',
        ],
    ),
    (
        'Marketing_Insights_EMEA_2025.pdf',
        ['7490b58b-1ab8-43c1-aab1-de1929d66175'],
    ),
    (
        'Travel_and_Expenses_Policies_2025.pdf',
        ['all'],
    ),
]

print("Document permissions mapping:")
print("=" * 50)
for filename, groups in document_permissions:
    # One three-line record per file: name, comma-separated groups, separator
    print(f"File: {filename}", f"Groups: {', '.join(groups)}", "-" * 30, sep="\n")

print(f"\nTotal documents: {len(document_permissions)}")

In [None]:
# Process and Upload Documents to Azure AI Search Index
import base64
import os
from pathlib import Path
import PyPDF2
from azure.search.documents import SearchClient

# Folder (relative to the working directory) that holds the PDF files
docs_directory = "docs"

# Client for document operations on the index, reusing the endpoint and
# credential set up in earlier cells
document_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

def extract_text_from_pdf(file_path):
    """Extract the text of every page of a PDF file using PyPDF2.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The page texts joined by newlines, with leading and trailing
        whitespace stripped. An empty string is returned when the file
        cannot be opened or parsed; the error is printed, not raised.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Pages are read while the file is still open. `or ""` keeps a page
            # that yields no text (None) from raising and thereby discarding
            # the text of the whole document.
            page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

        return "\n".join(page_texts).strip()
    except Exception as e:
        print(f"Error extracting text from {file_path}: {e}")
        return ""

def create_doc_id(file_path):
    """Create a URL-safe base64 doc_id from a document path.

    The URL-safe alphabet is used because standard base64 can emit '+' and
    '/', which are not permitted in Azure AI Search document keys (letters,
    digits, '_', '-' and '=' only).

    Note: for paths whose standard encoding contained '+' or '/', the id
    differs from one produced with plain base64.

    Args:
        file_path: Document path (str) to encode.

    Returns:
        The UTF-8 bytes of ``file_path`` encoded as URL-safe base64 text.
    """
    return base64.urlsafe_b64encode(file_path.encode('utf-8')).decode('utf-8')

# Build one upload record per file listed in document_permissions
documents_to_upload = []

print("Processing documents:")
print("=" * 60)

for filename, group_ids in document_permissions:
    file_path = os.path.join(docs_directory, filename)
    full_path = os.path.abspath(file_path)

    # Files that are not on disk are reported and skipped
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        print("-" * 60)
        continue

    print(f"Processing: {filename}")

    # Pull the text out of the PDF; the preview is capped at 200 characters
    content = extract_text_from_pdf(file_path)
    content_preview = content if len(content) <= 200 else content[:200] + "..."

    # Record to upload; the doc_id is derived from the absolute path
    document = {
        "doc_id": create_doc_id(full_path),
        "content": content,
        "metadata_storage_name": filename,
        "metadata_storage_path": full_path,
        "groupIds": group_ids
    }
    documents_to_upload.append(document)

    print(f"  ✅ Extracted {len(content)} characters")
    print(f"  📝 Content preview: {content_preview}")
    print(f"  🔑 Doc ID: {document['doc_id']}")
    print(f"  👥 Groups: {', '.join(group_ids)}")
    print("-" * 60)

# Upload documents to Azure AI Search
if not documents_to_upload:
    print("❌ No documents to upload.")
else:
    try:
        print(f"\n🚀 Uploading {len(documents_to_upload)} documents to index '{index_name}'...")
        result = document_client.upload_documents(documents=documents_to_upload)

        # One status line per document returned by the service
        print("Upload Results:")
        for item in result:
            if item.succeeded:
                status = "✅ Success"
            else:
                status = f"❌ Failed: {item.error_message}"
            print(f"  Document {item.key}: {status}")

        # Printed once the per-document results have been listed,
        # whether or not individual items failed
        print("\n✅ Document upload completed!")

    except Exception as e:
        print(f"❌ Error uploading documents: {e}")