# AI Search - Reindex specific documents in pull mode

In [1]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file before anything reads os.environ.
# override=True lets values from .env replace variables already set in the process.
load_dotenv(override=True)

from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential

# Required: a missing variable raises KeyError here rather than failing later.
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]

# The admin key is optional. Use key-based auth when a non-empty key is configured;
# otherwise (variable unset OR empty) fall back to DefaultAzureCredential.
# .get() is used so that an unset variable reaches the fallback instead of raising KeyError.
if os.environ.get("AZURE_SEARCH_ADMIN_KEY"):
    credential = AzureKeyCredential(os.environ["AZURE_SEARCH_ADMIN_KEY"])
else:
    credential = DefaultAzureCredential()
index_name = os.environ["AZURE_SEARCH_INDEX"]

# Blob storage that holds the source documents read by the indexer.
blob_connection_string = os.environ["BLOB_CONNECTION_STRING"]
blob_container_name = os.environ["BLOB_CONTAINER_NAME"]

### 'Touch' blobs so that the indexer re-indexes them

In [4]:
import datetime

def touch_blob(_blob_name, _container_client):
    """Stamp a blob's metadata with a fresh 'reprocess' value to trigger re-indexing.

    Existing user metadata on the blob is preserved: set_blob_metadata replaces
    the blob's whole metadata set, so the current metadata is read first and the
    'reprocess' entry is merged into it.

    Args:
        _blob_name: Name of the blob inside the container.
        _container_client: Container client exposing get_blob_client(name).

    Returns:
        None. Prints the name of the blob being touched.
    """
    # update metadata on blob to trigger indexing
    print(f'Touching metadata of blob: {_blob_name}')
    blob_client = _container_client.get_blob_client(_blob_name)

    # Start from the current metadata so unrelated key/value pairs are not wiped.
    metadata = dict(blob_client.get_blob_properties().metadata or {})
    # Timezone-aware UTC ISO-8601 timestamp: unambiguous regardless of where the notebook runs.
    metadata['reprocess'] = datetime.datetime.now(datetime.timezone.utc).isoformat()
    blob_client.set_blob_metadata(metadata=metadata)


In [5]:
from azure.storage.blob import BlobServiceClient  

# Touch blobs so that the indexer picks them up again.
# NOTE: list_blob_names() is called without a filter, so every blob in the
# container is touched; narrow `blobs_to_reindex` to re-index specific documents only.

# Open the storage account, then the container holding the source documents
blob_service_client = BlobServiceClient.from_connection_string(blob_connection_string)
container_client = blob_service_client.get_container_client(blob_container_name)

# Collect all names up front, before any blob metadata is modified
blobs_to_reindex = list(container_client.list_blob_names())

for name_to_touch in blobs_to_reindex:
    touch_blob(name_to_touch, container_client)

Touching metadata of blob: 266 # Christoph Adami on How Information Makes Sense of Biology.txt


### Optional: manually run the indexer now

In [7]:
from azure.search.documents.indexes import SearchIndexerClient

# The indexer name is derived from the index name plus an "-indexer" suffix
indexer_name = index_name + "-indexer"
indexer_client = SearchIndexerClient(endpoint=endpoint, credential=credential)

# Request an on-demand run instead of waiting for the indexer's schedule
print(f'Running {indexer_name}...')
indexer_client.run_indexer(indexer_name)

Running testindex150424v4-indexer...


In [8]:
# Wait for indexer to finish

def wait_for_indexer(seconds=15, timeout=None, _indexer_client=None, _indexer_name=None):
    """Poll the indexer until its last run is no longer in progress, then print the status.

    The status is fetched once per iteration, so the value that ends the loop is
    the same value that gets printed.

    Args:
        seconds: Delay between polls, in seconds.
        timeout: Optional maximum total wait in seconds. None (default) waits indefinitely.
        _indexer_client: Client exposing get_indexer_status(name). Defaults to the
            notebook-level `indexer_client`.
        _indexer_name: Name of the indexer to poll. Defaults to the notebook-level
            `indexer_name`.

    Raises:
        TimeoutError: If `timeout` is given and the indexer is still running
            (or has no result yet) once that much time has elapsed.
    """
    import time

    # Fall back to the notebook globals only when no explicit client/name is supplied.
    client = indexer_client if _indexer_client is None else _indexer_client
    name = indexer_name if _indexer_name is None else _indexer_name
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        # One service call per iteration; reuse the result for both the check and the final print.
        last_result = client.get_indexer_status(name).last_result
        if last_result is not None and last_result.status != 'inProgress':
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(f'Indexer {name} did not finish within {timeout} seconds')
        print('Indexer running...')
        time.sleep(seconds)

    print(last_result.status)

# Poll every 5 seconds until the indexer run is no longer in progress
wait_for_indexer(seconds=5)

success
