# Create Azure Search Index

## To create an index in Azure Search, we need to run 3 main steps:
1. Create a connection to a Data Source that stores the documents we want to index
2. Create an index which will contain the schema we need
3. Create an indexer which will be responsible for running and maintaining the index

In [1]:
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
"""
# import dependencies
import os
import json
from requests import post, put 
import pandas as pd

from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient 
from azure.search.documents import SearchClient
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    ScoringProfile,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

## Setting Env Variables until the bash script is ready

In [22]:
# Staging resource names; both key values are empty strings in this cell.
os.environ.update(
    {
        "STG_ACS_RESOURCE_NAME": "search-stg-9ki9",
        "STG_ACS_RESOURCE_KEY": "",
        "STG_COSMOS_RESOURCE_NAME": "cosmosdb-stg-9ki9",
        "STG_COSMOS_KEY": "",
    }
)

In [3]:
# Connection settings for the search service and Cosmos DB, read from the
# environment (each value is None when its variable is not set).
azure_search_service = os.getenv('STG_ACS_RESOURCE_NAME')
admin_key = os.getenv('STG_ACS_RESOURCE_KEY')
cosmos_resource = os.getenv('STG_COSMOS_RESOURCE_NAME')
cosmos_key = os.getenv('STG_COSMOS_KEY')

### 1. Generate The Data Source object. In our case it's a connection to CosmosDB

In [7]:
# Settings used to describe the Cosmos DB data source
API_VERSION = "2021-04-30-Preview"
database = 'waldo'
cosmos_container = 'insights'
cosmos_data_source_name = 'waldo-cosmosdb-datasource'

# Data source definition sent as the JSON body of the create request
data_source_index = {
    "@odata.context": f"https://{azure_search_service}.search.windows.net/$metadata#datasources/$entity",
    "@odata.etag": "\"0x8DA4566CD803FB3\"",
    "name": cosmos_data_source_name,
    "description": "Platinum data source containing searchable documents for videos",
    "type": "cosmosdb",
    "subtype": None,
    "credentials": {
        # Assembled from the Cosmos DB account name, account key and database name
        "connectionString": (
            f"AccountEndpoint=https://{cosmos_resource}.documents.azure.com;"
            f"AccountKey={cosmos_key};"
            f"Database={database};"
        ),
    },
    "container": {
        "name": cosmos_container,
        "query": None,
    },
    # Change detection uses the `_ts` column as the high-water mark
    "dataChangeDetectionPolicy": {
        "@odata.type": "#Microsoft.Azure.Search.HighWaterMarkChangeDetectionPolicy",
        "highWaterMarkColumnName": "_ts",
    },
    "dataDeletionDetectionPolicy": None,
    "encryptionKey": None,
    "identity": None,
}

In [8]:
def request_to_azure_search(json_file, feature, name="", api_version='2021-04-30-Preview'):
    """
    Send a POST request to an Azure Search REST endpoint to create a resource.

    `feature` is used as the URL path segment, for example:
        * indexes - create a new index based on configuration
        * indexers - create a new indexer based on configuration
        * datasources - create a new data source based on configuration

    The outcome is reported with `print`; nothing is returned.

    Args:
        json_file (dict): configuration sent as the JSON body of the call.
        feature (str): type of endpoint to call (path segment of the URL).
        name (str, optional): name used in the printed messages. Defaults to "",
            in which case `json_file['name']` is used.
        api_version (str, optional): API version to use in calls. Defaults to '2021-04-30-Preview'.

    Raises:
        KeyError: if `name` is empty and `json_file` has no 'name' key.
    """
    headers = {
        "api-key": admin_key,
        "Content-Type": "application/json",
    }

    if not name:
        name = json_file['name']

    # Exactly one slash between the host and the feature segment.
    url = f"https://{azure_search_service}.search.windows.net/{feature}?api-version={api_version}"

    try:
        resp = post(url=url, json=json_file, headers=headers)
    except Exception as e:
        # The request itself failed, so there is no response to inspect;
        # report the exception only.
        print(f'Exception creating {feature} ' + name, e)
        return

    if resp.status_code == 201:
        print(f"Success creating {feature}" + name)
    elif resp.status_code == 403:
        print("Authorisation Failed: Check that your API KEY value is correct")
    elif resp.status_code == 400:
        print("Error", resp.text)
    else:
        # Surface every other status instead of finishing silently.
        print(f"Unexpected status {resp.status_code} creating {feature} " + name, resp.text)

#### Call to create Data Source

In [9]:
# Create the data source; any exception raised by the call is printed, not re-raised.
try:
    request_to_azure_search(json_file=data_source_index, feature="datasources")
except Exception as err:
    print(err)

Success creating datasourceswaldo-cosmosdb-datasource


### 2. Let's create an index using a pre-configured schema.
The key in this index is `videoId`, which is the unique identifier returned by VI. 

In [10]:
## Read json file
def load_json_file(filename):
    """
    Read a JSON file and return its parsed content.

    Args:
        filename (str): path of the JSON file to read.

    Returns:
        The Python object decoded from the file (a dict for a JSON object).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)

#### Call to create the index

In [12]:
# Load the index schema, then suffix its name with the version and granularity.
index_json_content = load_json_file('src/python/common/enrichment/search_index_creation/media_enrichment_index_schema.json')
granularity = 'videos'  # other options maybe ['shots','scenes','videos']
index_json_content['name'] += '-v1-' + granularity

# Create the index; any exception raised by the call is printed, not re-raised.
try:
    request_to_azure_search(json_file=index_json_content, feature="indexes")
except Exception as err:
    print(err)

Success creating indexeswaldo-v1-videos


### 3. Let's create the indexer to read new documents from the Data Source
We'll set the incremental load to run every 5 minutes.
Every document with an existing document id (i.e. `videoId`) will be updated in the index; all new documents will be inserted.

In [13]:
# The indexer name is derived from the target index name.
indexer_name = f"indexer-{index_json_content['name']}-5m"
print(indexer_name)
first_index_date = "2022-07-24T00:00:00.048Z"

# Indexer definition linking the Cosmos DB data source to the target index
indexer_config = {
    "@odata.context": f"https://{azure_search_service}.search.windows.net/$metadata#indexers/$entity",
    "@odata.etag": "\"0x8DA478D2D0D1A4A\"",
    "name": indexer_name,
    "description": "5 minute incremental indexer on CosmosDB",
    "dataSourceName": cosmos_data_source_name,
    "skillsetName": None,
    "targetIndexName": index_json_content['name'],
    "disabled": None,
    # ISO 8601 duration "PT5M": run every 5 minutes from the start time
    "schedule": {
        "interval": "PT5M",
        "startTime": first_index_date,
    },
    "parameters": {
        "batchSize": None,
        "maxFailedItems": None,
        "maxFailedItemsPerBatch": None,
        "base64EncodeKeys": None,
        "configuration": {},
    },
    "fieldMappings": [],
    "outputFieldMappings": [],
    "cache": None,
    "encryptionKey": None,
}

indexer-waldo-v1-videos-5m


#### Call to create incremental indexer

In [14]:
# Create the indexer; any exception raised by the call is printed, not re-raised.
try:
    request_to_azure_search(json_file=indexer_config, feature="indexers")
except Exception as err:
    print(err)

Success creating indexersindexer-waldo-v1-videos-5m


### 4. Add index Alias to allow Index dropping without downtime
**Note:** you can't drop an index before you delete the alias itself.

In [16]:
# Alias definition: the alias name is built from the granularity and lists
# the index it refers to. The bare name on the last line displays the dict.
alias_configuration = dict(
    name=f"alias-waldo-{granularity}",
    indexes=[index_json_content['name']],
)
alias_configuration

{'name': 'alias-waldo-videos', 'indexes': ['waldo-v1-videos']}

In [21]:
# Create the alias; any exception raised by the call is printed, not re-raised.
try:
    request_to_azure_search(json_file=alias_configuration, feature="aliases")
except Exception as err:
    print(err)

Success creating aliasesalias-waldo-videos


## Delete an Alias

In [20]:

import requests

# Delete the alias by name through the search service REST API.
headers = {
    "api-key": admin_key,
    "Content-Type": "application/json",
}
alias_name = alias_configuration['name']
# The f-string already interpolates the service name; no further formatting is needed.
endpoint = f"https://{azure_search_service}.search.windows.net/"
url = f"{endpoint}aliases/{alias_name}?api-version={API_VERSION}"
resp = requests.delete(url=url, headers=headers)

In [19]:
# Display the response object bound to `resp` by the alias DELETE request.
resp

<Response [204]>