In [1]:
import json
import requests
from dotenv import load_dotenv
import os

In [2]:
from dotenv import dotenv_values

config = dotenv_values(".env")

AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
AZURE_OPENAI_KEY = os.getenv('AZURE_OPENAI_KEY')
TEXT_EMBEDDING_ENGINE = os.getenv('TEXT_EMBEDDING_ENGINE')
COG_SEARCH_RESOURCE = os.getenv('COG_SEARCH_RESOURCE')
COG_SEARCH_KEY = os.getenv('COG_SEARCH_KEY')
COG_SEARCH_INDEX = os.getenv('COG_SEARCH_INDEX')
STORAGE_CONNECTION_STRING = os.getenv('STORAGE_CONNECTION_STRING')
STORAGE_ACCOUNT = os.getenv('STORAGE_ACCOUNT')
STORAGE_CONTAINER = os.getenv('STORAGE_CONTAINER')
STORAGE_KEY = os.getenv('STORAGE_KEY')
COG_SERVICE_KEY = os.getenv('COG_SERVICE_KEY')
DEBUG = os.getenv('DEBUG')
functionAppUrlAndKey = os.getenv('functionAppUrlAndKey')

print(COG_SEARCH_RESOURCE)

mm-cogsearch


## REST API calls for settings up:
1. Datasource
2. Skill Set
3. Index
4. Indexer

In [4]:

def create_datasource(service_name, index_name, search_api_key, storage_connectionstring, storage_container):
    endpoint = "https://{}.search.windows.net/".format(service_name)
    url = '{0}/datasources/{1}-datasource?api-version=2020-06-30'.format(endpoint, index_name)
    print(url)
    payload = json.dumps({
                "description": "Demo files to demonstrate cognitive search capabilities.",
                "type": "azureblob",
                "credentials": {
                    "connectionString": storage_connectionstring
                },
                "container": {
                    "name": storage_container
                }
                })
    headers = {
    'api-key': search_api_key,
    'Content-Type': 'application/json'
            }


    response = requests.request("PUT", url, headers=headers, data=payload)

    if response.status_code == 201 or response.status_code == 204:
        return response, True
    else:
        return response, False
    
def create_skillset(service_name, index, cognitive_search_key, cognitive_service_key, embeddingFunctionAppUriAndKey):
    endpoint = "https://{}.search.windows.net/".format(service_name)
    url = '{0}/skillsets/{1}-skillset?api-version=2021-04-30-Preview'.format(endpoint, index)
    print(url)
    payload = json.dumps({
    "@odata.context": "{}/$metadata#skillsets/$entity".format(endpoint),
    "@odata.etag": "\"0x8DB2B4BF82370CF\"",
    "name": "{0}-skillset".format(index),
    "description": "Skillset created from the portal. skillsetName: index-skillset; contentField: merged_content; enrichmentGranularity: document; knowledgeStoreStorageAccount: ;",
    "skills": [
        {
        "@odata.type": "#Microsoft.Skills.Text.V3.EntityRecognitionSkill",
        "name": "#1",
        "description": None,
        "context": "/document/merged_content",
        "categories": [
            "Organization",
            "URL",
            "DateTime",
            "Skill",
            "Address",
            "Location",
            "Product",
            "IPAddress",
            "Event",
            "Person",
            "Quantity",
            "PersonType",
            "PhoneNumber",
            "Email"
        ],
        "defaultLanguageCode": "en",
        "minimumPrecision": None,
        "modelVersion": None,
        "inputs": [
            {
            "name": "text",
            "source": "/document/merged_content"
            },
            {
            "name": "languageCode",
            "source": "/document/language"
            }
        ],
        "outputs": [
            {
            "name": "persons",
            "targetName": "people"
            },
            {
            "name": "organizations",
            "targetName": "organizations"
            },
            {
            "name": "locations",
            "targetName": "locations"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Text.KeyPhraseExtractionSkill",
        "name": "#2",
        "description": None,
        "context": "/document/merged_content",
        "defaultLanguageCode": "en",
        "maxKeyPhraseCount": None,
        "modelVersion": None,
        "inputs": [
            {
            "name": "text",
            "source": "/document/merged_content"
            },
            {
            "name": "languageCode",
            "source": "/document/language"
            }
        ],
        "outputs": [
            {
            "name": "keyPhrases",
            "targetName": "keyphrases"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Text.LanguageDetectionSkill",
        "name": "#3",
        "description": None,
        "context": "/document",
        "defaultCountryHint": None,
        "modelVersion": None,
        "inputs": [
            {
            "name": "text",
            "source": "/document/merged_content"
            }
        ],
        "outputs": [
            {
            "name": "languageCode",
            "targetName": "language"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Text.TranslationSkill",
        "name": "#4",
        "description": None,
        "context": "/document/merged_content",
        "defaultFromLanguageCode": None,
        "defaultToLanguageCode": "en",
        "suggestedFrom": "en",
        "inputs": [
            {
            "name": "text",
            "source": "/document/merged_content"
            }
        ],
        "outputs": [
            {
            "name": "translatedText",
            "targetName": "translated_text"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Text.PIIDetectionSkill",
        "name": "#5",
        "description": None,
        "context": "/document/merged_content",
        "defaultLanguageCode": "en",
        "minimumPrecision": 0.5,
        "maskingMode": "replace",
        "maskingCharacter": "*",
        "modelVersion": None,
        "piiCategories": [],
        "domain": "none",
        "inputs": [
            {
            "name": "text",
            "source": "/document/merged_content"
            },
            {
            "name": "languageCode",
            "source": "/document/language"
            }
        ],
        "outputs": [
            {
            "name": "piiEntities",
            "targetName": "pii_entities"
            },
            {
            "name": "maskedText",
            "targetName": "masked_text"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Text.MergeSkill",
        "name": "#6",
        "description": None,
        "context": "/document",
        "insertPreTag": " ",
        "insertPostTag": " ",
        "inputs": [
            {
            "name": "text",
            "source": "/document/content"
            },
            {
            "name": "itemsToInsert",
            "source": "/document/normalized_images/*/text"
            },
            {
            "name": "offsets",
            "source": "/document/normalized_images/*/contentOffset"
            }
        ],
        "outputs": [
            {
            "name": "mergedText",
            "targetName": "merged_content"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Vision.OcrSkill",
        "name": "#7",
        "description": None,
        "context": "/document/normalized_images/*",
        "textExtractionAlgorithm": None,
        "lineEnding": "Space",
        "defaultLanguageCode": "en",
        "detectOrientation": True,
        "inputs": [
            {
            "name": "image",
            "source": "/document/normalized_images/*"
            }
        ],
        "outputs": [
            {
            "name": "text",
            "targetName": "text"
            },
            {
            "name": "layoutText",
            "targetName": "layoutText"
            }
        ]
        },
        {
        "@odata.type": "#Microsoft.Skills.Vision.ImageAnalysisSkill",
        "name": "#8",
        "description": None,
        "context": "/document/normalized_images/*",
        "defaultLanguageCode": "en",
        "visualFeatures": [
            "tags",
            "description"
        ],
        "details": [],
        "inputs": [
            {
            "name": "image",
            "source": "/document/normalized_images/*"
            }
        ],
        "outputs": [
            {
            "name": "tags",
            "targetName": "imageTags"
            },
            {
            "name": "description",
            "targetName": "imageCaption"
            }
        ]
        }
        ,
        {
  "@odata.type": "#Microsoft.Skills.Custom.WebApiSkill",
  "uri": embeddingFunctionAppUriAndKey,
  "httpMethod": "POST",
  "timeout": "PT230S",
  "batchSize": 1,
  "degreeOfParallelism": 1,
  "name": "Embeddings",
  "description": "",
  "context": "/document",
  "inputs": [
        {
          "name": "text",
          "source": "/document/merged_content"
        },
        {
          "name": "source",
          "source": "/document/metadata_storage_name"
        }
  ],
  "outputs": [
          {
            "name": "embeddings",
            "targetName": "embeddings"
          },
					{
            "name": "embeddings_text",
            "targetName": "embeddings_text"
          }
  ]
}

    ],
    "cognitiveServices": {
        "@odata.type": "#Microsoft.Azure.Search.CognitiveServicesByKey",
        "description": "SuperCool",
        "key": "{0}".format(cognitive_service_key)
    },
    "knowledgeStore": None,
    "encryptionKey": None
    })
    
    headers = {
        'Content-Type': 'application/json',
        'api-key': '{0}'.format(cognitive_search_key)
    }

    
    response = requests.request("PUT", url, headers=headers, data=payload)

    print(response.text)

    if response.status_code == 201 or response.status_code == 204:
        return response, True
    else:
        return response, False
    
def update_index_semantic(service_name, index, cognitive_search_key):

    endpoint = "https://{}.search.windows.net/".format(service_name)
    url = '{0}/indexes/{1}/?api-version=2021-04-30-Preview'.format(endpoint, index)
    print(url)

    payload = json.dumps({
    "name": index,
    "defaultScoringProfile": "",
    "fields": [
        {
        "name": "content",
        "type": "Edm.String",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_content_type",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_size",
        "type": "Edm.Int64",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_last_modified",
        "type": "Edm.DateTimeOffset",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_content_md5",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_name",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_path",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": True,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_storage_file_extension",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None, 
        "synonymMaps": []
        },
        {
        "name": "metadata_content_type",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_language",
        "type": "Edm.String",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "metadata_creation_date",
        "type": "Edm.DateTimeOffset",
        "searchable": False,
        "filterable": False,
        "retrievable": False,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": None,
        "synonymMaps": []
        },
        {
        "name": "people",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "organizations",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "locations",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "keyphrases",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "language",
        "type": "Edm.String",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "translated_text",
        "type": "Edm.String",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "en.lucene",
        "synonymMaps": []
        },
        {
        "name": "embeddings_text",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "embeddings",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "pii_entities",
        "type": "Collection(Edm.ComplexType)",
        "fields": [
            {
            "name": "text",
            "type": "Edm.String",
            "searchable": True,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "analyzer": "standard.lucene",
            "synonymMaps": []
            },
            {
            "name": "type",
            "type": "Edm.String",
            "searchable": True,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "analyzer": "standard.lucene",
            "synonymMaps": []
            },
            {
            "name": "subtype",
            "type": "Edm.String",
            "searchable": True,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "analyzer": "standard.lucene",
            "synonymMaps": []
            },
            {
            "name": "offset",
            "type": "Edm.Int32",
            "searchable": False,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "analyzer": None,
            "synonymMaps": []
            },
            {
            "name": "length",
            "type": "Edm.Int32",
            "searchable": False,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "analyzer": None,
            "synonymMaps": []
            },
            {
            "name": "score",
            "type": "Edm.Double",
            "searchable": False,
            "filterable": False,
            "retrievable": True,
            "sortable": False,
            "facetable": False,
            "key": False,
            "indexAnalyzer": None,
            "searchAnalyzer": None,
            "analyzer": None,
            "synonymMaps": []
            }
        ]
        },
        {
        "name": "masked_text",
        "type": "Edm.String",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "merged_content",
        "type": "Edm.String",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "text",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "layoutText",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "imageTags",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        },
        {
        "name": "imageCaption",
        "type": "Collection(Edm.String)",
        "searchable": True,
        "filterable": False,
        "retrievable": True,
        "sortable": False,
        "facetable": False,
        "key": False,
        "indexAnalyzer": None,
        "searchAnalyzer": None,
        "analyzer": "standard.lucene",
        "synonymMaps": []
        }
    ],
    "scoringProfiles": [],
    "corsOptions": None,
    "suggesters": [],
    "semantic": {
        "defaultConfiguration": None,
        "configurations": [
        {
            "name": "semanic-config",
            "prioritizedFields": {
            "titleField": {
                "fieldName": "metadata_storage_name"
            },
            "prioritizedContentFields": [
                {
                "fieldName": "merged_content"
                }
            ],
            "prioritizedKeywordsFields": [
                {
                "fieldName": "keyphrases"
                },
                {
                "fieldName": "people"
                },
                {
                "fieldName": "locations"
                }
            ]
            }
        }
        ]
    },
    "analyzers": [],
    "tokenizers": [],
    "tokenFilters": [],
    "charFilters": [],
    "encryptionKey": None,
    "similarity": {
        "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
        "k1": None,
        "b": None
    }
    })
    headers = {
    'api-key': cognitive_search_key,
    'Content-Type': 'application/json'
    }

    response = requests.request("PUT", url, headers=headers, data=payload)

    if response.status_code == 201 or response.status_code == 204:
        return response, True
    else:
        # print('************************')
        # print(response.status_code)
        # print(response.text)
        return response, False

def create_indexer(service_name, index, search_key):
    endpoint = "https://{}.search.windows.net/".format(service_name)
    url = '{0}/indexers/{1}-indexer/?api-version=2021-04-30-Preview'.format(endpoint, index)
    print(url)

    payload = json.dumps({
    "name": "{0}-indexer".format(index),
    "description": "",
    "dataSourceName": "{0}-datasource".format(index),
    "skillsetName": "{0}-skillset".format(index),
    "targetIndexName": "{0}".format(index),
    "disabled": None,
    "schedule": None,
    "parameters": {
        "batchSize": None,
        "maxFailedItems": 0,
        "maxFailedItemsPerBatch": 0,
        "base64EncodeKeys": None,
        "configuration": {
        "dataToExtract": "contentAndMetadata",
        "parsingMode": "default",
        "imageAction": "generateNormalizedImages"
        }
    },
    "fieldMappings": [
        {
        "sourceFieldName": "metadata_storage_path",
        "targetFieldName": "metadata_storage_path",
        "mappingFunction": {
            "name": "base64Encode",
            "parameters": None
        }
        }
    ],
    "outputFieldMappings": [
        {
        "sourceFieldName": "/document/merged_content/people",
        "targetFieldName": "people"
        },
        {
        "sourceFieldName": "/document/merged_content/organizations",
        "targetFieldName": "organizations"
        },
        {
        "sourceFieldName": "/document/merged_content/locations",
        "targetFieldName": "locations"
        },
        {
        "sourceFieldName": "/document/merged_content/keyphrases",
        "targetFieldName": "keyphrases"
        },
        {
        "sourceFieldName": "/document/language",
        "targetFieldName": "language"
        },
        {
        "sourceFieldName": "/document/merged_content/translated_text",
        "targetFieldName": "translated_text"
        },
        {
        "sourceFieldName": "/document/merged_content/pii_entities",
        "targetFieldName": "pii_entities"
        },
        {
        "sourceFieldName": "/document/merged_content/masked_text",
        "targetFieldName": "masked_text"
        },
        {
        "sourceFieldName": "/document/merged_content",
        "targetFieldName": "merged_content"
        },
        {
        "sourceFieldName": "/document/normalized_images/*/text",
        "targetFieldName": "text"
        },
        {
        "sourceFieldName": "/document/normalized_images/*/layoutText",
        "targetFieldName": "layoutText"
        },
        {
        "sourceFieldName": "/document/normalized_images/*/imageTags/*/name",
        "targetFieldName": "imageTags"
        },
        {
        "sourceFieldName": "/document/normalized_images/*/imageCaption",
        "targetFieldName": "imageCaption"
        },
        {
        "sourceFieldName": "/document/embeddings",
        "targetFieldName": "embeddings"
        },
        {
        "sourceFieldName": "/document/embeddings_text",
        "targetFieldName": "embeddings_text"
        }
        
    ],
    "cache": None,
    "encryptionKey": None
    })
    headers = {
    'Content-Type': 'application/json',
    'api-key': '{0}'.format(search_key)
    }


    response = requests.request("PUT", url, headers=headers, data=payload)


    if response.status_code == 201 or response.status_code == 204:
        print('good')
        return response, True
    else:
        print(response.status_code)
        return response, False

In [5]:
# Creating vector search index
## This creates the JSON that will create the search index with vector search enabled.
def index_for_vectors(service_name, index, cognitive_search_key):
    endpoint = "https://{}.search.windows.net/".format(service_name)
    url = '{0}/indexes/{1}-vector/?api-version=2023-07-01-Preview'.format(endpoint, index)
    print(url)

    payload = json.dumps({
    "name": index + "-vector",
    "defaultScoringProfile": "",
    "fields": [
        {
            "name": "key",
            "type": "Edm.String",
            "searchable": False,
            "retrievable": True,
            "key": True,
            "filterable": False,
            "facetable": False,
            "sortable": False
        },
        {
            "name": "title",
            "type": "Edm.String",
            "searchable": True,
            "retrievable": True,
            "key": False,
            "filterable": False,
            "facetable": False,
            "sortable": False
        },
        {
            "name": "content",
            "type": "Edm.String",
            "searchable": True,
            "retrievable": True,
            "key": False,
            "filterable": False,
            "facetable": False,
            "sortable": False
        },
        {
            "name": "path",
            "type": "Edm.String",
            "searchable": True,
            "retrievable": True,
            "key": False,
            "filterable": False,
            "facetable": False,
            "sortable": False
        },        
        {
            "name": "titleVector",
            "type": "Collection(Edm.Single)",
            "searchable": True,
            "retrievable": True,
            "dimensions": 1536,
            "vectorSearchConfiguration": "my-vector-config"
        },
        {
            "name": "contentVector",
            "type": "Collection(Edm.Single)",
            "searchable": True,
            "retrievable": True,
            "dimensions": 1536,
            "vectorSearchConfiguration": "my-vector-config"
        }
    ],
    "corsOptions": {
        "allowedOrigins": [
            "*"
        ],
        "maxAgeInSeconds": 60
    },
    "vectorSearch": {
        "algorithmConfigurations": [
            {
                "name": "my-vector-config",
                "kind": "hnsw",
                "hnswParameters": {
                    "m": 4,
                    "efConstruction": 400,
                    "metric": "cosine"
                }
            }
        ]
    },
    "semantic": {
        "configurations": [
            {
                "name": "my-semantic-config",
                "prioritizedFields": {
                    "titleField": {
                        "fieldName": "title"
                    },
                    "prioritizedContentFields": [
                        {
                            "fieldName": "content"
                        }
                    ],
                    "prioritizedKeywordsFields": [
                        {
                            "fieldName": "content"
                        }
                    ]
                }
            }
        ]
    }
    })
    headers = {
    'api-key': cognitive_search_key,
    'Content-Type': 'application/json'
    }

    response = requests.request("PUT", url, headers=headers, data=payload)

    if response.status_code == 201 or response.status_code == 204:
        return response, True
    else:
        print('************************')
        print(response.status_code)
        print(response.text)
        return response, False


In [9]:

response, success =  index_for_vectors(COG_SEARCH_RESOURCE, COG_SEARCH_INDEX, COG_SEARCH_KEY)
print(response)


https://mm-cogsearch.search.windows.net//indexes/mmsearch-vector/?api-version=2023-07-01-Preview
<Response [201]>


In [10]:
response, success = create_datasource(COG_SEARCH_RESOURCE, COG_SEARCH_INDEX, COG_SEARCH_KEY, STORAGE_CONNECTION_STRING, STORAGE_CONTAINER)
print(response)
if success == True:
    print(functionAppUrlAndKey)
    response, success = create_skillset(COG_SEARCH_RESOURCE, COG_SEARCH_INDEX, COG_SEARCH_KEY, COG_SERVICE_KEY, functionAppUrlAndKey)

    print(response)
if success == True:
    response, success =  update_index_semantic(COG_SEARCH_RESOURCE, COG_SEARCH_INDEX, COG_SEARCH_KEY)
    print(response)
if success == True:
    response, success =  create_indexer(COG_SEARCH_RESOURCE, COG_SEARCH_INDEX, COG_SEARCH_KEY)
    print(response)

https://mm-cogsearch.search.windows.net//datasources/mmsearch-datasource?api-version=2020-06-30
<Response [201]>
https://mmtestdeployment.azurewebsites.net/api/HttpTrigger1?code=1xVGCqCG3Txs0ovqTBplLcmA-C-TnURRhdmWbvqjm6gSAzFupHBr6Q==
https://mm-cogsearch.search.windows.net//skillsets/mmsearch-skillset?api-version=2021-04-30-Preview
{"@odata.context":"https://mm-cogsearch.search.windows.net/$metadata#skillsets/$entity","@odata.etag":"\"0x8DBBF262724C1E5\"","name":"mmsearch-skillset","description":"Skillset created from the portal. skillsetName: index-skillset; contentField: merged_content; enrichmentGranularity: document; knowledgeStoreStorageAccount: ;","skills":[{"@odata.type":"#Microsoft.Skills.Text.V3.EntityRecognitionSkill","name":"#1","description":null,"context":"/document/merged_content","categories":["Organization","URL","DateTime","Skill","Address","Location","Product","IPAddress","Event","Person","Quantity","PersonType","PhoneNumber","Email"],"defaultLanguageCode":"en","mini

In [8]:
print("You now have 2 indexes.")
print("Your main index, which includes all the data, and your vector index, which includes the vector search data.")

You now have 2 indexes.
Your main index, which includes all the data, and your vector index, which includes the vector search data.
