In [50]:
import os
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient, SearchIndexerClient
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    SearchIndexer,
    ScoringProfile,
    SearchFieldDataType,
    SimpleField,
    SearchableField, 
    SearchIndexerDataContainer,
    SearchIndexerDataSourceConnection,
    SearchIndexerSkillset,
    InputFieldMappingEntry,
    OutputFieldMappingEntry,
    EntityRecognitionSkill,
    CognitiveServicesAccount,
    CognitiveServicesAccountKey,
    KeyPhraseExtractionSkill, 
    OcrSkill,
    SentimentSkill,
    MergeSkill,
    ImageAnalysisSkill,
    WebApiSkill
)


# Load variables from a local .env file (if one exists) into the process
# environment. `load_dotenv` was imported but never called, so on a fresh
# kernel every lookup below silently fell back to "default".
# By default load_dotenv() does not override variables that are already set.
load_dotenv()

# Get the service endpoints and API keys from the environment.
# Each value falls back to the placeholder string "default" when unset.
ADMIN_KEY = os.environ.get("ADMIN_KEY", "default")
ENDPOINT = os.environ.get("ENDPOINT", "default")
COGSERVKEY = os.environ.get("COGSERVKEY", "default")
COGSERVENDPOINT = os.environ.get("COGSERVENDPOINT", "default")
COSMOSURI = os.environ.get("COSMOSURI", "default")
COSMOSKEY = os.environ.get("COSMOSKEY", "default")


# Create the SDK clients
index_name = "sc"

# A single key credential is shared by all three clients.
credential = AzureKeyCredential(ADMIN_KEY)

# Only SearchClient is bound to a specific index. The index-management and
# indexer-management clients work at service level and take just an endpoint
# and a credential, so `index_name` is no longer passed to them.
admin_client = SearchIndexClient(endpoint=ENDPOINT, credential=credential)
search_client = SearchClient(endpoint=ENDPOINT, index_name=index_name, credential=credential)
indexer_client = SearchIndexerClient(endpoint=ENDPOINT, credential=credential)


# Remove the existing index before it is re-created further down.
# Best-effort: any failure is printed and the notebook carries on.
try:
    result = admin_client.delete_index(index_name)
except Exception as ex:
    print(ex)
else:
    print('Index', index_name, 'Deleted')


# Define the index schema
name = index_name

fields = [
    # Document key.
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),

    # Filled by the indexer field mappings from metadata_storage_path and
    # metadata_storage_name respectively.
    SearchableField(
        name="url",
        type=SearchFieldDataType.String,
        sortable=True,
        filterable=True,
    ),
    SearchableField(
        name="file_name",
        type=SearchFieldDataType.String,
        analyzer_name="en.lucene",
    ),

    # Full-text field analysed with the English Lucene analyzer.
    SearchableField(
        name="content",
        type=SearchFieldDataType.String,
        analyzer_name="en.lucene",
    ),

    # Fields named after blob metadata properties.
    SearchableField(
        name="metadata_storage_name",
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    SimpleField(
        name="metadata_storage_size",
        type=SearchFieldDataType.Double,
        facetable=True,
        filterable=True,
        sortable=True,
    ),
    # NOTE(review): SearchableField is intended for text fields; confirm
    # whether the DateTimeOffset type requested here is actually honoured by
    # the SDK, or whether this should be a SimpleField -- TODO verify.
    SearchableField(
        name="metadata_creation_date",
        type=SearchFieldDataType.DateTimeOffset,
        facetable=True,
        filterable=True,
        sortable=True,
    ),

    # "summary" is the target of the indexer's output field mapping.
    # NOTE(review): filterable/facetable on free-text fields may hit term-size
    # limits for long values -- confirm these attributes are really needed.
    SearchableField(
        name="summary",
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
    ),
    SearchableField(
        name="merged_text",
        type=SearchFieldDataType.String,
        facetable=True,
        filterable=True,
    ),
]

# Allow browser calls from any origin; preflight responses are cached for 60 seconds.
cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)

# No scoring profiles and no suggesters are configured for this index.
scoring_profiles = []

index = SearchIndex(
    name=name,
    fields=fields,
    cors_options=cors_options,
    scoring_profiles=scoring_profiles,
    suggesters=None,
)

# Create the index; a failure is printed rather than raised.
try:
    result = admin_client.create_index(index)
except Exception as ex:
    print(ex)
else:
    print('Index', result.name, 'created')


# Blob container that holds the documents to index.
container = SearchIndexerDataContainer(name='search')

# The storage connection string embeds the account key, so it is read from the
# environment instead of being hardcoded in the notebook.
# NOTE(review): the account key that was previously committed in this cell
# should be treated as leaked and rotated.
STORAGE_CONNECTION_STRING = os.environ.get("STORAGE_CONNECTION_STRING")
if not STORAGE_CONNECTION_STRING:
    raise RuntimeError(
        "Set the STORAGE_CONNECTION_STRING environment variable to the blob "
        "storage connection string before creating the data source."
    )

data_source_name = "sc-docs"
data_source = SearchIndexerDataSourceConnection(
    name=data_source_name,
    type="azureblob",
    connection_string=STORAGE_CONNECTION_STRING,
    container=container
)


# Custom summarization skill: sends each document's id and content to a web
# API and stores the returned "text" value as /document/summary.

# The skill URI carries the function access key (?code=...), so the full URI is
# read from the environment instead of being hardcoded in the notebook.
# NOTE(review): the access key that was previously committed in this cell
# should be treated as leaked and regenerated.
SUMMARIZATION_SKILL_URI = os.environ.get("SUMMARIZATION_SKILL_URI")
if not SUMMARIZATION_SKILL_URI:
    raise RuntimeError(
        "Set the SUMMARIZATION_SKILL_URI environment variable to the full URI "
        "of the summarization endpoint (including its access code)."
    )

id_input = InputFieldMappingEntry(name="id", source="/document/id")
summ_input = InputFieldMappingEntry(name="content", source="/document/content")
summ_output = OutputFieldMappingEntry(name="text", target_name="summary")
sum_ws = WebApiSkill(
    name="custom_summ_skill",
    inputs=[id_input, summ_input],
    outputs=[summ_output],
    context="/document",
    uri=SUMMARIZATION_SKILL_URI,
)


skillset_name = 'sc-skills'

# The skillset contains only the custom summarization skill and is attached to
# the Cognitive Services account identified by COGSERVKEY.
skillset = SearchIndexerSkillset(
    name=skillset_name,
    description="SC skillset",
    skills=[sum_ws],
    cognitive_services_account=CognitiveServicesAccountKey(key=COGSERVKEY),
)

# Delete the skillset of that name first, then create it from the definition above.
indexer_client.delete_skillset(skillset_name)
result = indexer_client.create_skillset(skillset)

# Create the indexer that ties together the data source, the skillset and the index
indexer_name = "sc-indexer"

# Blob metadata -> index fields.
indexer_field_mappings = [
    {"sourceFieldName": "metadata_storage_path", "targetFieldName": "url"},
    {"sourceFieldName": "metadata_storage_name", "targetFieldName": "file_name"},
]

# Enrichment output -> index field.
indexer_output_field_mappings = [
    {"sourceFieldName": "/document/summary", "targetFieldName": "summary", "mappingFunction": None},
]

indexer = SearchIndexer(
    name=indexer_name,
    data_source_name=data_source_name,
    target_index_name=index_name,
    skillset_name=skillset_name,
    field_mappings=indexer_field_mappings,
    output_field_mappings=indexer_output_field_mappings,
)

# Remove the previous indexer and data source connection before re-creating them.
indexer_client.delete_indexer(indexer)
indexer_client.delete_data_source_connection(data_source)

result = indexer_client.create_data_source_connection(data_source)
print(f"Create new Data Source Connection - {data_source_name}")

# Register the indexer, then trigger a run straight away.
result = indexer_client.create_indexer(indexer)
indexer_client.run_indexer(indexer_name)


Index sc Deleted
Index sc created
Create new Data Source Connection - sc-docs


In [17]:
import json
from dotenv import load_dotenv
from azure.cosmos import CosmosClient, PartitionKey

# Cosmos DB connection settings; each falls back to "default" when unset.
# NOTE: relies on `os` having been imported by an earlier cell.
COSMOSURI = os.getenv("COSMOSURI", "default")
COSMOSKEY = os.getenv("COSMOSKEY", "default")

client = CosmosClient(url=COSMOSURI, credential=COSMOSKEY)

# Create the database on first use, otherwise reuse the existing one.
database = client.create_database_if_not_exists(id="scdb")

# Items in the "documents" container are partitioned on /categoryId.
# NOTE: `container` re-binds the name used for the search data container in
# the indexing cell; the Cosmos cells that follow expect this Cosmos container.
partitionKeyPath = PartitionKey(path="/categoryId")
container = database.create_container_if_not_exists(
    id="documents",
    partition_key=partitionKeyPath,
    offer_throughput=400,
)


In [None]:
# Delete the sample document, addressed by its id and partition key value.
container.delete_item(
    item="aHR0cHM6Ly9zY2NzdG9yYWdlb2dzZWFyY2guYmxvYi5jb3JlLndpbmRvd3MubmV0L3NlYXJjaC8yMDE4NjMucGRm0",
    partition_key="61dba35b-4f02-45c5-b648-c6badc0cbd79",
)

In [47]:
# Sample document: "categoryId" is the partition key value and "text" holds
# the edited summary.
newItem = dict(
    id="aHR0cHM6Ly9zY2NzdG9yYWdlb2dzZWFyY2guYmxvYi5jb3JlLndpbmRvd3MubmV0L3NlYXJjaC8yMDE4NjMucGRm0",
    categoryId="61dba35b-4f02-45c5-b648-c6badc0cbd79",
    text="THIS IS THE EDITED SUMMARY: At Microsoft, we have been on a quest to advance AI. Good night!",
)

# Insert the document, or replace the stored one with the same id.
container.upsert_item(newItem)

{'id': 'aHR0cHM6Ly9zY2NzdG9yYWdlb2dzZWFyY2guYmxvYi5jb3JlLndpbmRvd3MubmV0L3NlYXJjaC8yMDE4NjMucGRm0',
 'categoryId': '61dba35b-4f02-45c5-b648-c6badc0cbd79',
 'text': 'THIS IS THE EDITED SUMMARY: At Microsoft, we have been on a quest to advance AI. Good night!',
 '_rid': 'cK14AMlBVogBAAAAAAAAAA==',
 '_self': 'dbs/cK14AA==/colls/cK14AMlBVog=/docs/cK14AMlBVogBAAAAAAAAAA==/',
 '_etag': '"c7007523-0000-4d00-0000-63a05c090000"',
 '_attachments': 'attachments/',
 '_ts': 1671453705}

In [22]:
# Look up a single document by id inside one partition.
QUERY = "SELECT * FROM documents p WHERE p.categoryId = @categoryId AND p.id = @id"
CATEGORYID = "61dba35b-4f02-45c5-b648-c6badc0cbd79"
d_id = "aHR0cHM6Ly9zY2NzdG9yYWdlb2dzZWFyY2guYmxvYi5jb3JlLndpbmRvd3MubmV0L3NlYXJjaC8yMDE4NjMucGRm0"
params = [dict(name="@categoryId", value=CATEGORYID), dict(name="@id", value=d_id)]

items = container.query_items(
    query=QUERY, parameters=params, enable_cross_partition_query=False
)

# Take the first match, or None when the query yields nothing. Only the
# "no results" case is handled here: any other failure (authentication,
# network, ...) now propagates instead of being reported as "No Documents found".
document = next(iter(items), None)

if document is None:
    print("No Documents found")

# Display the stored text, or nothing when no document matched. The previous
# unconditional document['text'] raised TypeError when the lookup came back empty.
document["text"] if document is not None else None



{
 "id": "aHR0cHM6Ly9zY2NzdG9yYWdlb2dzZWFyY2guYmxvYi5jb3JlLndpbmRvd3MubmV0L3NlYXJjaC8yMDE4NjMucGRm0",
 "categoryId": "61dba35b-4f02-45c5-b648-c6badc0cbd79",
 "text": "hello5",
 "_rid": "cK14AMlBVogBAAAAAAAAAA==",
 "_self": "dbs/cK14AA==/colls/cK14AMlBVog=/docs/cK14AMlBVogBAAAAAAAAAA==/",
 "_etag": "\"c700f822-0000-4d00-0000-63a056300000\"",
 "_attachments": "attachments/",
 "_ts": 1671452208
}
