In [None]:
# Upgrade the core dependencies to their latest releases (no versions are pinned here).
%pip install --upgrade numpy openai langchain azure-storage-blob azure-identity unstructured
# Install one specific pre-release build of azure-search-documents from the Azure SDK
# package feed given by --index-url instead of the default package index.
# NOTE(review): presumably this build is required for vector search support — confirm.
%pip install --index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/ azure-search-documents==11.4.0a20230509004

In [None]:
# macOS: libmagic must be installed first, e.g. via Homebrew
# !brew install libmagic

In [1]:
# Reading blobs from Azure Storage Account

import os

from langchain.document_loaders import AzureBlobStorageContainerLoader


# Both settings are required. os.getenv() returns None for an unset variable,
# which would otherwise be handed to the loader silently, so check up front
# and report exactly which variables are missing.
required_vars = (
    "AZURE_STORAGE_ACCOUNT_CONNECTION_STRING",
    "AZURE_STORAGE_CONTAINER_NAME",
)
missing_vars = [name for name in required_vars if not os.getenv(name)]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_vars)
    )

loader = AzureBlobStorageContainerLoader(
    conn_str=os.environ["AZURE_STORAGE_ACCOUNT_CONNECTION_STRING"],
    container=os.environ["AZURE_STORAGE_CONTAINER_NAME"],
)

# Load and split blobs into chunks
pages = loader.load_and_split()

In [None]:
# Insert text and embeddings into Azure Cognitive Search

from openai.embeddings_utils import get_embedding
from langchain.vectorstores.azuresearch import AzureSearch


def embeddings(text, engine="embedding-ada"):
    """Return the embedding of ``text`` as produced by ``get_embedding``.

    Args:
        text: The text to embed; forwarded unchanged to ``get_embedding``.
        engine: Value passed as ``get_embedding``'s ``engine`` argument.
            Defaults to "embedding-ada", so existing one-argument callers
            behave exactly as before.
            NOTE(review): presumably the name of an Azure OpenAI embedding
            deployment — confirm it matches your resource.

    Returns:
        Whatever ``get_embedding`` returns (presumably a list of floats —
        confirm against the installed ``openai`` version).
    """
    return get_embedding(text, engine=engine)


# Create the search index before running this cell; see:
# https://github.com/Azure/cognitive-search-vector-pr/blob/main/docs/rest-api-reference/create-or-update-index.md

# Connection settings are read from the environment.
search_endpoint = os.getenv("AZURE_SEARCH_ENDPOINT")
search_api_key = os.getenv("AZURE_SEARCH_API_KEY")
search_index_name = os.getenv("AZURE_SEARCH_INDEX_NAME")

# `embeddings` is handed over as the store's embedding function.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=search_endpoint,
    azure_search_key=search_api_key,
    index_name=search_index_name,
    embedding_function=embeddings,
)

# Add the previously loaded `pages` to the vector store.
vector_store.add_documents(documents=pages)

In [None]:
# Perform a similarity search

# Ask the vector store for the 3 entries most similar to the query.
docs = vector_store.similarity_search(
    query="How to become a champion",
    k=3,
    search_type="similarity",
)

# An empty result would make docs[0] raise IndexError, so handle that case
# explicitly instead of failing with an unhelpful traceback.
if docs:
    print(docs[0].page_content)
else:
    print("No matching documents found.")