In [9]:
import os
from dotenv import load_dotenv
from langchain.schema import HumanMessage
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from azure.core.credentials import AzureKeyCredential  
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchField,  
    VectorSearch,
    VectorSearchProfile,
    HnswAlgorithmConfiguration,
    VectorSearchAlgorithmKind,
    HnswParameters,
    VectorSearchAlgorithmMetric,
    ExhaustiveKnnAlgorithmConfiguration,
    ExhaustiveKnnParameters,
)

# Load settings through python-dotenv (presumably from a local .env file —
# confirm its location). This must run before the configuration cell, whose
# os.getenv() lookups depend on these variables being in the environment.
load_dotenv()

True

# Configure Azure OpenAI and Azure AI Search (vector) settings

In [10]:
# --- Azure OpenAI -----------------------------------------------------------
# These two values are not passed explicitly to any client in the visible
# cells; the LangChain Azure clients presumably read the same variables from
# the environment themselves (confirm), so they are kept here for reference.
azure_openai_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
azure_openai_api_key = os.getenv('AZURE_OPENAI_API_KEY')

# --- Azure AI Search --------------------------------------------------------
# Both values are handed directly to the Azure SDK below, so fail early with a
# readable message instead of an opaque error deep inside the SDK.
azure_search_endpoint = os.getenv('AZURE_SEARCH_SERVICE_ENDPOINT')
azure_search_key = os.getenv('AZURE_SEARCH_API_KEY')

_missing_search_settings = [
    env_name
    for env_name, env_value in (
        ('AZURE_SEARCH_SERVICE_ENDPOINT', azure_search_endpoint),
        ('AZURE_SEARCH_API_KEY', azure_search_key),
    )
    if not env_value
]
if _missing_search_settings:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_search_settings)
        + ". Define them in the environment or in the .env file and re-run."
    )

# --- Deployments, API version and index settings ----------------------------
model_deployment_1: str = "gpt-4"                    # chat model deployment name
model_deployment_2: str = "text-embedding-ada-002"   # embedding deployment name
version: str = "2023-05-15"                          # Azure OpenAI API version
index_name: str = "langchain-vector"
# Must equal the length of the vectors produced by the embedding deployment,
# because the index's vector field is declared with this dimension.
vector_search_dimensions: int = 1536

# Key-based credential used by the Azure AI Search clients.
credential = AzureKeyCredential(azure_search_key)

# Initialize the GPT-4 chat model and the embedding model, then smoke-test both

In [None]:
# Chat model bound to the GPT-4 deployment. Endpoint and API key are not
# passed here, so they must be supplied some other way (presumably via the
# AZURE_OPENAI_* environment variables loaded earlier — confirm).
model = AzureChatOpenAI(
    openai_api_version=version,
    azure_deployment=model_deployment_1,
)

# Smoke test: send a single human message and print the model's reply.
message = HumanMessage(
    content="Translate this sentence from English to Dutch. I love programming."
)
# Use the Runnable `invoke` API; calling the model object directly
# (`model([message])`) is the deprecated entry point.
print(model.invoke([message]))

# Embedding model bound to the embedding deployment.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=model_deployment_2,
    openai_api_version=version,
)

# Smoke test: embed the same text as a query and as a one-document batch.
text = "This is a test document."
query_result = embeddings.embed_query(text)
doc_result = embeddings.embed_documents([text])

# The search index declares its vector field with `vector_search_dimensions`,
# so catch a mismatch here rather than when documents are uploaded.
assert len(query_result) == vector_search_dimensions, (
    f"Embedding has {len(query_result)} dimensions, "
    f"expected {vector_search_dimensions}"
)

# Show a short summary instead of dumping the entire vector into the output.
print(f"Embedding dimensions: {len(query_result)}; first values: {query_result[:5]}")

# Create the search index schema and vector search configuration

In [12]:
# Client used to manage indexes on the search service.
index_client = SearchIndexClient(endpoint=azure_search_endpoint, credential=credential)

# ---------------------------------------------------------------------------
# Index schema: one definition per field, assembled in order below.
# ---------------------------------------------------------------------------
string_type = SearchFieldDataType.String

# Document key.
id_field = SimpleField(name="id", type=string_type, key=True, filterable=True)

# Full-text searchable document body.
content_field = SearchableField(name="content", type=string_type, searchable=True)

# Embedding of the content; a collection of single-precision floats whose
# length is `vector_search_dimensions`, searched through the "hnswProfile"
# profile defined further down.
content_vector_field = SearchField(
    name="contentVector",
    type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
    searchable=True,
    vector_search_dimensions=vector_search_dimensions,
    vector_search_profile_name="hnswProfile",
)

metadata_field = SearchableField(name="metadata", type=string_type, searchable=True)
title_field = SearchableField(name="title", type=string_type, searchable=True)

# Filter-only tag.
tag_field = SimpleField(name="tag", type=string_type, filterable=True)

fields = [
    id_field,
    content_field,
    content_vector_field,
    metadata_field,
    title_field,
    tag_field,
]

# ---------------------------------------------------------------------------
# Vector search configuration: two algorithms, each exposed through a profile.
# Both use cosine as the metric.
# ---------------------------------------------------------------------------
cosine_metric = VectorSearchAlgorithmMetric.COSINE

hnsw_algorithm = HnswAlgorithmConfiguration(
    name="hnsw",
    kind=VectorSearchAlgorithmKind.HNSW,
    parameters=HnswParameters(
        m=4,
        ef_construction=400,
        ef_search=500,
        metric=cosine_metric,
    ),
)
exhaustive_knn_algorithm = ExhaustiveKnnAlgorithmConfiguration(
    name="exhaustiveKnn",
    kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
    parameters=ExhaustiveKnnParameters(metric=cosine_metric),
)

# Profile names are what fields reference; each maps to one algorithm by name.
hnsw_profile = VectorSearchProfile(
    name="hnswProfile",
    algorithm_configuration_name="hnsw",
)
exhaustive_knn_profile = VectorSearchProfile(
    name="exhaustiveKnnProfile",
    algorithm_configuration_name="exhaustiveKnn",
)

vector_search = VectorSearch(
    algorithms=[hnsw_algorithm, exhaustive_knn_algorithm],
    profiles=[hnsw_profile, exhaustive_knn_profile],
)

# ---------------------------------------------------------------------------
# Send the definition to the service and report the name it returns.
# ---------------------------------------------------------------------------
index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)
result = index_client.create_or_update_index(index)
print(f' {result.name} created')

 langchain-vector created
