Skip to content

Commit

Permalink
Update azuresearch.py following recent change from azure-search-docum…
Browse files Browse the repository at this point in the history
…ents library (#13472)

- **Description:** 

The reference library azure-search-documents changed its API in version
11.4.0:

1. Notebook explaining Azure AI Search updated with most recent info
2. HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
3. PrioritizedFields(prioritized_content_fields) -->
SemanticPrioritizedFields(content_fields)
4. SemanticSettings --> SemanticSearch
5. VectorSearch(algorithm_configurations) -->
VectorSearch(algorithms)

--> These changes are now reflected in LangChain: its default vector search
configuration is now compatible with the officially released library from
Azure.

  - **Issue:**
Issue creating a new index (due to wrong class used for default vector
search configuration) if using latest version of azure-search-documents
with current langchain version
  - **Dependencies:** azure-search-documents>=11.4.0,
  - **Tag maintainer:** ,

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
  • Loading branch information
guillaumedelande and efriis committed Dec 5, 2023
1 parent 5cb3393 commit ea0afd0
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 38 deletions.
11 changes: 5 additions & 6 deletions docs/docs/integrations/vectorstores/azuresearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@
"collapsed": false
},
"source": [
"# Azure Cognitive Search\n",
"# Azure AI Search\n",
"\n",
"[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n",
"Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8"
"[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search` and `Azure Cognitive Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Install Azure Cognitive Search SDK"
"# Install Azure AI Search SDK"
]
},
{
Expand All @@ -26,7 +25,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install azure-search-documents==11.4.0b8\n",
"!pip install azure-search-documents\n",
"!pip install azure-identity"
]
},
Expand Down
120 changes: 88 additions & 32 deletions libs/langchain/langchain/vectorstores/azuresearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Optional,
Tuple,
Type,
Union,
)

import numpy as np
Expand All @@ -36,10 +37,13 @@
from azure.search.documents.indexes.models import (
ScoringProfile,
SearchField,
SemanticSettings,
VectorSearch,
)

try:
from azure.search.documents.indexes.models import SemanticSearch
except ImportError:
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0

# Allow overriding field names for Azure Search
FIELDS_ID = get_from_env(
Expand Down Expand Up @@ -69,7 +73,7 @@ def _get_search_client(
semantic_configuration_name: Optional[str] = None,
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[SemanticSettings] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
default_fields: Optional[List[SearchField]] = None,
Expand All @@ -81,15 +85,30 @@ def _get_search_client(
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SearchIndex,
SemanticConfiguration,
SemanticField,
SemanticSettings,
VectorSearch,
)

# class names changed for versions >= 11.4.0
try:
from azure.search.documents.indexes.models import (
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
SemanticPrioritizedFields, # PrioritizedFields outdated
SemanticSearch, # SemanticSettings outdated
)

NEW_VERSION = True
except ImportError:
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SemanticSettings,
)

NEW_VERSION = False

default_fields = default_fields or []
if key is None:
credential = DefaultAzureCredential()
Expand Down Expand Up @@ -135,34 +154,71 @@ def fmt_err(x: str) -> str:
fields = default_fields
# Vector search configuration
if vector_search is None:
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
if NEW_VERSION:
# >= 11.4.0:
# VectorSearch(algorithm_configurations) --> VectorSearch(algorithms)
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
else: # < 11.4.0
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)

# Create the semantic settings with the configuration
if semantic_settings is None and semantic_configuration_name is not None:
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
if NEW_VERSION:
# >= 11.4.0: SemanticSettings --> SemanticSearch
# PrioritizedFields(prioritized_content_fields)
# --> SemanticPrioritizedFields(content_fields)
semantic_settings = SemanticSearch(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=SemanticPrioritizedFields(
content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
else: # < 11.4.0
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
# Create the search index with the semantic settings and vector search
index = SearchIndex(
name=index_name,
Expand Down Expand Up @@ -196,7 +252,7 @@ def __init__(
semantic_query_language: str = "en-us",
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[SemanticSettings] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
**kwargs: Any,
Expand Down

0 comments on commit ea0afd0

Please sign in to comment.