Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update azuresearch.py following recent change from azure-search-documents library #13472

Merged
merged 5 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions docs/docs/integrations/vectorstores/azuresearch.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@
"collapsed": false
},
"source": [
"# Azure Cognitive Search\n",
"# Azure AI Search\n",
"\n",
"[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n",
"Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8"
"[Azure AI Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search` and `Azure Cognitive Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Install Azure Cognitive Search SDK"
"# Install Azure AI Search SDK"
]
},
{
Expand All @@ -26,7 +25,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install azure-search-documents==11.4.0b8\n",
"!pip install azure-search-documents\n",
"!pip install azure-identity"
]
},
Expand Down
120 changes: 88 additions & 32 deletions libs/langchain/langchain/vectorstores/azuresearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
Optional,
Tuple,
Type,
Union,
)

import numpy as np
Expand All @@ -36,10 +37,13 @@
from azure.search.documents.indexes.models import (
ScoringProfile,
SearchField,
SemanticSettings,
VectorSearch,
)

try:
from azure.search.documents.indexes.models import SemanticSearch
except ImportError:
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0

# Allow overriding field names for Azure Search
FIELDS_ID = get_from_env(
Expand Down Expand Up @@ -69,7 +73,7 @@ def _get_search_client(
semantic_configuration_name: Optional[str] = None,
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[SemanticSettings] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
default_fields: Optional[List[SearchField]] = None,
Expand All @@ -81,15 +85,30 @@ def _get_search_client(
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SearchIndex,
SemanticConfiguration,
SemanticField,
SemanticSettings,
VectorSearch,
)

# class names changed for versions >= 11.4.0
try:
from azure.search.documents.indexes.models import (
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
SemanticPrioritizedFields, # PrioritizedFields outdated
SemanticSearch, # SemanticSettings outdated
)

NEW_VERSION = True
except ImportError:
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SemanticSettings,
)

NEW_VERSION = False

default_fields = default_fields or []
if key is None:
credential = DefaultAzureCredential()
Expand Down Expand Up @@ -135,34 +154,71 @@ def fmt_err(x: str) -> str:
fields = default_fields
# Vector search configuration
if vector_search is None:
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
if NEW_VERSION:
# >= 11.4.0:
# VectorSearch(algorithm_configurations) --> VectorSearch(algorithms)
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
else: # < 11.4.0
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)

# Create the semantic settings with the configuration
if semantic_settings is None and semantic_configuration_name is not None:
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
if NEW_VERSION:
# >= 11.4.0: SemanticSettings --> SemanticSearch
# PrioritizedFields(prioritized_content_fields)
# --> SemanticPrioritizedFields(content_fields)
semantic_settings = SemanticSearch(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=SemanticPrioritizedFields(
content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
else: # < 11.4.0
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
# Create the search index with the semantic settings and vector search
index = SearchIndex(
name=index_name,
Expand Down Expand Up @@ -196,7 +252,7 @@ def __init__(
semantic_query_language: str = "en-us",
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[SemanticSettings] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
**kwargs: Any,
Expand Down
Loading