In [1]:
from opensearch_vector_search import OpenSearchVectorSearch
from langchain_openai import OpenAIEmbeddings
import os

Loading all environment variables

In [None]:
# NOTE(review): this import lives outside the first (imports) cell; consider
# moving it there so all dependencies are visible in one place.
from dotenv import load_dotenv

# Load environment variables before any cell below reads them with os.getenv
# (INDEX_NAME, OPENSEARCH_URL, OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD).
# NOTE(review): presumably the OpenAI API key is expected from the same
# source — confirm.
load_dotenv()

#### Embedding model 

In [2]:
def get_embedding_model(model_name: str):
    """Build an OpenAI embeddings client for the requested model.

    Args:
        model_name: Value forwarded as the ``model`` argument of
            ``OpenAIEmbeddings``.

    Returns:
        The newly constructed ``OpenAIEmbeddings`` instance.
    """
    embeddings = OpenAIEmbeddings(model=model_name)
    return embeddings


# This instance is handed to the vector store as its embedding function.
embedding_model = get_embedding_model(model_name="text-embedding-ada-002")

#### Instantiate OpenSearchVectorSearch vector store

In [3]:
# Connection settings are read from the environment: OPENSEARCH_URL,
# INDEX_NAME, OPENSEARCH_USERNAME and OPENSEARCH_PASSWORD. A variable that is
# not set is passed through as None.
# SSL, certificate verification, hostname assertion and SSL warnings are all
# switched off here.
# NOTE(review): presumably this targets a local/dev cluster — confirm before
# pointing the notebook at anything else.
opensearch_vectorstore = OpenSearchVectorSearch(
    opensearch_url=os.environ.get("OPENSEARCH_URL"),
    index_name=os.environ.get("INDEX_NAME"),
    embedding_function=embedding_model,
    http_auth=(
        os.environ.get("OPENSEARCH_USERNAME"),
        os.environ.get("OPENSEARCH_PASSWORD"),
    ),
    use_ssl=False,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
)

### Test-1: Testing Hybrid Search and Configuration of the Search Pipeline

#### 1.1 Testing Configuration of Search Pipeline 

In [4]:
# Weights passed to configure_search_pipelines as keyword_weight / vector_weight.
KEYWORD_WEIGHT = 0.3
VECTOR_WEIGHT = 0.7

# Derive the pipeline name from the weights so the name and the configured
# values cannot drift apart. With the values above this evaluates to
# "search_pipeline_keyword_0.3_vector_0.7".
pipeline_name = f"search_pipeline_keyword_{KEYWORD_WEIGHT}_vector_{VECTOR_WEIGHT}"

# Kept as the last expression of the cell so the response is shown as output.
opensearch_vectorstore.configure_search_pipelines(
    pipeline_name=pipeline_name,
    keyword_weight=KEYWORD_WEIGHT,
    vector_weight=VECTOR_WEIGHT,
)

{'acknowledged': True}

#### 1.2 Testing similarity_search() with Search_type = "Hybrid Search"

In [None]:
# Inputs for the hybrid-search call: the search pipeline to apply, the number
# of hits to request, and the query text.
pipeline_name = "search_pipeline_keyword_0.3_vector_0.7"
top_k = 3
query = "what are the country named in our database?"

# similarity_search with search_type="hybrid_search"; the field arguments name
# the index fields used for the text, the vector and the metadata.
matched_docs = opensearch_vectorstore.similarity_search(
    query=query,
    k=top_k,
    search_type="hybrid_search",
    search_pipeline=pipeline_name,
    text_field="text",
    vector_field="vector_field",
    metadata_field="metadata",
)

# Display the result as the cell output.
matched_docs

[Document(page_content='Italy,[a] officially the Italian Republic,[b] is a country in Southern[12] and Western[13][c] Europe. It is located on a peninsula that extends into the middle of the Mediterranean Sea, with the Alps on its northern land border, as well as islands, notably Sicily and Sardinia.[15] Italy shares its borders with France, Switzerland, Austria, Slovenia and two enclaves: Vatican City and San Marino. It', metadata={'date': '20240916', 'parent_id': 'doc345', 'name': 'Italy', 'source': 'https://api.python.langchain.com/', 'published': True, 'lang': 'eng'}),
 Document(page_content='and two enclaves: Vatican City and San Marino. It is the tenth-largest country in Europe, covering an area of 301,340 km2 (116,350 sq mi),[3] and third-most populous member state of the European Union, with a population of nearly 60 million.[16] Its capital and largest city is Rome; other major urban areas include Milan, Naples, Turin, Florence, and Venice.In antiquity, Italy was home to numer

#### 1.3 Testing similarity_search_with_score() with Search_type = "Hybrid Search"

In [5]:
# Same hybrid-search inputs as the previous test; this cell calls the
# *_with_score variant, whose output pairs each Document with a score.
pipeline_name = "search_pipeline_keyword_0.3_vector_0.7"
top_k = 3
query = "what are the country named in our database?"

matched_docs = opensearch_vectorstore.similarity_search_with_score(
    query=query,
    k=top_k,
    search_type="hybrid_search",
    search_pipeline=pipeline_name,
    text_field="text",
    vector_field="vector_field",
    metadata_field="metadata",
)

# Display the (Document, score) pairs as the cell output.
matched_docs

[(Document(page_content='Italy,[a] officially the Italian Republic,[b] is a country in Southern[12] and Western[13][c] Europe. It is located on a peninsula that extends into the middle of the Mediterranean Sea, with the Alps on its northern land border, as well as islands, notably Sicily and Sardinia.[15] Italy shares its borders with France, Switzerland, Austria, Slovenia and two enclaves: Vatican City and San Marino. It', metadata={'date': '20240916', 'parent_id': 'doc345', 'name': 'Italy', 'source': 'https://api.python.langchain.com/', 'published': True, 'lang': 'eng'}),
  0.7),
 (Document(page_content='and two enclaves: Vatican City and San Marino. It is the tenth-largest country in Europe, covering an area of 301,340 km2 (116,350 sq mi),[3] and third-most populous member state of the European Union, with a population of nearly 60 million.[16] Its capital and largest city is Rome; other major urban areas include Milan, Naples, Turin, Florence, and Venice.In antiquity, Italy was hom

#### 1.4 Testing similarity_search_with_score() & "Hybrid Search" with filtering

In [9]:
# Hybrid search with scores, plus a post_filter on the metadata.
pipeline_name = "search_pipeline_keyword_0.3_vector_0.7"
top_k = 3
query = "what are the country named in our database?"

matched_docs = opensearch_vectorstore.similarity_search_with_score(
    query=query,
    k=top_k,
    search_type="hybrid_search",
    search_pipeline=pipeline_name,
    text_field="text",
    vector_field="vector_field",
    metadata_field="metadata",
    # bool/filter/term clause on metadata.published == False.
    post_filter={
        "bool": {
            "filter": {
                "term": {"metadata.published": False},
            },
        },
    },
)

# Display the (Document, score) pairs as the cell output.
matched_docs

[(Document(page_content='A vector database, vector store or vector search engine is a database that can store vectors (fixed-length lists of numbers) along with other data items. Vector databases typically implement one or more Approximate Nearest Neighbor (ANN) algorithms,[1][2] so that one can search the database with a query vector to retrieve the closest matching database records.Vectors are mathematical', metadata={'date': '2022-06-01', 'parent_id': 'doc653', 'name': 'Vector Store', 'source': 'https://api.python.langchain.com/', 'published': False, 'lang': 'eng'}),
  0.3),
 (Document(page_content="database records.Vectors are mathematical representations of data in a high-dimensional space. In this space, each dimension corresponds to a feature of the data, with the number of dimensions ranging from few hundreds to tens of thousands, depending on the complexity of the data being represented. A vector's position in this space represents its characteristics. Words, phrases, or entir

### Test-2: Testing Approximate Search

#### 2.1 Testing similarity_search() with Search_type = "Approximate Search" (Default)

In [6]:
query = "what are the country named in our database?"

top_k = 3

# Defined in this cell, as in the hybrid-search cells, so the cell does not
# depend on a `pipeline_name` left in the kernel by an earlier cell.
# NOTE(review): confirm whether search_pipeline has any effect when
# search_type="approximate_search"; it is kept to preserve the original call.
pipeline_name = "search_pipeline_keyword_0.3_vector_0.7"

matched_docs = opensearch_vectorstore.similarity_search(
    query=query,
    k=top_k,
    search_type="approximate_search",
    search_pipeline=pipeline_name,
    vector_field="vector_field",
    text_field="text",
    metadata_field="metadata",
)

# Display the result as the cell output.
matched_docs

[Document(page_content='Italy,[a] officially the Italian Republic,[b] is a country in Southern[12] and Western[13][c] Europe. It is located on a peninsula that extends into the middle of the Mediterranean Sea, with the Alps on its northern land border, as well as islands, notably Sicily and Sardinia.[15] Italy shares its borders with France, Switzerland, Austria, Slovenia and two enclaves: Vatican City and San Marino. It', metadata={'name': 'Italy', 'source': 'https://api.python.langchain.com/', 'date': '20240916', 'lang': 'eng', 'published': True, 'parent_id': 'doc345'}),
 Document(page_content='and two enclaves: Vatican City and San Marino. It is the tenth-largest country in Europe, covering an area of 301,340 km2 (116,350 sq mi),[3] and third-most populous member state of the European Union, with a population of nearly 60 million.[16] Its capital and largest city is Rome; other major urban areas include Milan, Naples, Turin, Florence, and Venice.In antiquity, Italy was home to numer

#### 2.2 Testing similarity_search_with_score() with Search_type = "Approximate Search" (Default)

In [7]:
query = "what are the country named in our database?"

top_k = 3

# Defined in this cell, as in the hybrid-search cells, so the cell does not
# depend on a `pipeline_name` left in the kernel by an earlier cell.
# NOTE(review): confirm whether search_pipeline has any effect when
# search_type="approximate_search"; it is kept to preserve the original call.
pipeline_name = "search_pipeline_keyword_0.3_vector_0.7"

matched_docs = opensearch_vectorstore.similarity_search_with_score(
    query=query,
    k=top_k,
    search_type="approximate_search",
    search_pipeline=pipeline_name,
    vector_field="vector_field",
    text_field="text",
    metadata_field="metadata",
)

# Display the (Document, score) pairs as the cell output.
matched_docs

[(Document(page_content='Italy,[a] officially the Italian Republic,[b] is a country in Southern[12] and Western[13][c] Europe. It is located on a peninsula that extends into the middle of the Mediterranean Sea, with the Alps on its northern land border, as well as islands, notably Sicily and Sardinia.[15] Italy shares its borders with France, Switzerland, Austria, Slovenia and two enclaves: Vatican City and San Marino. It', metadata={'name': 'Italy', 'source': 'https://api.python.langchain.com/', 'date': '20240916', 'lang': 'eng', 'published': True, 'parent_id': 'doc345'}),
  1.7577173),
 (Document(page_content='and two enclaves: Vatican City and San Marino. It is the tenth-largest country in Europe, covering an area of 301,340 km2 (116,350 sq mi),[3] and third-most populous member state of the European Union, with a population of nearly 60 million.[16] Its capital and largest city is Rome; other major urban areas include Milan, Naples, Turin, Florence, and Venice.In antiquity, Italy w