# Vantage & LangChain: _Storage & Indexing Pipeline_

## Initial Setup

### Keys and Secrets

In [13]:
import os

# Credentials are read from environment variables so that real secrets never
# have to be typed into (and saved with) the notebook. The original placeholder
# strings are kept as fallbacks, so the cell still runs when nothing is set.
OPENAI_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_KEY")
VANTAGE_API_KEY = os.environ.get("VANTAGE_API_KEY", "YOUR_VANTAGE_API_KEY")
JWT = os.environ.get("VANTAGE_API_JWT_TOKEN", "YOUR_JWT_TOKEN")

### Vantage Configuration

In [2]:
# Embedding model name; passed as `llm=` when the Vantage-managed-embeddings
# vector store is created further down.
LLM = "text-embedding-ada-002"
# Vector size passed as `embedding_dimension=` to the vector stores below.
# NOTE(review): presumably this must equal the output size of the model named
# in LLM — confirm before changing either value.
EMBEDDING_DIMENSION = 1536

### Data Sample

In [3]:
# Three short sample sentences that get indexed by every vector store below.
TEXTS = [
    "Ted goes to the gym and exercises three times a week during summer.",
    "Yuriko and Mina are going to Hawaii this summer.",
    "Many people eat cereal for breakfast.",
]

# One metadata record per sentence, in the same order as TEXTS.
# The "planet" field is what the filtered searches later match on.
METADATA = [
    dict(planet="Earth", something_else="Some value"),
    dict(planet="Earth"),
    dict(planet="Mars"),
]

### Vantage SDK Installation

In [4]:
!pip install -i https://test.pypi.org/simple/ vantage-sdk==0.0.12 -qU

In [7]:
!pip install langchain langchain-openai -qU

### LangChain VectorStore Preparation

- Initialization of the VantageClient
- Initialization of the Embeddings object

In [14]:
from vantage import VantageClient
from langchain_openai import OpenAIEmbeddings

# Embeddings object handed to every Vantage vector store created in this notebook.
langchain_embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)

# Vantage client authenticated with the JWT from the "Keys and Secrets" cell.
# The account ID and API host are specific to the author's environment.
vantage_client = VantageClient.using_jwt_token(
    api_host="https://api.dev-a.dev.vantagediscovery.com",
    account_id="jelena-dostic",
    vantage_api_jwt_token=JWT,
)

Install the local build of the LangChain community library, which includes the Vantage vector store:

In [9]:
!pip uninstall langchain-community -y
!python -m pip install ../../../../../../../SmartCatLabs/langchain/libs/community -qU

Found existing installation: langchain-community 0.0.27
Uninstalling langchain-community-0.0.27:
  Successfully uninstalled langchain-community-0.0.27
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.1.11 requires langchain-community<0.1,>=0.0.25, but you have langchain-community 0.0.24 which is incompatible.

### Initialization of the Vantage vector store using LangChain
- 1 - Vantage Managed Embeddings, using regular initialization
- 2 - User Provided Embeddings, using class method from_texts
- 3 - Initialize using existing collection (id)
- 4 - Initialize using existing collection (collection object)
- 5 - From documents

In [15]:
from langchain_community.vectorstores.vantage import Vantage

In [16]:
# Fetch the external (LLM provider) API keys registered for this Vantage
# account; the ID of one of them is needed as EXTERNAL_KEY_ID below.
external_keys = vantage_client.get_external_api_keys()
# Display the result so the key ID, provider and state can be inspected.
external_keys

[ExternalAPIKey(external_key_id='c9c205a2-feca-428a-bb70-d29583ae050a', account_id='jelena-dostic', external_key_created_date='2024-03-03T23:20:15', url=None, llm_provider='OpenAI', llm_secret='sk-YE**********************************************', state='Active')]

In [17]:
# Take the key ID from the keys fetched in the previous cell instead of pasting
# an account-specific literal, so the notebook also runs against other accounts.
# The first key that is an active OpenAI key is chosen; for the account shown in
# the output above this is the same ID that was previously hard-coded.
EXTERNAL_KEY_ID = next(
    (
        key.external_key_id
        for key in external_keys
        if key.llm_provider == "OpenAI" and key.state == "Active"
    ),
    None,
)
if EXTERNAL_KEY_ID is None:
    # Fail here with a clear message rather than later inside the vector store.
    raise ValueError("No active OpenAI external API key found for this account.")

#### 1 - Vantage Managed Embeddings, Regular Initialization

- Creating vector store, then adding texts and metadata using add_texts method

In [18]:
# Variant 1: Vantage-managed embeddings (user_provided_embeddings=False).
# The store is configured with the model named by LLM and the external key ID;
# no texts are added in this cell.
COLLECTION_ID_VME = "vme-ipynb"

vector_store_vme = Vantage(
    collection_id=COLLECTION_ID_VME,
    client=vantage_client,
    embedding=langchain_embeddings,
    user_provided_embeddings=False,
    embedding_dimension=EMBEDDING_DIMENSION,
    external_key_id=EXTERNAL_KEY_ID,
    llm=LLM,
)

vector_store_vme

<langchain_community.vectorstores.vantage.Vantage at 0x1143edcf0>

In [19]:
# Add the sample texts and their metadata to the managed-embeddings store.
# add_texts returns a list of string IDs, displayed as the cell output.
ids = vector_store_vme.add_texts(TEXTS, METADATA)
ids

['982e3cd9-93bb-45a2-b5b9-6595af154f70',
 'f2bfacfe-6fd7-4e05-a79d-226490108479',
 '55918511-0784-448b-972e-b4a5dc91d316']

#### 2 - User Provided Embeddings, using class method from_texts

- Texts and metadata are automatically added

In [29]:
# Variant 2: user-provided embeddings (user_provided_embeddings=True).
# from_texts builds the store and adds TEXTS/METADATA in a single call, so no
# separate add_texts step follows.
COLLECTION_ID_UPE = "upe-ipynb"

vector_store_upe = Vantage.from_texts(
    client=vantage_client,
    collection_id=COLLECTION_ID_UPE,
    texts=TEXTS,
    metadatas=METADATA,
    embedding=langchain_embeddings,
    user_provided_embeddings=True,
    embedding_dimension=EMBEDDING_DIMENSION,
)

vector_store_upe

<langchain_community.vectorstores.vantage.Vantage at 0x121b556f0>

#### 3 - Initialize using existing collection (id)

In [20]:
# Variant 3: attach to a collection by its ID. Only the client, the embeddings
# object and the collection ID are passed — no dimension or model settings.
# NOTE(review): per the section title the collection is expected to exist
# already — confirm how a missing collection is handled.
COLLECTION_ID_EXISTING_ID = "existing-id-ipynb"

vector_store_existing_id = Vantage(
    collection_id=COLLECTION_ID_EXISTING_ID,
    embedding=langchain_embeddings,
    client=vantage_client,
)

# Add the sample texts with their metadata; the returned IDs are displayed.
ids = vector_store_existing_id.add_texts(TEXTS, METADATA)
ids

['cebdb511-4134-4238-ae38-4ef8b4964f6f',
 '6dbd1b78-9520-48ef-a8b1-06aee6c279a6',
 '20b25c63-f4f0-4165-a33c-e01eec759d05']

#### 4 - Initialize using existing collection (collection object)

In [21]:
# Variant 4: attach to a collection through a collection object that is first
# fetched from the Vantage client, instead of passing the collection ID.
COLLECTION_ID_EXISTING_OBJ = "existing-object-ipynb"

collection = vantage_client.get_collection(COLLECTION_ID_EXISTING_OBJ)

vector_store_existing_obj = Vantage(
    collection=collection,
    embedding=langchain_embeddings,
    client=vantage_client,
)

# Add the sample texts with their metadata; the returned IDs are displayed.
# This store is the one queried in the search examples at the end.
ids = vector_store_existing_obj.add_texts(TEXTS, METADATA)
ids

['51e79685-6992-4637-b6ea-83d5f592b836',
 '7f705706-94c8-4642-a864-4f1ee795dced',
 '223fa89b-94d7-42c9-9e09-854f8c30ba3e']

#### 5 - Initialize from documents

In [23]:
from langchain_core.documents import Document

# Pair each sample text with the metadata record at the same position and wrap
# the pair in a LangChain Document.
DOCUMENTS = [
    Document(page_content=content, metadata=attributes)
    for content, attributes in zip(TEXTS, METADATA)
]
DOCUMENTS

[Document(page_content='Ted goes to the gym and exercises three times a week during summer.', metadata={'planet': 'Earth', 'something_else': 'Some value'}),
 Document(page_content='Yuriko and Mina are going to Hawaii this summer.', metadata={'planet': 'Earth'}),
 Document(page_content='Many people eat cereal for breakfast.', metadata={'planet': 'Mars'})]

In [27]:
# Variant 5: build the store directly from the Document objects created above,
# with user-provided embeddings.
COLLECTION_ID_DOCUMENTS = "documents-ipynb"

vector_store_from_documents = Vantage.from_documents(
    client=vantage_client,
    collection_id=COLLECTION_ID_DOCUMENTS,
    documents=DOCUMENTS,
    embedding=langchain_embeddings,
    user_provided_embeddings=True,
    embedding_dimension=EMBEDDING_DIMENSION,
)

vector_store_from_documents


<langchain_community.vectorstores.vantage.Vantage at 0x121b54d30>

### Example with Document Loader

In [30]:
# Import the loader from langchain_community: the `langchain.document_loaders`
# path is a deprecated re-export, and this notebook already depends on
# langchain_community for the Vantage vector store.
from langchain_community.document_loaders import PyPDFLoader

# NOTE: "..." is a placeholder — replace it with the path to a local PDF file.
data = PyPDFLoader(file_path="...")
# load() returns a list of Document objects extracted from the PDF.
documents = data.load()
# Preview a single entry; index 15 requires the list to hold at least 16 items.
documents[15]

Document(page_content='166  Oh, little prince! Bit by bit I came to understand the secrets of your sad little life . . . For a long time you had found your only entertainment in the quiet pleasure of looking at the sunset. I learned that new detail on the morning of the fourth day, when you said to me:  "I am very fond of sunsets. Come, let us go look at a sunset now."  "But we must wait," I said.  "Wait? For what?"  "For the sunset. We must wait until it is time."  At first you seemed to be very much surprised. And then you laughed to yourself. You said to me:  "I am always thinking that I am at home!"  Just so. Everybody knows that when it is noon in the United States the sun is setting over France.  If you could fly to France in one minute, you could go straight into the sunset, right from noon. Unfortunately, France is too far away for that. But on your tiny planet, my little prince, all you need do is move your chair a few steps. You can see the day end and the twilight falling wh

In [31]:
# Index the documents produced by the PDF loader, using user-provided
# embeddings, in their own collection.
COLLECTION_ID_DOCUMENT_LOADER = "document-loader-ipynb"

vector_store_document_loader = Vantage.from_documents(
    client=vantage_client,
    collection_id=COLLECTION_ID_DOCUMENT_LOADER,
    documents=documents,
    embedding=langchain_embeddings,
    user_provided_embeddings=True,
    embedding_dimension=EMBEDDING_DIMENSION,
)

vector_store_document_loader

<langchain_community.vectorstores.vantage.Vantage at 0x12274b220>

### Search Collections using LangChain vector store

In [6]:
# Free-text query used by the similarity_search examples below.
TEXT_QUERY = "summer"
# The same query turned into an embedding by the OpenAI embeddings object;
# used by the similarity_search_by_vector examples below.
EMBEDDING_QUERY = langchain_embeddings.embed_query(text=TEXT_QUERY)

In [None]:
# TEXTS = [
#     "Ted goes to the gym and exercises three times a week during summer.",
#     "Yuriko and Mina are going to Hawaii this summer.",
#     "Many people eat cereal for breakfast.",
# ]

# METADATA = [
#     {"planet": "Earth", "something_else": "Some value"},
#     {"planet": "Earth"},
#     {"planet": "Mars"},
# ]

In [12]:
# 1 - Similarity Search (Semantic Search) without filter
# The text query is sent as-is; the Vantage API key is supplied per call.
query_results = vector_store_existing_obj.similarity_search(
    query=TEXT_QUERY,
    vantage_api_key=VANTAGE_API_KEY,
)

query_results

[Document(page_content='51e79685-6992-4637-b6ea-83d5f592b836'),
 Document(page_content='7f705706-94c8-4642-a864-4f1ee795dced'),
 Document(page_content='223fa89b-94d7-42c9-9e09-854f8c30ba3e')]

In [13]:
# 2 - Similarity Search by Vector (Embedding Search) without filter
# Uses the pre-computed query embedding instead of the raw query text.
embedding_results = vector_store_existing_obj.similarity_search_by_vector(
    embedding=EMBEDDING_QUERY,
    vantage_api_key=VANTAGE_API_KEY,
)

embedding_results

[Document(page_content='51e79685-6992-4637-b6ea-83d5f592b836'),
 Document(page_content='7f705706-94c8-4642-a864-4f1ee795dced'),
 Document(page_content='223fa89b-94d7-42c9-9e09-854f8c30ba3e')]

In [14]:
# Filter expression passed as `filter=` to the filtered searches below.
# NOTE(review): judging by the single-result outputs, it restricts matches to
# records whose "planet" metadata is "Mars" — confirm the filter syntax.
FILTER = '(planet:"Mars")'

In [17]:
# 3 - Similarity Search (Semantic Search) with filter
# Same text query as example 1, narrowed by the FILTER expression.
query_results_filter = vector_store_existing_obj.similarity_search(
    query=TEXT_QUERY,
    vantage_api_key=VANTAGE_API_KEY,
    filter=FILTER,
)

query_results_filter

[Document(page_content='223fa89b-94d7-42c9-9e09-854f8c30ba3e')]

In [18]:
# 4 - Similarity Search by Vector (Embedding Search) with filter
# Same query embedding as example 2, narrowed by the FILTER expression.
embedding_results_filter = vector_store_existing_obj.similarity_search_by_vector(
    embedding=EMBEDDING_QUERY,
    vantage_api_key=VANTAGE_API_KEY,
    filter=FILTER,
)

embedding_results_filter

[Document(page_content='223fa89b-94d7-42c9-9e09-854f8c30ba3e')]