# SAP HANA Cloud Vector Engine

>SAP HANA Cloud Vector Engine is a vector store fully integrated into the SAP HANA Cloud database.

Installation of the HANA database driver.

In [None]:
# Install (or upgrade) the hdbcli package, the HANA database driver used below.
# `%pip` is an IPython/Jupyter magic, so this line only works inside a notebook cell.
%pip install --upgrade --quiet  hdbcli

We use `OpenAIEmbeddings`, so we need an OpenAI API key.

In [1]:
import os
# The OpenAI API key is taken from the OPENAI_API_KEY environment variable.
# To set it from within the notebook instead, uncomment the following line:
# os.environ["OPENAI_API_KEY"] = "Your OpenAI API key"

In [2]:
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.hanavector import HanaDB
from langchain_openai import OpenAIEmbeddings

In [None]:
# Read the sample text file and split it into chunks
# (splitter configured with chunk_size=500 and no overlap).
text_documents = TextLoader(
    "../../modules/state_of_the_union.txt",
).load()
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=500,
)
text_chunks = text_splitter.split_documents(text_documents)
print(f"Number of document chunks: {len(text_chunks)}")

# Embedding model shared by all vector store instances created below
embeddings = OpenAIEmbeddings()

In [50]:
from hdbcli import dbapi

# Open the database connection. Address, port, user and password are read
# from the HANA_DB_* environment variables (each is None when unset).
connection = dbapi.connect(
    address=os.getenv("HANA_DB_ADDRESS"),
    port=os.getenv("HANA_DB_PORT"),
    user=os.getenv("HANA_DB_USER"),
    password=os.getenv("HANA_DB_PASSWORD"),
    autocommit=True,
    # NOTE(review): certificate validation is switched off here; presumably
    # acceptable for a demo only, so confirm before reusing this elsewhere.
    sslValidateCertificate=False,
)

## Similarity Search with Cosine Similarity (Default)

The table that holds the embeddings is specified by the given table name.
If the table does not exist, a new table is created.

In [None]:
# Vector store bound to the STATE_OF_THE_UNION table
db = HanaDB(
    connection=connection,
    embedding=embeddings,
    table_name="STATE_OF_THE_UNION",
)

# Remove documents left over from earlier runs (an empty filter is passed)
db.delete(filter={})

# Store the document chunks loaded above
db.add_documents(text_chunks)

In [None]:
# Fetch the two best matches for the question and print each one
# below an 80-character separator line.
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query, k=2)

for doc in docs:
    print("-" * 80, doc.page_content, sep="\n")

## Query the same content with Euclidean Distance

In [None]:
from langchain_community.vectorstores.utils import DistanceStrategy

# Open the same table again, this time with the Euclidean distance strategy
db = HanaDB(
    connection=connection,
    embedding=embeddings,
    table_name="STATE_OF_THE_UNION",
    distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE,
)

# Same question as before; print the two best matches
query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query, k=2)
for doc in docs:
    print("-" * 80, doc.page_content, sep="\n")

## Maximal Marginal Relevance Search (MMR)
Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

In [None]:
# Run a maximal marginal relevance search for the current `query`
# and print the two selected documents.
docs = db.max_marginal_relevance_search(query, k=2)
for doc in docs:
    print("-" * 80, doc.page_content, sep="\n")


## Basic Vectorstore Operations

In [None]:
# Vector store bound to a separate table for the basic operations demo
db = HanaDB(
    table_name="LANGCHAIN_DEMO_BASIC",
    embedding=embeddings,
    connection=connection,
)

# Remove documents left over from earlier runs (an empty filter is passed)
db.delete(filter={})

### Add plain documents
We can add documents to the existing table.

In [None]:
# Two documents that carry only text, no metadata
docs = [Document(page_content=text) for text in ("plain", "docs")]
db.add_documents(docs)

Add documents with metadata

In [None]:
# Two documents whose metadata differs in every field, including "quality"
docs = [
    Document(
        page_content="foo",
        metadata={
            "start": 100,
            "end": 150,
            "doc_name": "foo.txt",
            "quality": "bad",
        },
    ),
    Document(
        page_content="bar",
        metadata={
            "start": 200,
            "end": 250,
            "doc_name": "bar.txt",
            "quality": "good",
        },
    ),
]
db.add_documents(docs)

Query documents with specific metadata

In [None]:
# Restrict the search to documents whose "quality" metadata equals "bad";
# with the documents added above, only one document should be returned.
docs = db.similarity_search("foobar", k=2, filter={"quality": "bad"})
for doc in docs:
    print("-" * 80, doc.page_content, doc.metadata, sep="\n")

### Using a VectorStore as a Retriever in Chains for retrieval augmented generation (RAG)


In [None]:
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI

# Use a dedicated table for the retrieval chain demo
db = HanaDB(
    connection=connection,
    embedding=embeddings,
    table_name="LANGCHAIN_DEMO_RETRIEVAL_CHAIN",
)

# Delete already existing documents from the table
db.delete(filter={})

# Fill the table with the "State of the Union" chunks loaded earlier.
# Nothing else in this notebook writes to this table, so without this
# step the retriever would have no documents to return.
db.add_documents(text_chunks)

# Chat model that answers the questions
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

# Conversation memory: history is kept under "chat_history" and the
# chain output stored in it is the "answer" key
memory = ConversationBufferMemory(
    memory_key="chat_history", output_key="answer", return_messages=True
)

# Retriever view of the vector store with default search settings
retriever = db.as_retriever()

#### Define the prompt

In [83]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders: {context} receives the retrieved
# document chunks and {question} the user's question.
prompt_template = """
You are an expert state of the union topics. You are provided multiple context items that are related to the prompt you have to answer.
Use the following pieces of context to answer the question at the end.

```
{context}
```

Question: {question}
"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

#### Create the ConversationalRetrievalChain which handles the chat history and the retrieval of similar document chunks to be added to the prompt

In [84]:
from langchain.chains import ConversationalRetrievalChain

# Build the conversational chain: the retriever is asked for k=5 results per
# question, source documents are returned alongside the answer, and the
# custom PROMPT is handed to the combine-documents step.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 5}),
    memory=memory,
    verbose=False,
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": PROMPT},
)

#### Ask the first question (and verify how many text chunks have been used)

In [None]:
question = "What about Mexico and Guatemala?"

# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in current LangChain releases.
result = qa_chain.invoke({"question": question})
print("Answer from LLM:")
print("================")
print(result["answer"])

# The chain was created with return_source_documents=True, so the chunks
# that were passed to the LLM are available in the result.
source_docs = result["source_documents"]
print("================")
print(f"Number of used source document chunks: {len(source_docs)}")
