In [None]:
## ENSURE THAT THE OPENSEARCH SERVICE (AND OPTIONALLY OPENSEARCH-DASHBOARDS) IN docker-compose.yml
### IS UNCOMMENTED AND RUNNING IF YOU WANT TO USE THE CONTAINERIZED INSTANCES

# https://python.langchain.com/v0.2/docs/integrations/llms/llamacpp/#usage

In [None]:
from langchain_community.llms import LlamaCpp
from langchain import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import OpenSearchVectorSearch
from langchain.document_loaders import TextLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler

from huggingface_hub import hf_hub_download
from textwrap import dedent

In [None]:
# Connection settings for the OpenSearch instance used throughout this notebook.
# Pick the host line that matches where the notebook kernel is running.
# OPENSEARCH_HOST = "localhost" # When running notebook outside of compose jupyter container
OPENSEARCH_HOST = "opensearch" # When running notebook inside of compose jupyter container
OPENSEARCH_PORT = 9200
# Plain-HTTP base URL built from the host and port above.
OPENSEARCH_HTTP_URL= f"http://{OPENSEARCH_HOST}:{OPENSEARCH_PORT}"
# NOTE(review): credentials are hard-coded; presumably the defaults of the local
# dev container — confirm, and do not reuse them against a shared cluster.
OPENSEARCH_USER = "admin"
OPENSEARCH_PASSWORD = "admin"

In [None]:
# Model id handed to HuggingFaceEmbeddings below; the commented line is an alternative.
# embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings_model_name = "thenlper/gte-large"

In [None]:
# Embedding model shared by the vector store and the helper functions further down.
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

In [None]:
# Load the source text file and split it into chunks for indexing.
loader = TextLoader("/workspace/data/state_of_the_union.txt")
documents = loader.load()
# chunk_size / chunk_overlap are the splitter's size limits (presumably in characters — TODO confirm).
text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

In [None]:
# Build the LangChain OpenSearch vector store from the chunks in `docs`, using
# `embeddings` as the embedding model. `docsearch` is what the similarity-search
# cells and `run_retrieval_qa` query later on.
docsearch = OpenSearchVectorSearch.from_documents(
    docs,
    embeddings,
    engine="faiss",
    space_type="innerproduct",
    ef_construction=256,
    m=48,
    opensearch_url=OPENSEARCH_HTTP_URL,
    http_auth=(OPENSEARCH_USER, OPENSEARCH_PASSWORD),
    # TLS options are all switched off, matching the plain-HTTP URL passed above.
    use_ssl = False,
    verify_certs = False,
    ssl_assert_hostname = False,
    ssl_show_warn = False,
)

In [None]:
# Sanity-check retrieval: fetch the 10 nearest chunks for a sample question.
# NOTE(review): this rebinds `docs` (previously the full list of split chunks) to
# the search hits, so any later cell that reads `docs` sees only these results —
# confirm that is intended.
query = "How much does the president want to cut the cancer death rate?"
docs = docsearch.similarity_search(query, k=10)

In [None]:
# Show the text of the first hit returned by the similarity search.
docs[0].page_content

In [None]:
# model_name = "PY007/TinyLlama-1.1B-Chat-v0.3"
# model_name = "TheBloke/CollectiveCognition-v1.1-Mistral-7B-GPTQ"
# Hugging Face Hub repo id and the GGUF file inside it that LlamaCpp will load.
MODEL_NAME="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF"
MODEL_FILE="Meta-Llama-3-8B-Instruct-Q4_K_M.gguf"
# Resolve the file through the Hub client; the returned value is used as the model path below.
model_file_path = hf_hub_download(repo_id=MODEL_NAME, filename=MODEL_FILE)

# Streams generated tokens to stdout while the model is producing them.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Shared LLM instance. The helper functions later in the notebook overwrite
# `llm.max_tokens` before each call, so 75 is only the initial value.
llm = LlamaCpp(
    model_path=model_file_path,
    temperature=0.75,
    max_tokens=75,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
    # NOTE(review): 33 is presumably sized to offload every layer of this model to the GPU — confirm for your hardware.
    n_gpu_layers=33
)

## Using the Model without any additional context

In [None]:
# Bare question/answer prompt with a single `{query}` variable and no retrieved context.
prompt = PromptTemplate.from_template("""
Question: {query}

Answer:""")

In [None]:
# Pipe the prompt into the LLM so that invoking the chain formats the prompt and then generates.
chain = prompt | llm

In [None]:
# Same sample question as in the retrieval check, reused for both QA variants below.
query = "How much does the president want to cut the cancer death rate?"

In [None]:
# The prompt has exactly one variable, so the bare string is passed as the value of `{query}`.
generated_text = chain.invoke(query)

In [None]:
# Print the answer produced without any retrieved context.
print(generated_text)

## Using the model with retrieval

In [None]:
def run_retrieval_qa(query, k=4, max_new_tokens=20, return_sources=True, repetition_penalty=1.0, remove_tokens=("<s>","</s>"), verbose=False):
    """Answer `query` with the shared `llm`, grounded on chunks retrieved from `docsearch`.

    Args:
        query: Question text; used both for retrieval and inside the prompt.
        k: Number of chunks to retrieve and concatenate into the context.
        max_new_tokens: Written to `llm.max_tokens` before generating. The new
            value stays on the shared `llm` object after the call.
        return_sources: When true, the retrieved documents are included in the
            result under "sources".
        repetition_penalty: Accepted for interface compatibility; currently not
            applied to the model.
        remove_tokens: Substrings removed from the generated text.
        verbose: Accepted for interface compatibility; currently unused.

    Returns:
        dict with "text" (the cleaned answer) and, if `return_sources` is true,
        "sources" (the retrieved documents).
    """
    docs = docsearch.similarity_search(query, k=k)

    context = "\n".join(doc.page_content for doc in docs)

    # Keep {context} and {query} as real template variables. Building the prompt
    # with an f-string and then handing it to from_template() made any "{" or "}"
    # inside the retrieved text be parsed as a template variable, and the extra
    # opening quote put a stray '"' at the start of every prompt.
    template = PromptTemplate.from_template("""
    Context: {context}

    Question: {query}

    Answer:""")

    # Only an attribute of the shared model is changed, so no `global` is needed.
    llm.max_tokens = max_new_tokens

    chain = template | llm

    generated_text = chain.invoke({"context": context, "query": query})

    # Drop special tokens, then trim quotes, spaces and newlines from both ends.
    for token in remove_tokens:
        generated_text = generated_text.replace(token, "")
    generated_text = generated_text.strip('" \n')

    output = {
        "text": generated_text
    }
    if return_sources:
        output["sources"] = docs
    return output

In [None]:
# Ask the same question again, this time with retrieved context and a larger token budget.
res = run_retrieval_qa(query, max_new_tokens=120, repetition_penalty=1.1)

In [None]:
# Display the answer dict returned by run_retrieval_qa.
res

## Indexing Multiple Vector Fields

### Create a "summary" field for the documents we want to index, so we will have both a "page_content" field and a "summary" field to embed and store

In [None]:
# Convert each document to a plain dict so extra fields can be attached before indexing.
# NOTE(review): `docs` is rebound by the similarity-search cell earlier in the
# notebook, so on a top-to-bottom run this converts the search hits rather than
# every chunk — confirm that is intended.
opensearch_docs = [doc.dict() for doc in docs]

In [None]:
def summarize_text(text:str, max_new_tokens=200, repetition_penalty=1.1):
    """Return a summary of `text` generated by the shared `llm`.

    Args:
        text: The passage to summarize; substituted for `{document}` in the prompt.
        max_new_tokens: Written to `llm.max_tokens` before generating. The new
            value stays on the shared `llm` object after the call.
        repetition_penalty: Accepted for interface compatibility; currently not
            applied to the model.

    Returns:
        The raw text produced by the chain (no post-processing is applied).
    """
    summarization_template_string = """Summarize the following information. Capture the important information, but be as concise as possible.

    Information: {document}

    Summary: """
    summarization_template = PromptTemplate.from_template(summarization_template_string)

    llm.max_tokens = max_new_tokens

    chain = summarization_template | llm

    # Name the template variable explicitly instead of relying on a bare string
    # being coerced into the prompt's only input variable.
    return chain.invoke({"document": text})

In [None]:
# Attach an LLM-generated "summary" field to every document dict (one LLM call per document).
for doc in opensearch_docs:
    text = doc["page_content"]
    summary = summarize_text(text)
    doc["summary"] = summary

### Create embeddings for the "page_content" and "summary" fields

In [None]:
def generate_embeddings(text:str, embedding_model=None):
    """Return the embedding vector for a single piece of text.

    Args:
        text: The string to embed.
        embedding_model: Object exposing `embed_documents(list_of_texts)`. When
            omitted, the notebook-level `embeddings` model is used (looked up at
            call time).

    Returns:
        The vector for `text`, i.e. the first (and only) item returned by
        `embed_documents`.
    """
    if embedding_model is None:
        embedding_model = embeddings
    # embed_documents works on a list of texts. Passing the bare string made it
    # iterate over the characters, so `[0]` was the embedding of the first
    # character instead of the whole text.
    return embedding_model.embed_documents([text])[0]

In [None]:
# Store one vector per text field under "<field>_vector" on every document dict.
for doc in opensearch_docs:
    for field in ["page_content", "summary"]:
        doc[f"{field}_vector"] = generate_embeddings(doc[field])    

### Write Documents to the OpenSearch Index

In [None]:
# Name of the index that holds the documents with both vector fields.
index_name = "my-multi-vector-index"

In [None]:
from opensearchpy import OpenSearch
from hashlib import sha1
import json

In [None]:
# Low-level OpenSearch client used for index management, bulk indexing and search.
# TLS is disabled to match the plain-HTTP URL. The hostname option was previously
# misspelled ("ssl_assety_hostname") and therefore never took effect.
client = OpenSearch(
    hosts=OPENSEARCH_HTTP_URL,
    http_auth=(OPENSEARCH_USER, OPENSEARCH_PASSWORD),
    use_ssl=False,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False
    )

In [None]:
def create_index(index, mappings=None, opensearch_client=None, replace_existing=False, number_of_shards=1):
    """Create a k-NN enabled index called `index`.

    Args:
        index: Name of the index to create.
        mappings: Mapping definition placed under "mappings" in the request
            body. Defaults to an empty mapping.
        opensearch_client: Client whose `indices` API is used. When omitted, the
            notebook-level `client` is used (looked up at call time).
        replace_existing: When true, a delete request for `index` is issued
            first (with `ignore_unavailable=True`).
        number_of_shards: Value for the index's `number_of_shards` setting.

    Returns:
        The response of the create-index call.
    """
    if opensearch_client is None:
        opensearch_client = client
    # A fresh dict per call avoids sharing one mutable default between calls.
    if mappings is None:
        mappings = {}

    if replace_existing:
        opensearch_client.indices.delete(index=index, ignore_unavailable=True)
        print(f"Deleted existing index: {index}")

    index_body = {
        'settings': {
            'index': {
                'knn': True,
                "knn.algo_param.ef_search": 256,
                'number_of_shards': number_of_shards
            }
        },
        "mappings": mappings
    }
    # Create the index that was asked for. The previous code used the global
    # `index_name` here, so the `index` argument was ignored on creation.
    return opensearch_client.indices.create(index=index, body=index_body)


In [None]:
# Both vector fields use the same definition: a knn_vector sized to the
# embedding model's output dimension, with the "hnsw" method.
vector_dimension = embeddings.client.get_sentence_embedding_dimension()

mappings = {
    "properties": {
        vector_field: {
            "type": "knn_vector",
            "dimension": vector_dimension,
            "method": {"name": "hnsw"},
        }
        for vector_field in ("page_content_vector", "summary_vector")
    }
}

# Recreate the index from scratch so the cell can be re-run.
create_index(index=index_name, mappings=mappings, replace_existing=True)

In [None]:
# Build the bulk payload: for every document, one JSON action line (index into
# `index_name`, using the SHA-1 of the page content as `_id`) followed by one
# JSON source line. Each pair ends with a newline and pairs are joined with "\n".
bulk_actions_string = "\n".join(
    json.dumps(
        {
            "index": {
                "_index": index_name,
                "_id": sha1(doc["page_content"].encode()).hexdigest(),
            }
        }
    )
    + "\n"
    + json.dumps(doc)
    + "\n"
    for doc in opensearch_docs
)

In [None]:
# Send the payload to the Bulk API. `refresh=True` makes the new documents
# searchable as soon as the call returns, so the search cells that follow do not
# race the index refresh on a Restart & Run All. The response is displayed so
# per-item errors are visible.
client.bulk(body=bulk_actions_string, refresh=True)

In [None]:
def vector_field_search(query:str, field_name:str, k=3, size=3, index=index_name, opensearch_client=client, embedding_model=embeddings):
    """Embed `query` and run a `knn` query against the vector field `field_name`.

    k is the number of neighbors the search of each graph will return, while
    size is how many results the query returns overall: the plugin returns k
    results for each shard (and each segment) and size results for the entire
    query. The plugin supports a maximum k value of 10,000.

    Args:
        query: Text to embed and search with.
        field_name: Name of the knn_vector field to search.
        k: Neighbors requested per graph search.
        size: Number of hits returned for the whole query.
        index: Index to search.
        opensearch_client: Client used to execute the search.
        embedding_model: Model passed to `generate_embeddings` for the query.

    Returns:
        The search response from the client.
    """
    # An alternative to the `knn` clause is a `script_score` query over
    # `match_all` using the "knn_score" script (lang "knn") with
    # `field`, `query_value` and a `space_type` such as "cosinesimil".
    query_vector = generate_embeddings(text=query, embedding_model=embedding_model)
    knn_clause = {field_name: {"vector": query_vector, "k": k}}
    search_request = {"size": size, "query": {"knn": knn_clause}}
    return opensearch_client.search(index=index, body=search_request)
    

In [None]:
# Search the page-content vectors; pass field_name="summary_vector" to search the summaries instead.
results = vector_field_search(query="International supply chain", field_name="page_content_vector")

In [None]:
# Show only the text fields of the first hit; the stored vectors are left out of the display.
{k:v for k,v in results["hits"]["hits"][0]["_source"].items() if k in ["page_content","summary"]}

## BELOW STILL IN PROGRESS

## Retrieval with Reranking (Boosting with Text Search)

In [None]:
from opensearchpy import OpenSearch

In [None]:
# Client for the text-search experiments below. It uses the same plain-HTTP
# endpoint as the rest of the notebook: OPENSEARCH_HTTPS_URL was never defined
# (NameError on a fresh run), and every other connection here is made with
# use_ssl=False. The misspelled "ssl_assety_hostname" option is corrected too.
client = OpenSearch(
    hosts=OPENSEARCH_HTTP_URL,
    http_auth=(OPENSEARCH_USER, OPENSEARCH_PASSWORD),
    use_ssl=False,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False
    )

In [None]:
# Text query: a multi_match on the current `query` string, returning up to 5 hits.
# No field list is given; the commented line shows the syntax for weighted fields.
os_query = {
  'size': 5,
  'query': {
    'multi_match': {
      'query': query,
    #   'fields': ['title^2', 'director']
    }
  }
}

In [None]:
# Pass the query as `body=`, matching how vector_field_search calls the client;
# no index is given, so the search is not restricted to a single index.
os_res = client.search(body=os_query)

In [None]:
# Display the list of hits from the text search response.
os_res["hits"]["hits"]