# Demo: Integrating ColiVara with LangChain

#### First, we need to install the dependencies

In [None]:
# Install the ColiVara SDK and the LangChain packages imported below.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install colivara-py --quiet
%pip install langchain --quiet
%pip install langchain-core --quiet
%pip install langchain-openai --quiet

#### Import the required libraries


In [None]:
# utilities
import base64 # for converting docs binaries to base64
from pathlib import Path 
import getpass
import os 
import requests 

from colivara_py import ColiVara 

# langchain tools
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


In [None]:
# Setup Environment Variables
# getpass hides what is typed, so the keys never show up in the cell output.
# Each call gets its own prompt: with the default "Password: " prompt the two
# inputs are indistinguishable and the keys are easily entered in the wrong order.
os.environ['OPENAI_API_KEY'] = getpass.getpass("OpenAI API key: ")
os.environ['COLIVARA_API_KEY'] = getpass.getpass("ColiVara API key: ")


#### Download example docs for our demo (you can use your own docs as well)

In [None]:
def download_file(url, local_filename, timeout=30):
    """Download ``url`` and write the response body to ``local_filename``.

    Failures are reported with a printed message instead of an exception, so
    a single bad URL does not abort a loop over several downloads.

    Args:
        url: Address fetched with an HTTP GET request.
        local_filename: Path the downloaded bytes are written to. Missing
            parent directories are created.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the file was written, False otherwise.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download: {url} ({exc})")
        return False

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to download: {url}")
        return False

    # Create the directory the file is actually written into, rather than a
    # hard-coded "docs" folder that may not match local_filename.
    Path(local_filename).parent.mkdir(parents=True, exist_ok=True)

    # Write the content to a local file
    with open(local_filename, "wb") as f:
        f.write(response.content)
    print(f"Successfully downloaded: {local_filename}")
    return True


# URLs and local filenames
# Each entry pairs a remote "url" with the local "filename" it is saved under.
files = [
    {
        # The remote name contains a URL-encoded space (%20); the local copy uses a hyphen instead.
        "url": "https://github.com/tjmlabs/colivara-demo/raw/main/docs/Work-From-Home%20Guidance.pdf",
        "filename": "docs/Work-From-Home-Guidance.pdf",
    },
    {
        "url": "https://github.com/tjmlabs/colivara-demo/raw/main/docs/StaffVendorPolicy-Jan2019.pdf",
        "filename": "docs/StaffVendorPolicy-Jan2019.pdf",
    },
]

In [None]:
# Fetch every entry of `files` to its local path
for file in files:
    download_file(url=file["url"], local_filename=file["filename"])


#### Start the ColiVara client and create a collection to upsert documents into

In [None]:
# ColiVara client used by the rest of the notebook; the API key is read from
# the COLIVARA_API_KEY environment variable.
rag_client = ColiVara(
    api_key=os.environ["COLIVARA_API_KEY"],
    base_url="https://api.colivara.com",
)

In [None]:
# Create the "langchain-demo" collection that the documents are upserted into.
# NOTE(review): re-running this cell may fail if the collection already exists — confirm against the ColiVara API.
rag_client.create_collection("langchain-demo")

#### Upsert the documents to the collection

In [None]:
def sync_documents(documents_dir="docs", collection_name="langchain-demo"):
    """Upsert every file found under ``documents_dir`` into a ColiVara collection.

    The directory is searched recursively. Each file is read, base64-encoded
    and sent with ``rag_client.upsert_document`` under its bare file name.

    Args:
        documents_dir: Folder to scan for documents.
        collection_name: Collection the documents are upserted into.
    """
    for path in Path(documents_dir).glob("**/*"):
        # "**/*" also yields sub-directories, which cannot be opened as files.
        if not path.is_file():
            continue

        # Read the bytes up front so no file handle stays open during the upload.
        encoded_content = base64.b64encode(path.read_bytes()).decode("utf-8")
        rag_client.upsert_document(
            name=path.name,
            document_base64=encoded_content,
            collection_name=collection_name,
            wait=True,  # presumably blocks until the document is processed — confirm in the SDK docs
        )
        print(f"Upserted: {path.name}")

In [None]:
# Upload everything currently under docs/ to the collection.
sync_documents()

There is a step called query transformation, which converts the user's query into a format that the RAG model can understand.
This step is necessary because the RAG model expects its input to be in a specific format.

***For the purpose of this demo, we will skip it and pass the user's query directly to the RAG search.***

Setup is complete.
Let's get to work.


In [None]:
# Example question; it is used both for the document search and in the final prompt.
query = "What is the work from home policy?"

You can run a search with just this code snippet:

```python
rag_client.search(query="some query",collection_name="collection_name", top_k=5)
``` 

In [None]:
## you can try performing search to see results object structure

# results = rag_client.search(query, collection_name="langchain-demo")
# print(results.results)

In [None]:
# get context using the RAG
def get_context(query, collection_name="langchain-demo", top_k=3):
    """Search ColiVara for ``query`` and return the matching pages as image context.

    Args:
        query: Search text passed to ``rag_client.search``.
        collection_name: Collection to search.
        top_k: Forwarded to the search as its ``top_k`` argument.

    Returns:
        A list with one dict per search result, each shaped as
        ``{"metadata": "<document name> - Page <page number>", "base64": <data URL>}``.
    """
    search_response = rag_client.search(
        query=query, collection_name=collection_name, top_k=top_k
    )

    context = []
    for result in search_response.results:
        # Deliberately not named `base64`, which would shadow the imported module.
        image_url = result.img_base64
        # The image may come without the "data:" prefix, so add it when missing.
        if not image_url.startswith("data:image"):
            image_url = f"data:image/png;base64,{image_url}"
        context.append(
            {
                "metadata": f"{result.document_name} - Page {result.page_number}",
                "base64": image_url,
            }
        )
    return context

In [None]:
# Build the context list for the example query.
context = get_context(query)

Now build the prompt using the context and query, and initialize the chat model.


In [None]:
# The images held in `context` are placed directly into the user message;
# "{query}" is the placeholder that receives the question when the prompt is invoked.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that answers questions based on the provided images/docs.",
        ),
        (
            "user",
            [{"type": "text", "text": "Here are some images for context:"}]
            # one image part per entry in `context`
            + [
                {"type": "image_url", "image_url": {"url": page["base64"]}}
                for page in context
            ]
            + [
                {"type": "text", "text": "Now, please answer the following question:"},
                {"type": "text", "text": "{query}"},  # Placeholder for the user's query
            ],
        ),
    ]
)

# Initialize the model
model = ChatOpenAI(temperature=0, model="gpt-4o")

In [None]:
# Compose the prompt and the model into a chain with the | operator, then invoke it.
# The "query" key supplies the value for the {query} placeholder in the prompt.
chain = prompt | model
response = chain.invoke({"query": query})

In [None]:
# Show the content of the model's response.
print(response.content)

## That's it for now. You can continue to explore more features and capabilities of LangChain.