In [None]:
from textwrap import dedent

import pyspark.sql.functions as F
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders.dataframe import DataFrameLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma, FAISS
from langchain_core.prompts import PromptTemplate

In [None]:
# Sectioned prompt text (context, objective, style, tone, audience, response).
# The two placeholders, {context} and {question}, are filled in at invoke time.
# NOTE(review): in the cells visible here the retrieval chain is built without
# this prompt; it is only exercised by a direct `prompt.invoke(...)` call.
template = """
# CONTEXT #
You are an expert in using the python gdal and pandas library.
{context}

# OBJECTIVE #
{question}

# STYLE #
Follow the writing style of Python code.

# TONE #
Persuasive

# AUDIENCE #
Technical

# RESPONSE #
Just show the Python code and do NOT provide any comments. DO NOT EXECUTE THE CODE.
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

In [None]:
# Render the prompt with sample values to inspect the final text; the call is
# left as the cell's last expression so the notebook displays its result.
prompt.invoke(
    {
        "context": "FooBar",
        "question": (
            "Given a GeoTIFF with one band where the pixel value is a float, "
            "and a pandas dataframe with x and y series, "
            "find pixel value for each x and y."
        ),
    }
)

In [None]:
# Ollama-backed chat model used to generate answers.
# Alternatives previously tried here: "mistral:instruct", "openhermes".
llm = ChatOllama(
    model="mistral",
)

In [None]:
# Ollama-backed embedding model used to vectorize the document chunks.
embedding = OllamaEmbeddings(
    model="nomic-embed-text",
)

In [None]:
# Read the source PDF and break it into chunks with the loader's default splitter.
# NOTE(review): this is an absolute, machine-specific path; the cell fails
# on any machine where the file is not at exactly this location.
loader = PyPDFLoader(
    "/Users/mraad/Downloads/3615886.3627740.pdf",
)
docs = loader.load_and_split()

In [None]:
from chromadb.config import Settings  # https://docs.trychroma.com/telemetry

In [None]:
# Index the PDF chunks in a Chroma vector store, with Chroma's anonymized
# telemetry switched off through the client settings.
# Alternative store that was tried: FAISS.from_documents(docs, embedding)
client_settings = Settings(anonymized_telemetry=False)
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    client_settings=client_settings,
)

In [None]:
# Conversation state: an explicit message history wrapped in a buffer memory.
# The memory exposes the history under "chat_history" and records the chain's
# "answer" output.
message_history = ChatMessageHistory()
memory = ConversationBufferMemory(
    chat_memory=message_history,
    memory_key="chat_history",
    output_key="answer",
    return_messages=True,
)

# Conversational retrieval chain: the `db` vector store is the retriever, the
# "stuff" chain type combines the retrieved documents, and source documents
# are not included in the result.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
    memory=memory,
    chain_type="stuff",
    return_source_documents=False,
)

In [None]:
# Ask the retrieval chain one question about the indexed PDF.
# `dedent` strips the common leading indentation of the triple-quoted text; the
# surrounding newlines are kept as part of the question.
# Fix: the prompt previously read "paragrams", a typo sent verbatim to the LLM.
res = chain.invoke(
    dedent(
        """
        Please summarize in 2 paragraphs THE SRAI LIBRARY.
        """
    )
)

In [None]:
# Show which keys the chain result exposes.
print(res.keys())

In [None]:
# Print the generated answer as plain text so its line breaks render.
print(res["answer"])

In [None]:
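# Disabled: the chain is built with return_source_documents=False, so this key
# is presumably absent from `res`; enable that flag before uncommenting.
# res["source_documents"]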