## Links

https://python.langchain.com/docs/expression_language/cookbook/retrieval

## Required Imports

In [13]:
import os
from typing import List, Tuple
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableParallel
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.vectorstores.chroma import Chroma
from langchain.docstore.document import Document
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from operator import itemgetter
from langchain.callbacks import get_openai_callback
from langchain.globals import set_verbose, set_debug

debug= False
set_verbose(debug)
set_debug(debug)

OpenAPIKey = os.environ.get("OPENAI_API_KEY")


## Simple Test for a Simple Prompt and Feed to the Model

In [37]:
prompt = PromptTemplate.from_template("tell me a joke about {foo}")
model = ChatOpenAI(api_key=OpenAPIKey)

conversation_chain = prompt | model

conversation_chain.invoke({"foo": "bears"})

StringPromptValue(text='tell me a joke about bears')

## Simple Test to see How ChatPromptTemplate works

In [49]:
prompt = ChatPromptTemplate.from_messages([
                ("system", "You are a helpful AI bot. Your name is {name}."),
                ("human", "Hello, how are you doing?"),
                ("ai", "I'm doing well, thanks!"),
                ("human", "{user_input}"),
            ])

conversation_chain = prompt | model

conversation_chain.invoke({
    "name": "Bob",
    "user_input": "What is your name?"
})

AIMessage(content='My name is Bob. How can I assist you today?')

## Create Chat Prompts

In [23]:
conversation_prompt = PromptTemplate.from_template(
"""Given the following conversation and a follow up question, rephrase the follow up 
question to be a standalone question, in its original language.

Chat History:
{chat_history}

------------------------------
Follow Up Input: {question}
Standalone question:""")

# Answer Prompt is the one that we expect the assistant to provide an answer in
answer_prompt = ChatPromptTemplate.from_template("""{context}

                                          
-----------
Answer the question below based only on the above context (without mention the context in 
the response).

Question: {question}
""")

In [29]:
def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer

test = RunnableParallel(    
    chat_history=lambda x: _format_chat_history(x["chat_history"]),
    question=lambda x: x["question"],
) | {"mod": lambda x: x["question"]}

test.invoke({
    "chat_history": [("Hello", "Hi"), ("How are you?", "I'm fine")],
    "question": "What is your name?"
})


{'mod': 'What is your name?'}

## Create Conversational Chain & Invoke

In [36]:

model = ChatOpenAI(
    temperature=0,
    api_key=OpenAPIKey,
    model="gpt-3.5-turbo",
)

test = RunnableParallel(    
    chat_history=lambda x: _format_chat_history(x["chat_history"]),
    question=RunnablePassthrough(),
)

with get_openai_callback() as cb:
    result = conversation_chain.invoke({
    "chat_history": [],
    "question": "Given that the universe is vast, I struggle to grasp the context of the meaning of life?"
    })

    print(result)
    print("\n" + "-"* 50)
    print(cb)

{'question': AIMessage(content='What is the context of the meaning of life, considering the vastness of the universe?')}
What is the context of the meaning of life, considering the vastness of the universe?

--------------------------------------------------
Tokens Used: 85
	Prompt Tokens: 67
	Completion Tokens: 18
Successful Requests: 1
Total Cost (USD): $0.0001365


## Use In-Memory Embeddings

If using FAISS
1. ```import sys```
2. ```!{sys.executable} -m pip install faiss-cpu```


In [15]:
loader = DirectoryLoader('./data/training', glob="**/*.md", loader_cls=TextLoader) # TextLoader == raw text
docs = loader.load()

splitter = MarkdownHeaderTextSplitter(headers_to_split_on=[
    ("#", "Header 1"), # Head level, metadata key
    ("##", "Header 2"),
    ("###", "Header 3"),
])

splitted_docs: List[Document] = []

for doc in docs:     
  splitted_docs.extend(splitter.split_text(doc.page_content))

   
chroma = Chroma.from_documents(
  documents=splitted_docs,
  embedding=OpenAIEmbeddings(api_key=OpenAPIKey)
)

result = chroma.similarity_search("How do I install indoor sensors?")

for res in result:
  print(res)

page_content='How do I install the indoor climate sensors?  \nIt is important that you install the indoor sensors in the right place. The sensors should be placed in the middle of the room, at a height of 1.5 meters. The sensors should not be placed in direct sunlight or near heat sources such as radiators or lamps. Do not place any sensors near windows or doors.  \nPlace one sensor at the north wall and one at the south wall to be able to measure the temperature difference between the two sensors.' metadata={'Header 1': 'Crossbreed Smarter Heating', 'Header 2': 'Installation and Positioning of Indoor Climate Sensors'}
page_content='How do I install the indoor climate sensors?  \nIt is important that you install the indoor sensors in the right place. The sensors should be placed in the middle of the room, at a height of 1.5 meters. The sensors should not be placed in direct sunlight or near heat sources such as radiators or lamps. Do not place any sensors near windows or doors.  \nPlac

## Combine Question & Answer Chain with Chat History

In [16]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


answerChain = conversation_chain | {
  "question": RunnablePassthrough(),
  "context": itemgetter("question") | chroma.as_retriever() | format_docs,
} | answer_prompt | model | StrOutputParser()

cnt = 0
max_len = 80

for chunk in answerChain.stream({
    "chat_history": [],
    "question": "How do I install indoor sensors?"
}): 
  if cnt > max_len:
    print("\n", end="")
    cnt = 0
    
  cnt += len(chunk)
  print(chunk, end="", flush=True)

The process for installing indoor sensors is to place them in the middle of the room
 at a height of 1.5 meters. They should not be placed in direct sunlight or near heat
 sources, windows, or doors. Additionally, one sensor should be placed at the north
 wall and one at the south wall to measure the temperature difference between them
.
--------------------------------------------------


## Using MultiQueryRetriever

In [None]:
from langchain.retrievers.multi_query import MultiQueryRetriever
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

retriever = MultiQueryRetriever.from_llm(
    retriever=chroma.as_retriever(), llm=model
)

unique_docs = retriever.get_relevant_documents(query="How do I install indoor sensors for best results?")
for doc in unique_docs:
    print(doc.page_content)

## Parent Document Retriever / MultiVector Retriever

TODO: Do some testing with this

https://python.langchain.com/docs/modules/data_connection/retrievers/parent_document_retriever
https://python.langchain.com/docs/modules/data_connection/retrievers/multi_vector