In [2]:
from elqm.utils.dataFinder import get_data_dir
from elqm.backend.utils import get_es_connection
import os
import time

DATA_DIR = os.path.abspath(get_data_dir("eur_lex_data"))
PREPROCESSED_DATA_DIR = os.path.abspath(get_data_dir("preprocessed"))

print("DATA_DIR: ", DATA_DIR)
print("PREPROCESSED_DATA_DIR: ", PREPROCESSED_DATA_DIR)

from langchain.llms import Ollama

# Initilize the LLM model
llm = Ollama(model="llama2")

DATA_DIR:  /home/computerman/Desktop/NLPT/elqm-INLPT-WS2023/elqm-raw/eur_lex_data
PREPROCESSED_DATA_DIR:  /home/computerman/Desktop/NLPT/elqm-INLPT-WS2023/elqm-raw/preprocessed


## Prompt

In [2]:
from langchain_core.prompts import ChatPromptTemplate

questionPrompt = ChatPromptTemplate.from_template(
"""Answer the question based only on the following context and on the conversation history:
{context}

Question:
{question}
"""
)

In [3]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

systemString = """You are ELQM, a helpful and specialized assistant for question-answering tasks \
in the domain of energy law. Use the following pieces of retrieved context comprised of EU \
regulations and other legal documents to answer the question. If you don't know the answer \
or the question cannot be answered with the context, admit that you cannot answer the \
question due to the limited available context. Furthermore, if the user asks a generic \
question or other situations occur, in which the context is not helpful, kindly remember the \
user of your purpose. Your answers should not include any racist, sexist and toxic content."""

systemMessage = SystemMessage(content=systemString)


In [4]:

from langchain_core.prompts import MessagesPlaceholder

historyPrompt = MessagesPlaceholder(variable_name="history")

In [5]:
final_prompt = (systemMessage + historyPrompt + questionPrompt)

In [None]:
final_prompt.format_messages(context="Hello", question="Test", history="This is the history")

In [6]:
prompt = final_prompt

## Retriever

In [6]:
from langchain.embeddings import GPT4AllEmbeddings

embeddings = GPT4AllEmbeddings();

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [7]:
from langchain_community.vectorstores import FAISS




vectorstore = FAISS.from_texts(
    ["harrison worked at kensho",
     "Josh worked at John Deere",
     "Sabrina worked at Google",
     "Jasmin worked at Continental",
     "James worked at Microsoft",
     "Joshua worked at the local mechanics shop",
     "Nikita worked at Rechner Sensors",
     "Nikita worked at the University",
     "Kira worked at the school",
     "Uli worked at the airport",
     "Maria worked at Mitsubishi Chemical"],
    embedding=GPT4AllEmbeddings()
)

# As default the retreiver outputs 4 documents
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})

bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522
bert_load_from_file: gguf version     = 2
bert_load_from_file: gguf alignment   = 32
bert_load_from_file: gguf data offset = 695552
bert_load_from_file: model name           = BERT
bert_load_from_file: model architecture   = bert
bert_load_from_file: model file type      = 1
bert_load_from_file: bert tokenizer vocab = 30522


In [None]:
query = "Where did Harrison work?"
docs = retriever.invoke(query)
print(type(docs))
print("Number of docs:", len(docs))
print()
for i, doc in enumerate(docs):
    print(f"Document {i}")
    print("Content", doc.page_content)
    print("Metadata:", doc.metadata)
    print()

## Debug output is good good

In [None]:
import langchain
langchain.debug = False

## Chain

### The following two seem so be equivalent

In [None]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain2 = setup_and_retrieval | prompt | llm | StrOutputParser()

### Try to make a class with different components

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)

result = setup_and_retrieval.invoke("Where did Harrison work?")
print(result)

In [None]:
from langchain_core.runnables import chain
from langchain_core.output_parsers import StrOutputParser

setup_and_retrieval_runnable = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)

@chain
def custom_chain(question):
    retrival_output = setup_and_retrieval.invoke(question)
    prompt_output = prompt.invoke(retrival_output)
    llm_output = llm.invoke(prompt_output)
    anwser = StrOutputParser().invoke(llm_output)
    return anwser, retrival_output, prompt_output, llm_output
    

In [None]:
custom_chain.invoke("Where did Harrison work and what did he do?")

In [None]:
custom_chain.get_graph().print_ascii()

Class is nice but it is crap that the chain here does not show because it is defined explicetly

### Make a parallel pass through that passes retrieved data all the way through

In [None]:
import langchain_core.prompt_values

def clean_retriever_string(retrievedDocuments: langchain_core.prompt_values.ChatPromptValue):
    

In [8]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

chain31 = retriever

chain32 = {"context": retriever,
           "question": RunnablePassthrough()} | prompt

chain33 = (llm | StrOutputParser())

chain34 = (
    chain32 | RunnableParallel(completly_processed=chain33,
                               prompted=RunnablePassthrough())
)

chain35 = RunnableParallel(completly_and_prompted=chain34,
                           retreived=chain31)

In [None]:
result = chain32.invoke("Where did Harrison work and what did he do?")
print(type(result))
final_output = result.to_messages()
print(type(final_output))
print(len(final_output))
print(final_output[0])

In [None]:
result = chain35.invoke("Where did Harrison work and what did he do?")
print(result)

In [9]:
chain35.get_graph().print_ascii()

                        +-------------------------------------------------+                   
                        | Parallel<completly_and_prompted,retreived>Input |                   
                        +-------------------------------------------------+                   
                                     ****                      *****                          
                                 ****                               *****                     
                               **                                        *****                
         +---------------------------------+                                  ***             
         | Parallel<context,question>Input |                                    *             
         +---------------------------------+                                    *             
                  ***            ***                                            *             
                **                  **            

really really nice. this is exactly what we want. Now add memory
https://www.reddit.com/r/LangChain/comments/18yovcm/please_help_with_langchain_want_both_document/.
TODO for later: How to convert the retriever output into something nice?

### Adding conversation memory to the chain

In [None]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.memory import ChatMessageHistory
from langchain_core.runnables.utils import ConfigurableFieldSpec
from typing import Optional

store = {}

def get_session_history(user_id: str, conversation_id: str) -> ChatMessageHistory:
    if (user_id, conversation_id) not in store:
        store[(user_id, conversation_id)] = ChatMessageHistory()
    return store[(user_id, conversation_id)]


with_message_history = RunnableWithMessageHistory(
    chain35,
    get_session_history=get_session_history,
    input_messages_key="question",
    history_messages_key="history",
    history_factory_config=[
        ConfigurableFieldSpec(
            id="user_id",
            annotation=str,
            name="User ID",
            description="Unique identifier for the user.",
            default="",
            is_shared=True,
        ),
        ConfigurableFieldSpec(
            id="conversation_id",
            annotation=str,
            name="Conversation ID",
            description="Unique identifier for the conversation.",
            default="",
            is_shared=True,
        ),
    ],
)

In [None]:
with_message_history.invoke(
    {"ability": "math", "question": "What does cosine mean?"},
    config={"configurable": {"user_id": "123", "conversation_id": "1"}}
)

In [10]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import RedisChatMessageHistory

chain_with_history = RunnableWithMessageHistory(
    chain35,
    RedisChatMessageHistory,
    input_messages_key="question",
    history_messages_key="history",
)

In [11]:
chain_with_history.invoke("Where did Harrison work and what did he do?",
                          config={"configurable": {"session_id": "foo"}})

ImportError: Could not import redis python package. Please install it with `pip install redis`.

### Turns out this is legacy code

In [None]:
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain

memory = ConversationBufferWindowMemory(k=5, memory_key="chat_history")
qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    combine_docs_chain_kwargs={"prompt": prompt},
    memory=memory,
    get_chat_history=lambda h : h,
    )

## Invoke

In [None]:
chain.get_prompts("Hi")

In [None]:
chain.get_graph().print_ascii()

In [None]:
chain.invoke("Where did Harrison work and what did he do?")

In [None]:
chain2.invoke("where did harrison work?")

In [None]:
question = "where did harrison work?"
history = ""
result = qa_chain.invoke({"question": question, "chat_history": history})
print(result["answer"])