# LangChain

Code from https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa

In [None]:
!pip install langchain
!pip install gpt4all
!pip install chromadb
!pip install llama-cpp-python
!pip install langchainhub

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain import hub
from langchain_core.runnables import RunnablePassthrough, RunnablePick
import pandas as pd


In [None]:
# Read the whole custom mail dataset into memory as a single UTF-8 string.
with open("all_mail_contents.txt", mode="r", encoding="utf-8") as mail_file:
    all_mail_contents = mail_file.read()

In [None]:
# Wrap the raw mail text in one LangChain Document, tagged as a local source.
doc = Document(
    page_content=all_mail_contents,
    metadata={"source": "local"},
)

# Cut the document into chunks (chunk_size=500, chunk_overlap=20) so that each
# piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=20,
)
all_splits = text_splitter.split_documents([doc])

In [None]:
# Embed every chunk with GPT4All embeddings and index the result in Chroma.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=GPT4AllEmbeddings(),
)

In [None]:
# Settings forwarded to LlamaCpp below.
# NOTE(review): the original notes mention Metal / Apple Silicon, while the
# model path below is a Windows path - confirm these values suit this machine.
n_gpu_layers = 1  # Original note: with Metal, 1 is enough.
n_batch = 512  # Original note: between 1 and n_ctx; consider the available RAM.

# Make sure the model path is correct for your system!
# Alternative model path kept from the original cell:
#   r"E:\AllProgramming\Llama2\llama.cpp\llama-2-13b-chat\ggml-model-f16.gguf"
llm = LlamaCpp(
    model_path=r"E:\AllProgramming\Llama2\llama.cpp\llama-2-7b-chat\ggml-model-f16_q4_1.gguf",
    n_ctx=1024,  # n_ctx=2048 was the previously tried value
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    # MUST be True, otherwise you will run into problems after a couple of calls.
    f16_kv=True,
    verbose=True,
)

In [None]:
# Ask the bare LLM directly (no retrieval step involved in this call).
question = "What can you do on Medium?"
llm.invoke(question)

In [None]:
# This code can be used to check that the correct documents are retrieved. The retrieved documents should be relevant to your question; they are the data the LLM uses to answer it.
# question = "Medium"
# docs = vectorstore.similarity_search(question)

In [None]:
def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents concatenated in order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [entry.page_content for entry in docs]
    return separator.join(page_texts)

In [None]:
# Pull the "rlm/rag-prompt" prompt from the LangChain hub.
rag_prompt = hub.pull("rlm/rag-prompt")
rag_prompt.messages  # bare expression; a notebook only displays a cell's last expression

# Expose the vector store through the retriever interface.
retriever = vectorstore.as_retriever()

# Inputs for the prompt: "context" is the retrieved documents joined by
# format_docs, "question" is the chain input passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> fill prompt -> run LLM -> parse to plain string.
qa_chain = rag_inputs | rag_prompt | llm | StrOutputParser()

In [None]:
# Question to send through the retrieval chain.
question = (
    "What is the topic of my last Microsoft email I have gotten? Answer in detail"
)

In [None]:
# Run the chain on `question`; as the cell's last expression, the result is shown as output.
qa_chain.invoke(question)

In [None]:
# TODO: try llama file

In [None]:
from langchain_community.llms import GPT4All

# Machine-specific path to the TinyLlama chat model file (GGUF).
tinyllama_model = r"C:\Users\eivin\Documents\Programming\RAG_testing\tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"

# Replace `llm` with a GPT4All-backed model and send it a quick smoke-test prompt.
llm = GPT4All(model=tinyllama_model, max_tokens=2048)
llm.invoke("Can you answer questions?")

In [None]:
from langchain_community.llms import GPT4All

# NOTE(review): "all-MiniLM-L6-v2" looks like a sentence-embedding model rather
# than a text-generation model - confirm it is meant to be loaded as an LLM.
minilm_model = r"all-MiniLM-L6-v2-f16.gguf"
llm = GPT4All(model=minilm_model, max_tokens=2048)
# llm.invoke("Can i ask question?")

In [None]:
from langchain_community.llms import GPT4All

# Replace `llm` with a GPT4All model loaded from the Mistral 7B instruct file.
mistral_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
llm = GPT4All(model=mistral_model, max_tokens=2048)

In [None]:
# Model file name handed to GPT4All in the streaming cell that follows.
local_path = "mistral-7b-instruct-v0.1.Q4_0.gguf"

In [None]:
# Imports use the `langchain_core` paths for the callback handler and prompt
# template, consistent with the imports at the top of this notebook.
from langchain.chains import LLMChain
from langchain_community.llms import GPT4All
from langchain_core.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

# Callbacks support token-wise streaming: the handler writes tokens to stdout.
callbacks = [StreamingStdOutCallbackHandler()]

# Verbose is required to pass to the callback manager.
# `local_path` is the model file name set in the preceding cell.
llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)