In [None]:
# Model download and llama-cpp installation guides -- please read these before running the notebook:
#https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/tree/main
#https://python.langchain.com/docs/integrations/llms/llamacpp

In [None]:
!pip install langchain

In [None]:
from langchain.document_loaders import TextLoader
from langchain.document_loaders.csv_loader import CSVLoader

from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate

from langchain.text_splitter import CharacterTextSplitter

In [None]:
# --- Configuration -----------------------------------------------------------
model_path = "model/llama-2-7b-chat.Q6_K.gguf"  # local GGUF model file
n_gpu_layers = 1  # number of model layers to offload to the GPU
n_batch = 512  # number of prompt tokens processed per batch
source_text = "docs/SAMPLE.csv"  # CSV file that is indexed for retrieval

# Stream generated tokens to stdout as they are produced.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# --- Load the model ----------------------------------------------------------
# n_gpu_layers and n_batch were previously defined but never handed to
# LlamaCpp, so they had no effect; they are now passed through explicitly.
# NOTE(review): n_ctx is left at the library default. The "stuff" chain puts
# every retrieved chunk into a single prompt -- confirm the default context
# window is large enough for k=6 chunks plus the answer.
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    temperature=0.75,
    max_tokens=400,
    top_p=1,
    callback_manager=callback_manager,
    verbose=True,  # Verbose is required to pass to the callback manager
)



In [None]:
# Read the CSV referenced by `source_text` into LangChain documents.
loader = CSVLoader(file_path=source_text)
documents = loader.load()

# Split the documents on the literal "######" marker (not a regex) into
# chunks of at most 500 characters, measured with len(), with no overlap.
text_splitter = CharacterTextSplitter(
    separator="######",
    is_separator_regex=False,
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)

texts = text_splitter.split_documents(documents)



In [None]:

# Embedding model: Instructor-large, run on the CPU, with normalized vectors.
model_name = "hkunlp/instructor-large"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embeddings = HuggingFaceInstructEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Embed the split documents and index them in a Chroma vector store.
vectorstore = Chroma.from_documents(documents=texts, embedding=embeddings)

# MMR retriever: fetch 50 candidates, return 6, with lambda_mult set to 0.6.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 6,
        "lambda_mult": 0.6,
        "fetch_k": 50,
    },
)


In [None]:
# Prompt for the QA chain. {context} receives the retrieved chunks and
# {question} receives the user's query.
template = """You are a helpful, respectful and honest QF Governance Manual assistant. Must use the following pieces of context to answer the question only.
If you don't know the answer from the provided context, don't make up an answer. Also, do not replace any word with your own word.
Use three sentences maximum and keep the answer as concise as possible.

{context}
Question: {question}
Helpful Answer:"""

prompt = PromptTemplate.from_template(template)

# Retrieval QA chain: the "stuff" chain type places all retrieved chunks into
# the prompt's {context} slot. The chain must receive the PromptTemplate
# (`prompt`); passing `question` here was a bug -- that name is not defined
# yet at this point and is a plain string rather than a prompt.
llm_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)


In [None]:
# Ask a question through the RetrievalQA chain (`llm_chain`) built in the
# previous cell. The chain is no longer replaced by an LLMChain: that class
# is never imported in this notebook and it would bypass the retriever.
# NOTE(review): this sample question is unrelated to the indexed manual --
# swap in a question about the document when evaluating answers.
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

# RetrievalQA reads its input from the "query" key. Because the chain was
# created with return_source_documents=True it returns a dict with "result"
# and "source_documents", so it is called directly rather than via .run().
result = llm_chain({"query": question})

# The answer itself is streamed to stdout by the callback handler; show the
# chunks it was based on as the cell output.
result["source_documents"]