### Embedding Models


To determine the most suitable embedding model for our use case, we evaluated three Sentence Transformers models: all-mpnet-base-v2, all-MiniLM-L6-v2, and all-MiniLM-L12-v2. The <a href="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2">all-MiniLM-L12-v2</a> model produced the best results.

In [None]:
#!pip install -U langchain sentence-transformers faiss-cpu pypdf InstructorEmbedding

In [None]:
#!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python

### Setting Up the Llama Model

In [None]:
#!wget https://huggingface.co/TheBloke/Llama-2-7B-chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf

In [None]:
%ls

In [None]:
from langchain.chains import LLMChain
from langchain.embeddings import LlamaCppEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

In [None]:
# Delimiters of the chat prompt: instruction markers and system-prompt markers.
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# Longer general-purpose system prompt, kept for reference but disabled:
# DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information"
DEFAULT_SYSTEM_PROMPT = "Answer the question by the document you have."

# The active system prompt wrapped in its <<SYS>> ... <</SYS>> markers.
SYSTEM_PROMPT = f"{B_SYS}{DEFAULT_SYSTEM_PROMPT}{E_SYS}"

In [None]:
def get_prompt(instruction):
    """Return *instruction* wrapped in the instruction markers.

    The result is B_INST, then SYSTEM_PROMPT, then the instruction text,
    then E_INST, concatenated without any extra separators.
    """
    return "".join((B_INST, SYSTEM_PROMPT, instruction, E_INST))

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

In [None]:
# Build the template through get_prompt() so the prompt layout is defined in
# one place; "{user_message}" is the placeholder filled in by PromptTemplate.
prompt_template = get_prompt("{user_message}")
prompt_template

In [None]:
# Batch size and number of GPU-offloaded layers handed to llama.cpp.
n_batch = 512
n_gpu_layers = 1
# Callback manager with a handler that streams output to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Alternative: llama = LlamaCppEmbeddings
llama = LlamaCpp(
    model_path="llama-2-7b-chat.Q4_K_M.gguf",
    n_batch=n_batch,
    n_gpu_layers=n_gpu_layers,
    n_ctx=4096,
    # MUST set to True, otherwise you will run into problem after a couple of calls
    f16_kv=True,
    callback_manager=callback_manager,
    verbose=True,
)

In [None]:
# Chain that renders prompt_template and sends the result to the Llama model.
llm_chain = LLMChain(
    llm=llama,
    prompt=PromptTemplate.from_template(prompt_template),
)

### FAISS with Retriever

In [None]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader


from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings

In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive (force_remount=True is passed to the mount call).
drive.mount('/content/gdrive', force_remount=True)
# Drive folder used as the base path when building file locations.
root_dir = "/content/gdrive/My Drive"

In [None]:
# loader = TextLoader('.txt')
# Load the files matching "./*.pdf" in <root_dir>/Documents/ with PyPDFLoader.
loader = DirectoryLoader(
    f"{root_dir}/Documents/",
    glob="./*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()

In [None]:
# Show how many documents the loader returned.
len(documents)

In [None]:
# Split the loaded documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunked_data = text_splitter.split_documents(documents)

### HuggingFace Embeddings

In [None]:
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings
# Quick sanity check: encode two sample sentences with the chosen model.
sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]

model = SentenceTransformer("sentence-transformers/all-MiniLM-L12-v2")
embeddings = model.encode(sentences)
# print(embeddings)

In [None]:
from sentence_transformers import SentenceTransformer
from langchain.embeddings import HuggingFaceEmbeddings

import torch

# LangChain wrapper around the all-MiniLM-L12-v2 sentence-transformers model.
# Use the GPU when one is available and fall back to the CPU otherwise
# (e.g. when running locally) instead of hard-coding "cuda", which fails
# on machines without a CUDA device.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
    model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)

In [None]:
from langchain.embeddings import HuggingFaceInstructEmbeddings
import torch

# Embedding function used to build the FAISS index (all-MiniLM-L12-v2).
# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of hard-coding "cuda", which fails on machines without a CUDA device.
embedding_function = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
    model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)

In [None]:
# Build a FAISS vector store from the document chunks using embedding_function.
db = FAISS.from_documents(chunked_data, embedding_function)

# Run a sample similarity search against the store.
query = "How many Use Case are there?"
docs = db.similarity_search(query)

# Print the text of the first returned chunk.
print(docs[0].page_content)

In [None]:
# Expose the FAISS store as a retriever; k=3 is forwarded as a search keyword argument.
retriever = db.as_retriever(search_kwargs={"k": 3})

In [None]:
# Retrieval QA chain over the Llama model and the FAISS retriever; the source
# documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llama,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
import textwrap

def wrap_text_preserve_newlines(text, width=110):
    """Wrap *text* to *width* columns while keeping its existing line breaks.

    The text is split on '\\n', each piece is wrapped independently with
    textwrap.fill, and the pieces are joined with '\\n' again.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print the answer from *llm_response* followed by its sources.

    Reads llm_response['result'], wraps it with wrap_text_preserve_newlines
    and prints it, then prints the 'source' metadata entry of every item in
    llm_response['source_documents'].
    """
    answer = wrap_text_preserve_newlines(llm_response['result'])
    print(answer)
    print('\n\nSources:')
    for doc in llm_response["source_documents"]:
        print(doc.metadata['source'])

In [None]:
# Ask the retrieval QA chain a question, then print the answer and its sources.
query = "What is the project about?"
llm_response = qa_chain(query)
# Separator line — presumably between the streamed output and the formatted answer.
print("/")
process_llm_response(llm_response)

In [None]:
# Ask the retrieval QA chain a question, then print the answer and its sources.
query = "How many use cases are mentioned?"
llm_response = qa_chain(query)
# Separator line — presumably between the streamed output and the formatted answer.
print(" / ")
process_llm_response(llm_response)

In [None]:
# With callback manager
query = "When is the deadline for the video?"
llm_response = qa_chain(query)
# Separator line — presumably between the streamed output and the formatted answer.
print(" / ")
process_llm_response(llm_response)

### Retriever

In [None]:
# Inspect the retriever's search type and the vector store it is attached to.
qa_chain.retriever.search_type , qa_chain.retriever.vectorstore

In [None]:
# Print the prompt template used by the QA chain's combine-documents step.
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

In [None]:
from langchain.chains import ConversationalRetrievalChain

# Conversational retrieval chain over the same model and FAISS store; the
# source documents are returned with each answer.
chain = ConversationalRetrievalChain.from_llm(
    llama,
    db.as_retriever(),
    return_source_documents=True,
)

In [None]:
# First turn: start with an empty chat history.
chat_history = []

query = "How many Uses Cases are in the project mentioned?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

In [None]:
# Second turn: pass the previous question/answer pair as the chat history.
chat_history = [(query, result["answer"])]

query = "What is the project about?"
result = chain({"question": query, "chat_history": chat_history})

print(result['answer'])

In [None]:
# Print the source documents returned with the last answer.
print(result['source_documents'])