In [None]:
import torch

# Run on the first CUDA GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

#### Python packages

In [None]:
!pip install langchain sentence_transformers InstructorEmbedding pypdf chromadb llama-cpp-python openai

#### imports

In [None]:
from typing import Any, Dict

from langchain.document_loaders import DirectoryLoader, PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import LlamaCpp, OpenAI, TextGen
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

import os
import urllib.request

#### document loaders

In [None]:
# Folder the source documents are read from; it is created up front so that
# files can be dropped into it before the loader cell is run.
directory: str = './Documents'
os.makedirs(name=directory, exist_ok=True)

In [None]:
# Run this cell only after the PDF / text files have been placed in `directory`.

# One DirectoryLoader per file type, each selecting its files through a glob pattern.
PDFloader = DirectoryLoader(directory, loader_cls=PyPDFLoader, glob='./*.pdf')
Textloader = DirectoryLoader(directory, loader_cls=TextLoader, glob='./*.txt')

# Single list of documents: everything from the PDFs first, then the text files.
documents = [*PDFloader.load(), *Textloader.load()]

#### document splitter

In [None]:
# Splitter settings (measured in characters with the splitter's default length function).
chunk_size: int = 512  # upper bound on the length of a single chunk
chunk_overlap: int = 0  # characters shared by neighbouring chunks; must not exceed chunk_size

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)

# Break every loaded document into retrieval-sized chunks.
chunks = text_splitter.split_documents(documents)

#### Embeddings


In [None]:
# `device` is chosen in the first cell. To force CPU execution (CUDA requires a torch
# build compiled with CUDA support), set device = 'cpu' before running this cell.

model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedding_model_kwargs = {'device': device}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=embedding_model_kwargs)

# Alternative embedding model (instruction-tuned); swap it in by uncommenting:
# model_name = 'hkunlp/instructor-base'
# embeddings = HuggingFaceInstructEmbeddings(model_name=model_name, model_kwargs={'device': device})

#### vectorstore

In [None]:
k: int = 5  # how many chunks the retriever returns per query
persist_directory: str = './index'  # where Chroma stores the index on disk

# Embed every chunk and store the vectors in a Chroma collection.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=persist_directory,
)

# Retriever over the collection, configured to return the top-k chunks.
retriever = vectorstore.as_retriever(search_kwargs={"k": k})

#### model download for llama.cpp

In [None]:
# Hugging Face repository and file name of the quantised model used by llama.cpp.
repo_id, name = 'TheBloke/StableBeluga-13B-GGML', 'stablebeluga-13b.ggmlv3.q2_K.bin'

os.makedirs('./Models', exist_ok=True)
filename = f'./Models/{name}'

# Download only when the model file is not already present.
if not os.path.isfile(filename):
    url = f'https://huggingface.co/{repo_id}/resolve/main/{name}'
    # Download under a temporary name and rename only after the transfer finished:
    # an interrupted download must not leave a truncated file at `filename`, because
    # the isfile() check above would then treat it as a complete model on the next run.
    partial_filename = f'{filename}.part'
    try:
        urllib.request.urlretrieve(url=url, filename=partial_filename)
        os.replace(partial_filename, filename)
    finally:
        # After a successful rename this is a no-op; after a failure or an interrupt
        # it removes the incomplete file.
        if os.path.exists(partial_filename):
            os.remove(partial_filename)
    print("File downloaded successfully.")

#### llm parameters

In [None]:
# Generation settings passed to the LLM backends configured in the next cell.
max_tokens: int = 2048  # cap on the number of generated tokens
temperature: float = 0.1
top_k: int = 40
top_p: float = 0.1
# Generation is cut off as soon as one of these strings is produced.
stopping_strings = ['### System:', '### User:', '\n\n']

#### llm

In [None]:

# Three interchangeable backends are configured here; all of them are instantiated
# (so the llama.cpp model file is loaded even when another backend is selected) and
# `llm` at the bottom of the cell selects the one the chain will use.

# Backend 1: text-generation-webui server reached over HTTP.
textgen = TextGen(
    model_url='http://127.0.0.1:5000', # must use public-api in textgen webui
    temperature=temperature,
    max_new_tokens=max_tokens,
    top_p=top_p,
    top_k=top_k,
    stopping_strings=stopping_strings,
    )

# Backend 2: OpenAI completion API.
# The key is read from the OPENAI_API_KEY environment variable so that a real secret
# never has to be written into the notebook. The dummy value is only a fallback that
# keeps this cell runnable when the OpenAI backend is not used.
openai = OpenAI(
                model_name='text-davinci-003',
                openai_api_base='https://api.openai.com/v1',
                openai_api_key=os.environ.get(
                    'OPENAI_API_KEY',
                    'sk-111111111111111111111111111111111111111111111111',
                ),
                streaming=False,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                )

# Backend 3: local llama.cpp model downloaded in the model-download cell.
# LangChain's LlamaCpp wrapper defaults to a 512-token context window, which is smaller
# than max_tokens and than a prompt holding the retrieved chunks, so the window is set
# explicitly. 4096 assumes a Llama-2 based model such as StableBeluga; lower it if the
# model in use was trained with a shorter context.
llama_n_ctx: int = 4096

llamacpp = LlamaCpp(
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
                top_k=top_k,
                stop=stopping_strings,
                model_path=filename,
                n_ctx=llama_n_ctx,
               )

llm = llamacpp # textgen, openai, llamacpp, change this to switch llm backend

#### memory


In [None]:
class AnswerConversationBufferMemory(ConversationBufferMemory):
    """Conversation memory that stores only the ``'answer'`` entry of the chain output.

    The chain is configured to return more than the answer (it also returns the source
    documents), so the output is narrowed to a single ``'response'`` entry before it is
    handed to the parent class.
    """

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Record one exchange, keeping ``outputs['answer']`` as the response."""
        answer_only = {'response': outputs['answer']}
        return super().save_context(inputs, answer_only)


# Chat history is exposed to the chain under the "chat_history" key as message objects.
memory = AnswerConversationBufferMemory(return_messages=True, memory_key="chat_history")

#### Prompt

In [None]:
# Prompt template: Orca-Hashes

# Section markers of the Orca-Hashes layout.
sys_name = '### System:'
user_name = '### User:'
input_name = '### Input:'
res_name = '### Response:'

# System message shared by both prompts.
system = 'You are an AI assistant that follows instruction extremely well. Help as much as you can.'


def _orca_hashes_template(user: str, input_text: str, response: str) -> str:
    """Assemble the four sections (system, user, input, response) into one template string.

    Each section is its marker, a newline and its text; sections are separated by a
    blank line. Placeholders such as ``{question}`` inside the arguments are kept as-is.
    """
    sections = (
        f'{sys_name}\n{system}',
        f'{user_name}\n{user}',
        f'{input_name}\n{input_text}',
        f'{res_name}\n{response}',
    )
    return '\n\n'.join(sections)


# condense question prompt: turns a follow-up question plus history into a standalone question

user = 'Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.'
input_text = 'Chat History:\n{chat_history}\nFollow up question: {question}'
response = 'Standalone question:'

condense_question_prompt = PromptTemplate.from_template(
    template=_orca_hashes_template(user, input_text, response)
)

# qa prompt: answers the question from the retrieved context
user = '''Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.'''
input_text = 'Question: {question}\nPieces of context:\n{context}'
response = 'Helpful Answer:'

qa_prompt = PromptTemplate.from_template(
    template=_orca_hashes_template(user, input_text, response)
)

#### chain

In [None]:
# Conversational retrieval chain wiring together the LLM, the retriever, both prompts
# and the conversation memory defined in the cells above.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    # Prompt used to rewrite a follow-up question into a standalone one.
    condense_question_prompt=condense_question_prompt,
    rephrase_question=True,
    # Prompt used to answer from the retrieved chunks.
    combine_docs_chain_kwargs={"prompt": qa_prompt},
    # Also return the retrieved chunks, which process_response() reads.
    return_source_documents=True,
    verbose=False,
)

In [None]:
def process_response(res):
    """Split a chain result into the answer and the retrieved source chunks.

    Args:
        res: Mapping returned by the chain. It must contain ``"answer"`` and
            ``"source_documents"``; each source document must expose ``page_content``
            and a ``metadata`` mapping with at least a ``'source'`` entry.

    Returns:
        A tuple ``(answer, source_documents)``. ``source_documents`` is a nested dict
        ``{source: {page: text}}`` where ``text`` is the formatted chunk content.
        ``page`` is ``None`` for documents whose metadata has no ``'page'`` entry
        (for example plain-text files). When several retrieved chunks share the same
        source and page, they are all kept, joined by a newline in retrieval order.
    """
    answer = res["answer"]
    source_documents = {}

    for document in res['source_documents']:
        metadata = document.metadata
        source = metadata['source']
        # Not every loader sets a page number, so a missing one must not raise.
        page = metadata.get('page')
        document_string = f'contenido: "{document.page_content}"'

        pages = source_documents.setdefault(source, {})
        if page in pages:
            # Append instead of overwriting so no retrieved chunk is dropped.
            pages[page] = f'{pages[page]}\n{document_string}'
        else:
            pages[page] = document_string

    return answer, source_documents

def query(prompt: str, chain):
    """Send ``prompt`` to ``chain`` as the question and return the processed result.

    Args:
        prompt: The question text.
        chain: Callable accepting ``{"question": ...}`` and returning the raw result.

    Returns:
        The ``(answer, source_documents)`` pair produced by ``process_response``.
    """
    raw_result = chain({"question": prompt})
    return process_response(res=raw_result)

#### usage

In [None]:
# Read one question from the user, run it through the chain and show the answer.
prompt: str = input('query: ')
answer, source_documents = query(chain=chain, prompt=prompt)
print(f"Response: \n {answer} \n\n")