In [None]:
import torch, transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from transformers import pipeline

from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader

from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings

In [None]:
# Collect the PDF files matching the './*.pdf' glob under documents/,
# reading each one with PyPDFLoader.
loader = DirectoryLoader(
    'documents/',
    glob='./*.pdf',
    loader_cls=PyPDFLoader,
)
documents = loader.load()

# Number of Document objects produced by the loader.
len(documents)

In [None]:
# Character-based alternative kept for reference:
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

# Split using a tiktoken-based length function (chunk_size / chunk_overlap
# are passed to from_tiktoken_encoder).
# NOTE(review): 600-token chunks may exceed the input limit of the embedding
# model configured later in the notebook - confirm against its model card.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=600, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# Number of chunks produced from the loaded documents.
len(texts)

In [None]:
# Display one of the split chunks (index 3) as a spot check of the splitter output.
texts[3]

In [None]:
# Name of the sentence-embedding model; defined once so the constructor call
# below and any later reference stay in sync.
embedding_model_name = "BAAI/bge-large-en"
# Prefer the GPU, but fall back to CPU so the cell still runs without CUDA.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
# Ask the embedding backend to normalize the vectors it returns.
encode_kwargs = {"normalize_embeddings": True}
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", model_kwargs={"device":"cuda"})
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [None]:
# Embed the chunks and build a Chroma store backed by the 'db' directory.
# NOTE(review): re-running this cell against an existing 'db' directory
# presumably adds the same chunks again - confirm, and clear 'db' first if so.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory='db',
)
vectordb.persist()

# Drop the in-memory handle; the store is reopened from 'db' in a later cell.
vectordb = None

In [None]:
# Open the Chroma store located in the 'db' directory, supplying the
# embedding function used for queries.
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory='db',
)

In [None]:
# Wrap the vector store in a retriever, using as_retriever()'s default settings.
retriever = vectordb.as_retriever()

In [None]:
# Smoke-test retrieval with a sample question, then display how many
# documents the retriever returned.
docs = retriever.get_relevant_documents("What is PointLLM?")
len(docs)

In [None]:
# Display the first retrieved document.
docs[0]

In [None]:
# Length, in characters, of the first retrieved document's text.
len(docs[0].page_content)

In [None]:
# Recreate the retriever with k=3 passed as a search kwarg, replacing the
# default-configured retriever created earlier.
retriever = vectordb.as_retriever(
    search_kwargs=dict(k=3),
)

# Display the search type the retriever is using.
retriever.search_type

In [None]:
# Display the search kwargs currently configured on the retriever.
retriever.search_kwargs

In [None]:
# Hugging Face model id of the local LLM. The same id is used to load the
# tokenizer and the generation pipeline.
# model = "tiiuae/falcon-7b-instruct"
model = "georgesung/llama2_7b_chat_uncensored"
tokenizer = AutoTokenizer.from_pretrained(model)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    # NOTE(review): trust_remote_code=True executes Python code shipped in the
    # model repository. Keep it only for models that require custom code and
    # whose repository you trust.
    trust_remote_code=True,
    device_map="auto",
    # model_kwargs=model_kwargs,
    min_new_tokens=10,
    # Bound only the generated continuation. The previous max_length=2048 also
    # counted the prompt, so a prompt stuffed with retrieved chunks could use
    # up the whole budget and leave no room for an answer.
    max_new_tokens=512,
    do_sample=True,
    top_k=5,
    temperature=1.2,
    # 1.0 means "no penalty"; values slightly above 1 discourage loops. The
    # previous value of 10.0 penalized every token already present in the
    # prompt or output so heavily that generations degrade.
    repetition_penalty=1.15,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# Expose the transformers pipeline through LangChain's LLM interface.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# hf_llm = "tiiuae/falcon-7b-instruct"
# tokenizer = AutoTokenizer.from_pretrained(hf_llm)

# pipe = pipeline(
#     "text-generation",
#     model=hf_llm,
#     tokenizer=tokenizer,
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map='cuda',
#     max_new_tokens=1024,
#     eos_token_id=tokenizer.eos_token_id
# )

# local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Reference: https://python.langchain.com/docs/use_cases/question_answering/how_to/vector_db_qa
# Build a RetrievalQA chain of type "stuff" on top of the local LLM and the
# retriever, and ask it to return the source documents with each answer.
chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [None]:
from pprint import pprint


def formatted_response(llm_response, sources=True):
    """Print a chain response and, optionally, a summary of its sources.

    Args:
        llm_response: Dict with a 'result' entry and, when sources are
            requested, a 'source_documents' iterable. Each document must
            expose `page_content` (str) and `metadata` (dict).
        sources: When true, list every source document with its word count
            and page, followed by the total word count.

    Returns:
        None. Everything is written to stdout.
    """
    print("Response:")
    pprint(llm_response['result'])

    if not sources:
        return

    total_words = 0
    print("\nSources:")
    # A missing or empty 'source_documents' entry is treated as "no sources"
    # instead of raising KeyError.
    for document in llm_response.get('source_documents') or []:
        metadata = document.metadata or {}
        # Whitespace-delimited word count: a rough size indicator, not a
        # tokenizer-based token count.
        word_count = len(document.page_content.split())
        total_words += word_count
        # Not every document carries these metadata keys, so fall back to
        # placeholders rather than failing halfway through the listing.
        source = metadata.get('source', 'unknown')
        page = metadata.get('page', 'n/a')
        print(f"Source: {source} || Len: {word_count} ||Page {page}")
    print(f"Total source length: {total_words}")

In [None]:
# The query previously contained only the style instruction (note its trailing
# space) and no question, so retrieval ran against the formatting instruction
# itself. Append the actual question to the instruction.
# NOTE(review): the question reuses the one from the retrieval smoke test
# earlier in the notebook - replace it with the intended question if different.
question = "What is PointLLM?"
query = "Use proper sentences in your response. " + question
llm_response = chain(query)
formatted_response(llm_response)

In [None]:
# from langchain.chat_models import ChatOpenAI
# OPENAI_KEY = ''

# openai_llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo', openai_api_key=OPENAI_KEY)

# openai_chain = RetrievalQA.from_chain_type(llm=openai_llm,
#                                     chain_type="stuff",
#                                     retriever=retriever,
#                                     return_source_documents=True)

In [None]:
# query = "Can you elaborate on what the PointLLM paper is talking about?"
# llm_response = openai_chain(query)
# formatted_response(llm_response)

In [None]:
# Print the prompt template held by the chain's combine-documents LLM chain.
print(chain.combine_documents_chain.llm_chain.prompt.template)

1.
https://www.youtube.com/watch?v=3yPBVii7Ct0&list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ&index=22&ab_channel=SamWitteveen

2.
https://www.youtube.com/watch?v=cFCGUjc33aU&list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ&index=22&ab_channel=SamWitteveen

3.
https://www.youtube.com/watch?v=9ISVjh8mdlA&list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ&index=24&ab_channel=SamWitteveen

4.
https://colab.research.google.com/drive/17eByD88swEphf-1fvNOjf_C79k0h2DgF?usp=sharing#scrollTo=wwyuhrpu5XqM

5.
https://colab.research.google.com/drive/1zG1R08TBikG05ecF8et4vi_1F9xutY-6?usp=sharing#scrollTo=olRm73t3rNt2