In [1]:
import textwrap
from pathlib import Path

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceBgeEmbeddings


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Load the FLAN-T5 tokenizer and seq2seq model; device placement is delegated
# to the library via device_map='auto'.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large", device_map='auto')

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=837,
    # Greedy (deterministic) decoding. The previous `temperature=0` is not a
    # valid sampling temperature and is ignored when sampling is off, which
    # newer transformers releases warn about or reject; `do_sample=False`
    # states the same intent explicitly.
    do_sample=False,
    # top_p=0.95,
    # repetition_penalty=1.15,
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [2]:
# Read every .txt file matched by the recursive glob under 'doc'.
dir_loader = DirectoryLoader(
    'doc',
    glob="**/*.txt",
    show_progress=True,
    use_multithreading=True,
    loader_cls=TextLoader,
)
docs = dir_loader.load()
print(f"Number of documents: {len(docs)}")

# Cut the documents into overlapping chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(docs)
print(f"    Total chunks: {len(texts)}")

100%|██████████| 30/30 [00:00<00:00, 6957.27it/s]

Number of documents: 30
    Total chunks: 89





In [3]:
# embedding
model_name = "BAAI/bge-base-en"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

model_norm = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    # Prefer the GPU but fall back to CPU so the notebook also runs on
    # machines without CUDA (the LLM cell already uses device_map='auto').
    model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'},
    encode_kwargs=encode_kwargs
)


In [4]:
%%time
persist_directory = 'db-bge-py'

## Here is the new embeddings being used
embedding = model_norm

# Make the chroma and persist to disk
# vectordb = Chroma.from_documents(documents=texts,
#                                  embedding=embedding,
#                                  persist_directory=persist_directory)

# Load from disk
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

CPU times: user 521 ms, sys: 86 ms, total: 607 ms
Wall time: 3.28 s


In [5]:
# Make a retriever over the vector store that asks for k=5 chunks per query.
# NOTE(review): the recorded run warns that the prompt is 1093 tokens against a
# 512-token tokenizer limit; a smaller k or smaller chunks may be needed — confirm.
retriever = vectordb.as_retriever(
    search_kwargs={"k": 5},
)

In [12]:
# Build the question-answering chain: the "stuff" chain type is used with the
# local LLM and the retriever, and source documents are returned with the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [7]:
def wrap_text(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own with ``textwrap.fill``
    and the results are joined back together with newlines.

    Args:
        text: The string to wrap.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print the chain's answer and the source of every returned document.

    Args:
        llm_response: Mapping with a 'result' string and a 'source_documents'
            iterable whose items expose ``metadata['source']``.
    """
    answer = wrap_text(llm_response['result'])
    print(answer)
    print('\n\nSources')

    # One line per returned document, in the order the chain supplied them.
    for document in llm_response["source_documents"]:
        print(document.metadata['source'])

In [13]:
# Run one question through the QA chain and print the answer with its sources.
# NOTE(review): the recorded output for this cell shows a tokenizer warning
# (1093 > 512 tokens) and a truncated-looking answer ("Prof.") — the stuffed
# context is probably too long for the model; verify.
query = "answer the following question in a sentence: who is Berns?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

Token indices sequence length is longer than the specified maximum sequence length for this model (1093 > 512). Running this sequence through the model will result in indexing errors




Prof.


Sources
doc/Simulation.txt
doc/humanoid_robots.txt
doc/Simulation.txt
doc/Simulation.txt
doc/CARL.txt
