In [1]:
import pickle
from glob import glob

import torch
from dotenv import load_dotenv
from langchain import HuggingFacePipeline, HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader, UnstructuredPDFLoader, PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline, AutoModelForCausalLM, AutoModelForQuestionAnswering, AutoModel

In [2]:
# Read variables from a local .env file into the process environment.
load_dotenv()

True

In [3]:
### RetrievalQA With Local HuggingFace Model

In [4]:
# Build a local text-generation LLM: load the checkpoint and its tokenizer,
# wrap them in a transformers pipeline, then expose that pipeline to LangChain.
MODEL_ID = "PY007/TinyLlama-1.1B-Chat-v0.2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    # max_length=200
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=pipe)

# Alternative LLM backends, kept for reference:
# llm = HuggingFaceHub(repo_id="google/flan-t5-xxl")
# llm = HuggingFacePipeline.from_model_id("google/flan-t5-xl", task="text2text-generation")

In [5]:
# Load the listed web pages as LangChain documents.
urls = ["https://python.langchain.com/docs/get_started/introduction"]
loader = UnstructuredURLLoader(urls=urls)
documents = loader.load()

# Alternative source: build `documents` from local PDF files instead.
# pdf_paths = glob("path/to/pdfs/*")
# documents = []
# for path in pdf_paths:
#     loader = UnstructuredPDFLoader(file_path=path)
#     documents.extend(loader.load())



In [6]:
# Number of documents returned by the loader.
len(documents)

1

In [7]:
# Split the loaded documents into overlapping chunks; the overlap means text
# near a chunk boundary appears in both neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
texts = text_splitter.split_documents(documents)

In [8]:
# Number of chunks produced by the splitter.
len(texts)

2

In [9]:
# Inspect the first chunk.
texts[0]

Document(page_content='Get started\n\nIntroduction\n\nIntroduction\n\nLangChain is a framework for developing applications powered by language models. It enables applications that:\n\nAre context-aware: connect a language model to sources of context (prompt instructions, few shot examples, content to ground its response in, etc.)\n\nReason: rely on a language model to reason (about how to answer based on provided context, what actions to take, etc.)\n\nThe main value props of LangChain are:\n\nComponents: abstractions for working with language models, along with a collection of implementations for each abstraction. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not\n\nOff-the-shelf chains: a structured assembly of components for accomplishing specific higher-level tasks\n\nOff-the-shelf chains make it easy to get started. For complex applications, components make it easy to customize existing chains and build new ones.\n\nGet starte

In [10]:
## Load Embedding Model to Create Vectors from Documents
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA (a hard-coded "cuda" fails there).
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
    model_kwargs={"device": embedding_device},
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [11]:
# Embed every chunk and build the FAISS index. This can take a while to run;
# the next cell saves the vector store so later runs can load it from disk.
vector_store = FAISS.from_documents(documents=texts, embedding=embeddings)

In [12]:
# Persist the vector store to disk as a pickle file.
with open("vector_store.db", "wb") as store_file:
    pickle.dump(vector_store, store_file)

In [13]:
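# Uncomment to reload the pickled vector store from disk instead of rebuilding it.
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
# with open("vector_store.db", "rb") as f:
#     vector_store = pickle.load(f)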

In [14]:
# Question-answering chain: chunks returned by the vector store's retriever are
# passed to the LLM using the "stuff" chain type.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

In [15]:
## Question to ask; modify it to be relevant to the documents stored in the vector store
query = "What is Langchain?"

In [16]:
# Inspect which stored chunks are most similar to the query.
vector_store.similarity_search(query)

[Document(page_content='Get started\n\nIntroduction\n\nIntroduction\n\nLangChain is a framework for developing applications powered by language models. It enables applications that:\n\nAre context-aware: connect a language model to sources of context (prompt instructions, few shot examples, content to ground its response in, etc.)\n\nReason: rely on a language model to reason (about how to answer based on provided context, what actions to take, etc.)\n\nThe main value props of LangChain are:\n\nComponents: abstractions for working with language models, along with a collection of implementations for each abstraction. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not\n\nOff-the-shelf chains: a structured assembly of components for accomplishing specific higher-level tasks\n\nOff-the-shelf chains make it easy to get started. For complex applications, components make it easy to customize existing chains and build new ones.\n\nGet start

In [17]:
## These results include the knowledge base (the query goes through the retrieval chain)
qa.run(query)

' Langchain is a framework for developing applications powered by language models. It enables applications to:\n\nGet started\n\nIntroduction\n\nGet started\n\n'

In [18]:
## These results do not include the knowledge base (just raw input/output with model)
## Compare with the retrieval-chain answer to see what the retrieved context adds.
llm.predict(query)

'\nLangchain is a blockchain platform that allows users to create, buy, and sell digital art. It is designed to be a decentralized platform'

In [19]:
from transformers import RagTokenizer, RagRetriever, RagTokenForGeneration
from datasets import load_dataset

# Load the pretrained RAG tokenizer, retriever and generator from one checkpoint.
# NOTE: this rebinds `tokenizer` and `model`, which earlier held the TinyLlama
# objects; `pipe` keeps its own references to those.
# The recorded run was interrupted while a 4.69G data file was downloading, so
# this cell never finished in that run.
RAG_CHECKPOINT = "facebook/rag-token-nq"

tokenizer = RagTokenizer.from_pretrained(RAG_CHECKPOINT)
retriever = RagRetriever.from_pretrained(
    RAG_CHECKPOINT,
    index_name="exact",
    use_dummy_dataset=True,
)
model = RagTokenForGeneration.from_pretrained(RAG_CHECKPOINT, retriever=retriever)

Downloading (…)lve/main/config.json:   0%|          | 0.00/4.60k [00:00<?, ?B/s]



Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading (…)_tokenizer/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizerFast'.


Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)tokenizer/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)tokenizer/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'BartTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'BartTokenizerFast'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may res

Downloading builder script:   0%|          | 0.00/9.62k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/67.5k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/14.6k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/4.69G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
# Ask the RAG model a question and print its first generated answer.
# The question is encoded with the tokenizer's regular __call__ because
# `prepare_seq2seq_batch` is deprecated in transformers.
# Expects `tokenizer` and `model` to be the RAG objects from the previous cell.
question = "who holds the record in 100m freestyle"
input_dict = tokenizer(question, return_tensors="pt")

generated = model.generate(input_ids=input_dict["input_ids"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])