In [8]:
# Hugging Face token
import getpass
import os
if "HUGGINGFACEHUB_API_TOKEN" not in os.environ:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass.getpass()

In [9]:
# fetch the txt sample from GitHub with an HTTP GET request
import requests
URL = "https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt"
res = requests.get(URL)
with open("state_of_the_union.txt", "w") as f:
    f.write(res.text)

In [10]:
# with langchain textloader load the .txt file on documents variable
from langchain.document_loaders import TextLoader
loader = TextLoader('./state_of_the_union.txt')
documents = loader.load()

In [11]:
# this function wraps the document on phrases preserving the new line (\n) operator
import textwrap

def wrap_text_preserving_new_lines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')
    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text


In [12]:
# Text Splitting
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [13]:
len(docs)

42

In [14]:
# Embeddings
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings()

In [15]:
from langchain.vectorstores import FAISS

db = FAISS.from_documents(docs, embeddings)

In [16]:
query = "What did the president say about the Supreme Court"
docs = db.similarity_search(query)

In [17]:
print(wrap_text_preserving_new_lines(str(docs[0].page_content)))

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And
while you’re at it, pass the Disclose Act so Americans can know who is funding our elections.

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an
Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer,
thank you for your service.

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the
United States Supreme Court.

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our
nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


In [1]:
# QA Search
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain_community.llms import HuggingFaceEndpoint

In [4]:
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", temperature=0.1, max_length=512)

                    max_length was transferred to model_kwargs.
                    Please make sure that max_length is what you intended.


Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to C:\Users\usuario\.cache\huggingface\token
Login successful


In [18]:
chain = load_qa_chain(llm, chain_type="stuff")

In [20]:
query = "What did the president say about the Supreme Court"
docs = db.similarity_search(query)
chain.run(input_documents=docs, question=query)

" The president called on the Senate to pass three pieces of legislation related to voting rights and campaign finance disclosure. He also announced his nomination of Judge Ketanji Brown Jackson to the Supreme Court. He expressed optimism about America's ability to overcome current challenges and make progress."