In [1]:
from langchain.embeddings.base import Embeddings
from langchain_openai import OpenAI 
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

from langchain_community.vectorstores import FAISS

from langchain_core.vectorstores import VectorStoreRetriever

from langchain.chains import RetrievalQA
import os

## If you want to use Local embeddings

In [2]:
from sentence_transformers import SentenceTransformer
import numpy  as np
class LocalEmbeddings(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)
        
    def embed_documents(self, texts):
        return np.array(self.model.encode(texts))
    
    def embed_query(self, text):
        return np.array(self.model.encode([text])[0])

  from tqdm.autonotebook import tqdm, trange


In [3]:
loader = TextLoader("Honda_2020.md")

In [4]:
documents = loader.load()

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=0,
    length_function=len,
)

In [6]:
docs = text_splitter.split_documents(documents)

## tratando os dados localmente

In [7]:
import numpy as np

#transforma tudo extraido em uma lista com apenas os textos por chunk
texts = [doc.page_content for doc in docs]
metadata = [doc.metadata for doc in docs]

model = LocalEmbeddings()

#transforma os embeddings e textos em um vector database
library = FAISS.from_texts(texts=texts,embedding = model, metadatas=metadata)



In [None]:
Query1 = "Me explique sobre a linha de Bordo"

In [9]:
Query_Answer = library.similarity_search(Query1)
Query_Answer

[Document(metadata={'source': 'Honda_2020.md'}, page_content='**B**'),
 Document(metadata={'source': 'Honda_2020.md'}, page_content='com Bluetooth[®].'),
 Document(metadata={'source': 'Honda_2020.md'}, page_content='Bluetooth[®]........................................ 9-42\nInformações Legais do Apple'),
 Document(metadata={'source': 'Honda_2020.md'}, page_content='iPod[®].\n\nnovamente)')]

In [10]:
docs_n_scores = library.similarity_search_with_score(Query1)

In [11]:
docs_n_scores

[(Document(metadata={'source': 'Honda_2020.md'}, page_content='**B**'),
  1.305159),
 (Document(metadata={'source': 'Honda_2020.md'}, page_content='com Bluetooth[®].'),
  1.3274362),
 (Document(metadata={'source': 'Honda_2020.md'}, page_content='Bluetooth[®]........................................ 9-42\nInformações Legais do Apple'),
  1.3391099),
 (Document(metadata={'source': 'Honda_2020.md'}, page_content='iPod[®].\n\nnovamente)'),
  1.4186926)]

In [12]:
retriever = library.as_retriever()

# Setting it up with Ollama

In [13]:
import ollama
from langchain.chains.question_answering import load_qa_chain
from langchain_ollama.chat_models import ChatOllama
from langchain.embeddings import OllamaEmbeddings

llm = ChatOllama(model='llama3.2:latest')

# Initialize the embeddings model
embeddings = OllamaEmbeddings(model='llama3.2:latest')

combine_documents_chain = load_qa_chain(llm=llm, chain_type="stuff")

# Initialize the RetrievalQA chain
qa = RetrievalQA(combine_documents_chain=combine_documents_chain, retriever=library.as_retriever())

  embeddings = OllamaEmbeddings(model='llama3.2:latest')
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  combine_documents_chain = load_qa_chain(llm=llm, chain_type="stuff")
  qa = RetrievalQA(combine_documents_chain=combine_documents_chain, retriever=library.as_retriever())


In [14]:
Query2 = "whats was the world's first electronic digital programmable computer?"

In [None]:
answer = qa.invoke(Query1)

In [16]:
answer

{'query': "whats was the world's first electronic digital programmable computer?",
 'result': "I don't know, but I can tell you that Charles Babbage is often credited with designing the first mechanical computer, the Difference Engine and Analytical Engine, in the early 19th century. \n\nHowever, some historians argue that the Colossus machine, built in 1943 during World War II at Bletchley Park, was a precursor to modern electronic computers, as it was used for code-breaking purposes.\n\nThe first electronic digital programmable computer is generally considered to be ENIAC (Electronic Numerical Integrator and Computer), which was completed in 1946 by John Mauchly and J. Presper Eckert at the University of Pennsylvania."}