In [43]:

from langchain.embeddings.base import Embeddings
from langchain_openai import OpenAI 
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

from langchain_community.vectorstores import FAISS

from langchain_core.vectorstores import VectorStoreRetriever

from langchain.chains import RetrievalQA
import os

## If you want to use Local embeddings

In [44]:
from sentence_transformers import SentenceTransformer

class LocalEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter around a local SentenceTransformer model.

    Both embedding methods return plain Python lists of floats rather than
    NumPy arrays, which is the return type the ``Embeddings`` interface
    declares (``List[List[float]]`` / ``List[float]``).
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Load the sentence-transformers model named ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Embed a batch of texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding per input text, as a list of lists of floats.
        """
        # ``encode`` yields a NumPy array by default; convert it so callers
        # receive the list-based structure the interface promises.
        return self.model.encode(texts).tolist()

    def embed_query(self, text):
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            The embedding of ``text`` as a list of floats.
        """
        # Encode as a one-element batch and unwrap the only row.
        return self.model.encode([text])[0].tolist()

In [45]:
# Point a TextLoader at the source text file (path is relative to the working directory).
# NOTE(review): no encoding is passed, so the loader's default applies — confirm it matches the file.
loader = TextLoader("computer.txt")

In [46]:
# Read the file through the loader; the result is what gets split into chunks below.
documents = loader.load()

In [None]:
# Chunking policy: target size of 500 as measured by len (i.e. characters),
# with no overlap carried over between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0, length_function=len)

In [48]:
# Break the loaded documents into chunks with the splitter configured above.
docs = text_splitter.split_documents(documents)

## Processing the data locally

In [49]:
import numpy as np

# Flatten the chunks into two index-aligned lists: the raw text of each chunk
# and the metadata dict that belongs to it.
texts = [chunk.page_content for chunk in docs]
metadata = [chunk.metadata for chunk in docs]

# Embedder that wraps a SentenceTransformer model (see LocalEmbeddings).
model = LocalEmbeddings()

# Embed the chunk texts and store them, with their metadata, in a FAISS
# vector store.
library = FAISS.from_texts(
    texts=texts,
    embedding=model,
    metadatas=metadata,
)

In [50]:
# Question used to probe the FAISS index built with the local embeddings.
Query1 = "whats was the world's first electronic digital programmable computer?"

In [51]:
# Retrieve the chunks most similar to the question from the local-embedding index.
Query_Answer = library.similarity_search(Query1)
# Bare name as the last expression so the notebook displays the retrieved Documents.
Query_Answer

[Document(metadata={'source': 'computer.txt'}, page_content="In 1941, Zuse followed his earlier machine up with the Z3, the world's first working electromechanical programmable, fully automatic digital computer.[38][39] The Z3 was built with 2000 relays, implementing a 22 bit word length that operated at a clock frequency of about 5–10 Hz.[40] Program code was supplied on punched film while data could be stored in 64 words of memory or supplied from the keyboard. It was quite similar to modern machines in some respects, pioneering numerous advances such"),
 Document(metadata={'source': 'computer.txt'}, page_content='In 1945, Turing joined the National Physical Laboratory and began work on developing an electronic stored-program digital computer. His 1945 report "Proposed Electronic Calculator" was the first specification for such a device. John von Neumann at the University of Pennsylvania also circulated his First Draft of a Report on the EDVAC in 1945.[34]'),
 Document(metadata={'sou

In [52]:
# Same search, but each hit comes back as a (Document, score) pair.
# NOTE(review): with the default FAISS setup the score is presumably a distance
# (lower = closer match) — confirm before comparing or thresholding scores.
docs_n_scores = library.similarity_search_with_score(Query1)

In [53]:
docs_n_scores

[(Document(metadata={'source': 'computer.txt'}, page_content="In 1941, Zuse followed his earlier machine up with the Z3, the world's first working electromechanical programmable, fully automatic digital computer.[38][39] The Z3 was built with 2000 relays, implementing a 22 bit word length that operated at a clock frequency of about 5–10 Hz.[40] Program code was supplied on punched film while data could be stored in 64 words of memory or supplied from the keyboard. It was quite similar to modern machines in some respects, pioneering numerous advances such"),
  0.7048075),
 (Document(metadata={'source': 'computer.txt'}, page_content='In 1945, Turing joined the National Physical Laboratory and began work on developing an electronic stored-program digital computer. His 1945 report "Proposed Electronic Calculator" was the first specification for such a device. John von Neumann at the University of Pennsylvania also circulated his First Draft of a Report on the EDVAC in 1945.[34]'),
  0.7214

In [54]:
# Expose the local-embedding index through the retriever interface.
# NOTE(review): `retriever` is not referenced by any later cell visible here;
# the QA chain below builds its own retriever from a different vector store.
retriever = library.as_retriever()

# Setting it up with Ollama

In [55]:
import ollama
from langchain.chains.question_answering import load_qa_chain
from langchain_ollama.chat_models import ChatOllama
from langchain.embeddings import OllamaEmbeddings


# Single source of truth for the Ollama model tag, shared by the chat model
# and the embedder so the two cannot drift apart.
OLLAMA_MODEL = 'llama3.2:latest'

# Chat model that writes the final answer. temperature=0 keeps sampling as
# deterministic as the backend allows, so re-running the notebook yields
# comparable answers.
llm = ChatOllama(model=OLLAMA_MODEL, temperature=0)

# Embedding model served by Ollama.
# NOTE(review): this embeds with a general-purpose chat model; a dedicated
# embedding model may retrieve more relevant chunks — worth evaluating.
embeddings = OllamaEmbeddings(model=OLLAMA_MODEL)

# Second FAISS index over the same chunk texts and metadata, this time
# embedded through Ollama instead of the local sentence-transformers model.
vectorstore = FAISS.from_texts(texts=texts, embedding=embeddings, metadatas=metadata)

# Chain that combines the retrieved chunks for the LLM; chain_type "stuff"
# is LangChain's strategy of placing all retrieved documents in one prompt.
combine_documents_chain = load_qa_chain(llm=llm, chain_type="stuff")

# Retrieval-augmented QA: fetch relevant chunks from the Ollama-embedded
# index, then answer from them with the combine chain above.
qa = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    retriever=vectorstore.as_retriever(),
)

In [56]:
# Question for the Ollama-backed QA chain (same text as Query1).
Query2 = "whats was the world's first electronic digital programmable computer?"

In [57]:
# Run retrieval + generation; the returned dict has 'query' and 'result' keys.
answer = qa.invoke(Query2)

In [58]:
answer

{'query': "whats was the world's first electronic digital programmable computer?",
 'result': "I don't know, but according to the context provided, ENIAC (Electronic Numerical Integrator and Computer) is described as a Turing-complete device that performed ballistics trajectory calculations for the United States Army. However, it seems it may not be the world's first electronic digital programmable computer, as that distinction belongs to another computer, Harwell CADET of 1955."}