In [None]:

from langchain.embeddings.base import Embeddings
from langchain_openai import OpenAI 
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

from langchain_community.vectorstores import FAISS

from langchain_core.vectorstores import VectorStoreRetriever

from langchain.chains import RetrievalQA
import os

## If you want to use local embeddings

In [60]:
from sentence_transformers import SentenceTransformer

class LocalEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter around a local SentenceTransformer model.

    Args:
        model_name: Name of the SentenceTransformer checkpoint to load.
    """

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Embed a batch of texts.

        Args:
            texts: List of strings to embed.

        Returns:
            One embedding per input text, as a list of lists of floats.
        """
        # encode() yields a NumPy array by default; the Embeddings interface
        # is specified in terms of plain Python lists, so convert explicitly.
        return self.model.encode(texts).tolist()

    def embed_query(self, text):
        """Embed a single query string.

        Args:
            text: The query to embed.

        Returns:
            The embedding as a list of floats.
        """
        # Encode as a batch of one, take the single row, and convert it to a list.
        return self.model.encode([text])[0].tolist()

In [61]:
# Loader for the plain-text file used as the knowledge source for retrieval.
loader = TextLoader("computer.txt")

In [62]:
# Load the file contents as LangChain documents.
documents = loader.load()

In [63]:
# Splitter configuration: chunk size 500 measured with len(), and no overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0, length_function=len)

In [64]:
# Split the loaded documents into chunks using the splitter configured above.
docs = text_splitter.split_documents(documents)

## Processing the data locally

In [65]:
import numpy as np

# Pull the raw text and the metadata out of every chunk as two parallel lists.
texts = [chunk.page_content for chunk in docs]
metadata = [chunk.metadata for chunk in docs]

# Embedder backed by the local SentenceTransformer model (default checkpoint).
model = LocalEmbeddings()

# Embed the chunk texts and index them, together with their metadata,
# in a FAISS vector store.
library = FAISS.from_texts(
    texts=texts,
    embedding=model,
    metadatas=metadata,
)

In [66]:
# Question used to probe the locally embedded vector store.
# NOTE(review): "whats was" is a typo; the string is left untouched because it
# is the exact text that gets embedded and searched.
Query1 = "whats was the world's first electronic digital programmable computer?"

In [67]:
# Similarity search for Query1 against the local FAISS store.
Query_Answer = library.similarity_search(Query1)
# Bare last expression so the notebook displays the retrieved documents.
Query_Answer

[Document(metadata={'source': 'computer.txt'}, page_content='processing system, using thousands of vacuum tubes.[34] In the US, John Vincent Atanasoff and Clifford E. Berry of Iowa State University developed and tested the Atanasoff–Berry Computer (ABC) in 1942,[45] the first "automatic electronic digital computer".[46] This design was also all-electronic and used about 300 vacuum tubes, with capacitors fixed in a mechanically rotating drum for memory.[47]'),
 Document(metadata={'source': 'computer.txt'}, page_content='modern electronic computer.[62] As soon as the Baby had demonstrated the feasibility of its design, a project began at the university to develop it into a practically useful computer, the Manchester Mark 1.'),
 Document(metadata={'source': 'computer.txt'}, page_content="In 1941, Zuse followed his earlier machine up with the Z3, the world's first working electromechanical programmable, fully automatic digital computer.[38][39] The Z3 was built with 2000 relays, implementi

In [68]:
# Same search as above, but each hit is returned as a (Document, score) pair.
# NOTE(review): the displayed hits are listed with ascending scores, so the score
# presumably behaves like a distance (lower = closer) — confirm for the FAISS metric in use.
docs_n_scores = library.similarity_search_with_score(Query1)

In [69]:
# Display the (Document, score) pairs.
docs_n_scores

[(Document(metadata={'source': 'computer.txt'}, page_content='processing system, using thousands of vacuum tubes.[34] In the US, John Vincent Atanasoff and Clifford E. Berry of Iowa State University developed and tested the Atanasoff–Berry Computer (ABC) in 1942,[45] the first "automatic electronic digital computer".[46] This design was also all-electronic and used about 300 vacuum tubes, with capacitors fixed in a mechanically rotating drum for memory.[47]'),
  0.61609745),
 (Document(metadata={'source': 'computer.txt'}, page_content='modern electronic computer.[62] As soon as the Baby had demonstrated the feasibility of its design, a project began at the university to develop it into a practically useful computer, the Manchester Mark 1.'),
  0.65563977),
 (Document(metadata={'source': 'computer.txt'}, page_content="In 1941, Zuse followed his earlier machine up with the Z3, the world's first working electromechanical programmable, fully automatic digital computer.[38][39] The Z3 was b

In [70]:
# Wrap the local FAISS store as a retriever.
# NOTE(review): `retriever` is not referenced by the cells visible below, which
# build their own retriever from `vectorstore` — confirm whether it is still needed.
retriever = library.as_retriever()

# Setting it up with Ollama

In [71]:
import ollama
from langchain.chains.question_answering import load_qa_chain
from langchain_ollama.chat_models import ChatOllama
# Import OllamaEmbeddings from langchain_ollama, the package this cell already
# uses for ChatOllama, instead of the deprecated `langchain.embeddings` path.
from langchain_ollama import OllamaEmbeddings


# Chat model that generates the final answer.
llm = ChatOllama(model='llama3.2:latest')

# Embedding model used to index the chunks and to embed incoming queries.
# NOTE(review): llama3.2 is a chat model; a dedicated embedding model may
# retrieve more relevant chunks — confirm before relying on the answers.
embeddings = OllamaEmbeddings(model='llama3.2:latest')

# Index the same chunk texts and metadata in a second FAISS store, this time
# embedded through Ollama.
vectorstore = FAISS.from_texts(texts=texts, embedding=embeddings, metadatas=metadata)

# Question-answering chain of type "stuff" that combines the retrieved documents for the LLM.
combine_documents_chain = load_qa_chain(llm=llm, chain_type="stuff")

# RetrievalQA chain: retrieve chunks from the Ollama-embedded store, then answer with the LLM.
qa = RetrievalQA(
    combine_documents_chain=combine_documents_chain,
    retriever=vectorstore.as_retriever(),
)

In [72]:
# Same question text as Query1, this time put to the Ollama-backed QA chain.
# NOTE(review): "whats was" is a typo; the string is left untouched because it
# is the exact text sent to the chain.
Query2 = "whats was the world's first electronic digital programmable computer?"

In [73]:
# Run the retrieval + QA chain; the displayed result is a dict with
# 'query' and 'result' keys.
answer = qa.invoke(Query2)

In [74]:
# Display the chain's response.
answer

{'query': "whats was the world's first electronic digital programmable computer?",
 'result': "The EDVAC (Electronic Discrete Variable Automatic Computer), designed by John Mauchly and J. Presper Eckert at the University of Pennsylvania, is often considered to be the world's first electronic digital programmable computer. It was completed in 1945 and built upon earlier work on the development of an electronic stored-program digital computer."}