# Fazendo uma pergunta usando a biblioteca ollama

In [None]:
import ollama
# Read a question from the user and stream the llama3 answer as it is generated.
formatted_prompt = input('Faça uma pergunta: ')
user_message = {'role': 'user', 'content': formatted_prompt}

result = ollama.chat(model='llama3', messages=[user_message], stream=True)

# With stream=True the result is iterated chunk by chunk; each chunk carries a
# piece of the answer under ['message']['content'] (a non-streaming call would
# be read once as result['message']['content']).
for chunk in result:
    piece = chunk['message']['content']
    print(piece, end='', flush=True)

# Fazendo uma pergunta usando a biblioteca langchain_community

In [None]:
from langchain_community.llms import Ollama
# The commented line below shows how to use an Ollama server running on another computer.
# llm = Ollama(model="llama3",base_url='http://192.168.20.50:11434', temperature=0.1)
llm = Ollama(temperature=0.1, model="llama2")

# Ask the user for a question, run it through the model and show the full answer.
formatted_prompt = input('Faça uma pergunta: ')
answer = llm.invoke(formatted_prompt)
print(answer)

# Fazendo uma pergunta usando a biblioteca langchain_community em stream

In [None]:
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Stream the answer to stdout token by token through the callback handler.
llm = Ollama(
    model="llama2",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)
formatted_prompt = input('Faça uma pergunta: ')

# The handler writes every token to stdout while invoke() is running, so the
# returned text is stored instead of printed: printing it as well would show
# the whole answer twice.
response = llm.invoke(formatted_prompt)
print()  # terminate the streamed line

# Pesquisa na internet e retorna uma informação

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
# Pode ter que resolver problemas com chromadb e sqlite3

import ollama

def load_and_retrieve_docs(url):
    """Load a web page, index its chunks in Chroma and return a retriever.

    The page at ``url`` is fetched with ``WebBaseLoader``, split with
    ``chunk_size=200`` / ``chunk_overlap=50``, embedded with the
    ``nomic-embed-text`` Ollama model and stored in a Chroma vector store.

    Returns the store's similarity retriever, configured with ``k=2``.
    """
    page_loader = WebBaseLoader(web_paths=(url,), bs_kwargs={})
    pages = page_loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=200)
    chunks = splitter.split_documents(pages)

    store = Chroma.from_documents(
        documents=chunks,
        embedding=OllamaEmbeddings(model="nomic-embed-text"),
    )
    return store.as_retriever(search_kwargs={"k": 2}, search_type="similarity")

def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line."""
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

def rag_chain(url, question):
    """Answer ``question`` using context retrieved from the page at ``url``.

    Builds a retriever for the page, fetches the chunks matching the question,
    and sends the question plus that context to the ``llama2`` model through
    ``ollama.chat``. Returns the content of the model's reply message.
    """
    matching_docs = load_and_retrieve_docs(url).invoke(question)
    formatted_context = format_docs(matching_docs)
    user_message = {
        'role': 'user',
        'content': f"Question: {question}\n\nContext: {formatted_context}",
    }
    reply = ollama.chat(model='llama2', messages=[user_message])
    return reply['message']['content']

# Example run: answer a question using a GitHub project page as the context source.
url = "https://github.com/mvdiogo/CrewAI"
question = "Como instalar o crewai"

result = rag_chain(question=question, url=url)
print(result)

# Melhora a qualidade do retriever usando BM25

In [None]:
from langchain_community.retrievers import BM25Retriever

# Build a BM25 retriever over the lines of a local text file and query it.
nome_arquivo = "quemmatouodete.txt"

# Decode explicitly as UTF-8 so accented text is read the same way on every
# platform (assumes the file is saved as UTF-8 — confirm), and skip blank lines
# so they are not indexed as empty documents.
with open(nome_arquivo, 'r', encoding='utf-8') as arquivo:
    linhas = [linha.strip() for linha in arquivo if linha.strip()]

# BM25Retriever takes the number of documents to return as `k`; the previous
# `similarity_top_k` keyword is not a field of this class, so the requested
# top-5 was not applied.
retriever = BM25Retriever.from_texts(linhas, k=5)

result = retriever.invoke("quem matou odete roitman")

print(result)

# Melhora a qualidade do retriever pesquisando na wikipedia pela biblioteca da langchain

In [None]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Query Wikipedia in Portuguese through LangChain's Wikipedia tool.
params = {"lang":"pt","top_k_results":5}

# The settings have to be unpacked into keyword arguments: the wrapper exposes
# `lang` and `top_k_results` fields but no `params` field, so passing the dict
# as `params=` never applied the language or the result count.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**params))

result = wikipedia.run("coronel silvino")
print(result)

# Melhora a qualidade do retriever pesquisando na wikipedia pela biblioteca nativa

In [None]:
import wikipedia
# Switch the wikipedia client to Portuguese before querying.
wikipedia.set_lang('pt')

# Alternative left by the author: result = wikipedia.search('Coronel silvino')
summary_options = dict(sentences=10, auto_suggest=True)
result = wikipedia.summary('Coronel Silvino', **summary_options)
print(result)

# Melhora a qualidade do retriever pesquisando no DuckDuckGoSearchResults

In [None]:
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

# Search DuckDuckGo for results from the last day, restricted to Brazil.
# Region codes are written "<country>-<language>", so Brazilian Portuguese is
# "br-pt" (the previous "pt-br" is not a DuckDuckGo region code).
wrapper = DuckDuckGoSearchAPIWrapper(region="br-pt", time="d", max_results=10)

# Only the tool bound to the configured wrapper is created; the earlier
# default-configured instance was overwritten before ever being used.
# NOTE(review): the results tool may cap the number of hits with its own
# setting rather than the wrapper's max_results — confirm for the installed version.
search = DuckDuckGoSearchResults(api_wrapper=wrapper)

print(search.run("flisol"))

# Buscando informações de arquivo não estruturado (imagens)

In [None]:
from langchain_community.document_loaders.image import UnstructuredImageLoader
from langchain_community.document_loaders import ImageCaptionLoader #apt install tesseract-ocr
# Load "1.png" through UnstructuredImageLoader and print the loaded documents.
# NOTE(review): this presumably relies on the tesseract-ocr system package
# mentioned next to the imports — confirm which loader actually needs it.
loader = UnstructuredImageLoader("1.png")
data = loader.load()
print(data)

# Load 'ogaroto.jpg' through ImageCaptionLoader and print the result.
# `loader` is rebound here, so the first loader is no longer reachable by name.
loader = ImageCaptionLoader('ogaroto.jpg')
doc = loader.load()
print(doc)