In [7]:
import getpass
import os

# Resolve the OpenAI credential without ever hardcoding it in the notebook.
# os.getenv returns None when the variable is unset, and os.environ only accepts
# str values, so fall back to an interactive (non-echoing) prompt in that case.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    api_key = getpass.getpass("OPENAI_API_KEY: ")
os.environ["OPENAI_API_KEY"] = api_key

from langchain_openai import ChatOpenAI

# Chat model that generates the final answer at the end of the RAG chain.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
)

In [8]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load, chunk and index the contents of the web page below.
# parse_only restricts BeautifulSoup to <p> elements, so only paragraph text is kept.
loader = WebBaseLoader(
    web_paths=("http://environnement.wallonie.be/de/eso/eau_distribution/",),
    bs_kwargs={"parse_only": bs4.SoupStrainer("p")},
)
docs = loader.load()

# Split into overlapping chunks (size 1000, overlap 200) before embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Fail early with a clear message if the scrape yielded no text (e.g. the page
# layout changed or the request returned an empty body); otherwise the problem
# would only surface later, when embedding or querying an empty index.
if not splits:
    raise ValueError(
        "No text chunks were extracted from the configured web page; "
        "check the URL and the <p>-only parsing filter."
    )

vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the page.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents concatenated in order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)


# Inputs for the prompt: "context" is the retriever output piped through
# format_docs, while "question" receives the chain input via RunnablePassthrough.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: prompt inputs -> prompt -> chat model -> string output parser.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Bare last expression so the notebook displays the answer as the cell output.
rag_chain.invoke("quelle est l'origine de l'eau?")



"L'eau provient majoritairement des nappes d'eau souterraine, appelées nappes aquifères, en Wallonie. Les eaux de surface fournissent environ 15% de l'eau de distribution. L'eau du robinet doit répondre à des normes strictes de qualité pour garantir sa sécurité pour la consommation."