In [1]:
import os

from lib.search import do_search

# Question sent to the web-search helper; the response is reused by the cells below.
# NOTE(review): this query says "who win" while the question later passed to the
# chain says "who won" — confirm whether the difference is intentional.
search_query = "who win the last elections in the Netherlands"
search_results = do_search(search_query)

In [2]:
# Inspect the "organic" entry of the search response before scraping its links.
organic_results = search_results["organic"]
print(organic_results)

[{'title': '2021 Dutch general election - Wikipedia', 'link': 'https://en.wikipedia.org/wiki/2021_Dutch_general_election', 'snippet': 'General elections were held in the Netherlands from 15 to 17 March 2021 to elect all 150 members of the House of Representatives. Following the elections ...', 'position': 1}, {'title': '2023 Dutch general election - Wikipedia', 'link': 'https://en.wikipedia.org/wiki/2023_Dutch_general_election', 'snippet': 'Early general elections were held in the Netherlands on 22 November 2023 to elect the members of the House of Representatives. The elections had been ...', 'position': 2}, {'title': 'Dutch election: Geert Wilders records massive shock win in Netherlands - AP News', 'link': 'https://apnews.com/article/netherlands-election-candidates-prime-minister-f31f57a856f006ff0f2fc4984acaca6b', 'snippet': "The far-right, anti-Islam candidate's win is one of the biggest political upsets in Dutch politics since World War II.", 'date': 'Nov 23, 2023', 'position': 3}

In [3]:
from langchain_core.documents import Document
from lib.scrapping import get_html, get_text

# Browser-like HTTP request headers: a Firefox-on-Ubuntu user agent with a
# Google referer.
# NOTE(review): nothing in this cell passes `headers` to get_html/get_text —
# confirm whether lib.scrapping is supposed to receive it or whether it is dead.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.5",
    # 'Alt-Used' and 'Host' will be set dynamically
    "Connection": "keep-alive",
    "Referer": "https://www.google.com/",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/111.0",
}

# Download every organic search hit and wrap the extracted text in a LangChain
# Document, keeping the search-result fields (title, link, snippet, ...) as
# the document metadata.
pages: list[Document] = []

for result in search_results["organic"]:
    link = result["link"]

    html = get_html(link)
    # NOTE(review): the meaning of the third positional argument (False) is
    # defined in lib.scrapping and is not visible here — confirm.
    text = get_text(html, link, False)

    # Shallow-copy the result so later edits to a Document's metadata cannot
    # leak back into the entries of search_results["organic"].
    pages.append(Document(page_content=text, metadata=dict(result)))

# Print a compact per-page summary rather than every page's full text, which
# would flood the cell output and bloat the saved notebook.
print(f"Scraped {len(pages)} pages")
for page in pages:
    print(f"{len(page.page_content):>8} chars  {page.metadata['link']}")


Query: https://en.wikipedia.org/wiki/2021_Dutch_general_election
Query: https://en.wikipedia.org/wiki/2023_Dutch_general_election
Query: https://apnews.com/article/netherlands-election-candidates-prime-minister-f31f57a856f006ff0f2fc4984acaca6b
Query: https://www.aa.com.tr/en/europe/final-official-results-of-dutch-elections-confirm-wilders-win/3070502
Query: https://www.bbc.com/news/world-europe-67504272
Query: https://www.politico.eu/europe-poll-of-polls/netherlands/
Query: https://www.cnbc.com/2023/11/23/dutch-election-what-comes-next-after-shock-far-right-victory.html
Query: https://www.haaretz.com/world-news/europe/2023-11-30/ty-article-magazine/.premium/the-elections-that-shook-the-world-how-the-anti-muslim-far-right-won-the-netherlands/0000018c-2019-d9ef-abcd-3e3923060000
Query: https://www.atlanticcouncil.org/blogs/new-atlanticist/the-shocking-dutch-election-is-done-the-political-maneuvering-is-just-beginning/
Query: https://time.com/6339173/dutch-election-results-geert-wilders-f

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS

# Split the scraped pages into overlapping chunks, embed the chunks with
# OpenAI embeddings, index them in FAISS and expose the index as a retriever.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
splits = text_splitter.split_documents(pages)

embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

In [5]:
from langchain_community.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# Prompt that restricts the model to the retrieved context when answering.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# The API key is read from the OPENAI_API_KEY environment variable.
model = ChatOpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
    model_name="gpt-4-1106-preview",
)

# The incoming question goes to the retriever to fill {context} and is passed
# through unchanged to fill {question}.
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}

# Pipeline: gather inputs -> fill the prompt -> call the chat model -> plain string.
chain = chain_inputs | prompt | model | StrOutputParser()

In [6]:
# Ask the retrieval chain the question; leaving the call as the last expression
# lets the notebook display the returned answer.
question = "who won the last elections in the Netherlands"
chain.invoke(question)

"Geert Wilders' far-right Party for Freedom (PVV) won the last elections in the Netherlands."