In [2]:
import langchain
import requests
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.document_loaders import UnstructuredURLLoader
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.output_parsers import RegexParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from termcolor import colored
from bs4 import BeautifulSoup
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
# How many similar articles to use as reference
SIMILAR_COUNT = 3
# Original article url
ARTICLE_URL = "https://www.reuters.com/world/middle-east/turkey-votes-pivotal-elections-that-could-end-erdogans-20-year-rule-2023-05-13/"

In [4]:
response = requests.get(ARTICLE_URL)
soup = BeautifulSoup(response.text, 'html.parser')
title = soup.find('title').get_text().split('|')[0]

In [5]:
serp_tool = GoogleSerperAPIWrapper(tbs="qdr:m")

In [6]:
similar_articles_serp = serp_tool.results(f"{title} news articles")

In [7]:
similar_articles_final = []
for article_meta in similar_articles_serp["organic"]:
    link = article_meta["link"]
    if not link.startswith('https://www.youtube.com') and not link.startswith('https://youtube.com'):
        similar_articles_final.append(link)
        if len(similar_articles_final) == SIMILAR_COUNT:
            break
print(similar_articles_final)

['https://www.cnbc.com/2023/05/14/turkey-election-rivals-both-claim-early-lead-but-runoff-likely.html', 'https://jpost.com/international/article-742977', 'https://www.wsj.com/articles/having-established-turkey-on-the-world-stage-erdogan-faces-risky-vote-at-home-fd0793d9']


In [8]:
total_articles = [ARTICLE_URL]
total_articles.extend(similar_articles_final)

In [9]:
loader = UnstructuredURLLoader(urls=total_articles)
articles_data = loader.load()

In [10]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 250,
    chunk_overlap  = 30,
    length_function = len
)

In [11]:
article_documents = []
for article_content in articles_data[1:]:
    article_documents.extend(text_splitter.create_documents([article_content.page_content]))

In [38]:
primary_texts = text_splitter.split_text([articles_data[0].page_content])

In [13]:
embeddings = OpenAIEmbeddings()
article_db = FAISS.from_documents(article_documents, embeddings)

In [14]:
article_retriever = article_db.as_retriever()

In [15]:
article_retriever.get_relevant_documents("who is the current the current president of Turkey?")

[Document(page_content='TURKISH PRESIDENT Recep Tayyip Erdogan addresses supporters at a rally last week in Istanbul, ahead of today’s presidential and parliamentary elections.  (credit: UMIT BEKTAS/REUTERS)', metadata={}),
 Document(page_content="Turkey headed for a runoff vote after President Tayyip Erdogan led over his opposition rival Kemal Kilicdaroglu in Sunday's election but fell short of an outright majority to extend his 20-year rule of the NATO-member country.", metadata={}),
 Document(page_content="Turkey headed for a runoff vote after President Tayyip Erdogan led over his opposition rival Kemal Kilicdaroglu in Sunday's election but fell short of an outright majority to extend his 20-year rule of the NATO-member country.", metadata={}),
 Document(page_content='The presidential vote will decide not only who leads Turkey but also whether it reverts to a more secular, democratic path, how it will handle its severe cost of living crisis, and manage key relations with Russia, the

In [15]:
# TODO: LLM/Agent reads through article, whenever encountering document with potential misinformation/skewed narrative,
# use get_relevant_documents tool to compare against other articles and determine if a bias is present.

In [43]:
primary_embeddings = OpenAIEmbeddings()
docsearch = FAISS.from_texts(primary_texts, primary_embeddings, metadatas=[{"source": str(i)} for i in range(len(primary_texts))])
query = "who is the current the current president of Turkey?"
docs = docsearch.similarity_search(query)

In [48]:
output_parser = RegexParser(
    regex=r"(.*?)\nScore: (.*)",
    output_keys=["answer", "score"],
)

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:

Question: [question here]
Helpful Answer: [answer here]
Score: [score between 0 and 100]

Begin!

Context:
---------
{context}
---------
Question: {question}
Helpful Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
    output_parser=output_parser,
)
chain = load_qa_with_sources_chain(ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0), chain_type="map_rerank", metadata_keys=['source'], return_intermediate_steps=True, prompt=PROMPT)
result = chain({"input_documents": docs, "question": query}, return_only_outputs=False)

In [16]:
# TODO: Modify map_rerank prompt
# Score: How politically biased section piece of context
# Answer: Why llm thinks context is politically biased

In [50]:
result

{'input_documents': [Document(page_content='The presidential vote will decide not only who leads Turkey but also whether it reverts to a more secular, democratic path, how it will handle its severe cost of living crisis, and manage key relations with Russia, the Middle East and the West.', metadata={'source': '5'}),
  Document(page_content='[1/16]\xa0Turkish President Tayyip Erdogan, accompanied by his wife Ermine Erdogan, greets supporters at the AK Party headquarters in Ankara, Turkey May 15, 2023. REUTERS/Umit Bektas      TPX IMAGES OF THE DAY', metadata={'source': '17'}),
  Document(page_content="KEY PUTIN ALLY\n\nThe choice of Turkey's next president is one of the most consequential political decisions in the country's 100-year history and will reverberate well beyond Turkey's borders.", metadata={'source': '24'}),
  Document(page_content="ISTANBUL, May 14 (Reuters) - Turkey headed for a runoff vote after President Tayyip Erdogan led over his opposition rival Kemal Kilicdaroglu in

In [None]:
# Use zero shot agent in with tool of article_retriever.get_relevant_documents() to determine whether original article contains political bias in comparison to relavent data from other articles