In [None]:
import os

import langchain
from langchain.agents import AgentType, initialize_agent
from langchain import PromptTemplate
from langchain.agents.tools import Tool
from langchain.chat_models import ChatOpenAI
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.document_loaders import UnstructuredURLLoader
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from termcolor import colored
from dotenv import load_dotenv

In [None]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API keys used by ChatOpenAI and
# GoogleSerperAPIWrapper further down — confirm which keys are required.
load_dotenv()

In [None]:
# Article whose coverage we want to cross-reference. The default can be
# overridden without editing the notebook by setting ARTICLE_LINK in the
# environment (or in the .env file loaded by the earlier load_dotenv() cell).
DEFAULT_ARTICLE_LINK = "https://www.independent.co.uk/news/world/americas/us-politics/ron-desantis-daniel-penny-jordan-neely-b2338438.html"
article_link = os.environ.get("ARTICLE_LINK") or DEFAULT_ARTICLE_LINK

# The agent must answer with exactly three links, keyed "1", "2" and "3", so
# the structured parser can turn its final answer into a dict.
response_schemas = [
    ResponseSchema(name="1", description="first news article link"),
    ResponseSchema(name="2", description="second news article link"),
    ResponseSchema(name="3", description="third news article link"),
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Prompt handed to the agent. `{link}` is filled per run; the format
# instructions are bound once below as a partial variable.
template = """
Use the search tool to find three separate news articles that are written on the same subject as this news article: {link}
These articles must be written within the same relative time frame as the original news article.
Respond with the link of each of the three articles.
{format_instructions}
"""
initial_prompt = PromptTemplate(
    input_variables=["link"],
    template=template,
    partial_variables={"format_instructions": format_instructions}
)

In [None]:
# Chat model that drives the agent; temperature 0.0 keeps sampling as
# deterministic as the API allows.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.0,
)

In [None]:
# Serper-backed Google search client configured for news results.
# NOTE(review): tbs="qdr:w" looks like Google's "past week" filter — confirm;
# if so, it will not match the publication window of an older article.
serp_tool = GoogleSerperAPIWrapper(
    type="news",
    tbs="qdr:w",
)

In [None]:
# Exploratory check: run one hard-coded query directly against the search
# wrapper and display the raw result as the cell output.
serp_tool.results("Ron DeSantis Daniel Penny Jordan Neely news articles")

In [None]:
def search_news_links(query: str) -> str:
    """Search for news and return one line per hit, including its URL.

    The prompt asks the agent to answer with article links, but
    `serp_tool.run` returns only snippet text, so the agent would never see
    a URL. This helper reads the structured output of `serp_tool.results`
    and keeps the title, date and link of every hit that has a link.

    Args:
        query: Free-text search query produced by the agent.

    Returns:
        Newline-separated "title (date) - link" lines, or a short
        "nothing found" message when the search yields no usable hits.
    """
    hits = serp_tool.results(query).get("news", [])
    lines = [
        f"{hit.get('title', '')} ({hit.get('date', 'unknown date')}) - {hit['link']}"
        for hit in hits
        if hit.get("link")
    ]
    return "\n".join(lines) if lines else "No news articles found"


# Single tool exposed to the agent; name and description are what the agent
# sees when deciding which tool to call.
tools = [
    Tool(
        name="Search Tool",
        description="Useful for searching for news articles",
        func=search_news_links,
    )
]

In [None]:
# Zero-shot ReAct agent over the search tool. verbose=True prints its
# reasoning trace; max_iterations=5 caps the number of steps.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    max_iterations=5,
)

In [None]:
# Fill the article link into the prompt template, then run the agent on it.
agent_prompt = initial_prompt.format(link=article_link)
agent_results = agent(agent_prompt)

In [None]:
# Parse the agent's final answer with the structured output parser and keep
# only the values (the three links), dropping the "1"/"2"/"3" keys.
parsed_links = output_parser.parse(agent_results["output"])
related_article_urls = [url for url in parsed_links.values()]

In [None]:
# Wrap the URLs extracted from the agent's answer in an UnstructuredURLLoader.
loader = UnstructuredURLLoader(urls=related_article_urls)

In [None]:
# Install libmagic into the kernel's environment via the %pip magic.
# NOTE(review): presumably a dependency of the URL loader's file-type
# detection — confirm. Install cells are easier to reproduce when placed at
# the top of the notebook with a pinned version.
%pip install libmagic 

In [None]:
# Run the loader and keep its result in `data`.
# NOTE(review): presumably this fetches every URL over the network and
# returns one document per article — confirm against the loader's docs.
data = loader.load()

In [None]:
# Bare last expression: renders the full loaded result as the cell output.
data