In [10]:
import langchain
from langchain.agents import AgentType, initialize_agent
from langchain import PromptTemplate
from langchain.agents.tools import Tool
from langchain.chat_models import ChatOpenAI
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.document_loaders import UnstructuredURLLoader
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from termcolor import colored
from dotenv import load_dotenv

In [5]:
load_dotenv()

True

In [6]:
article_link = "https://www.independent.co.uk/news/world/americas/us-politics/ron-desantis-daniel-penny-jordan-neely-b2338438.html" # TODO: make configurable

response_schemas = [
    ResponseSchema(name="1", description="first news article link"),
    ResponseSchema(name="2", description="second news article link"),
    ResponseSchema(name="3", description="third news article link"),
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

template = """
Use the search tool to find three separate news articles are written on the same subject of this news article: {link} 
These articles must be written within the same relative time frame as the original news article.
Respond with the link of each of the three articles.
{format_instructions}
"""
initial_prompt = PromptTemplate(
    input_variables=["link"],
    template=template,
    partial_variables={"format_instructions": format_instructions}
)

In [9]:
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.0)

In [11]:
serp_tool = GoogleSerperAPIWrapper(type="news")

In [12]:
tools = [
    Tool(name="Search Tool",
        description="Useful for searching for news articles",
        func=serp_tool.run)
]

In [13]:
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, max_iterations=5)

In [14]:
agent_results = agent(initial_prompt.format(link=article_link))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the search tool to find news articles on the same subject as the original article.
Action: Search Tool
Action Input: "Ron DeSantis Daniel Penny Jordan Neely news articles"[0m
Observation: [36;1m[1;3mThe man who put Neely in a fatal chokehold on a New York City subway car is facing manslaughter charges. 22h ago. Fox News. DeSantis fundraises ... Ron DeSantis (R-Fla.) and Rep. Matt Gaetz (R-Fla.) offered their support for Daniel Penny, after Penny was charged Friday with second-degree ... A fundraising campaign for former Marine Daniel Penny, who was charged with manslaughter after holding Jordan Neely in a fatal chokehold on ... Missing: articles | Must include:articles. Supporters are raising defense funds for Daniel Penny, the man charged for killing Jordan Neely on the NYC subway. Daniel Penny Jordan Neely Ex-Marine Daniel Penny is charged with manslaughter in the Jordan Neely chokehold death. Juan Vazquez D

In [17]:
related_article_urls = list(output_parser.parse(agent_results["output"]).values())

In [18]:
loader = UnstructuredURLLoader(urls=related_article_urls)

In [26]:
%pip install libmagic 

Collecting libmagic
  Downloading libmagic-1.0.tar.gz (3.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Installing collected packages: libmagic
  Running setup.py install for libmagic: started
  Running setup.py install for libmagic: finished with status 'done'
Successfully installed libmagic-1.0
Note: you may need to restart the kernel to use updated packages.


  DEPRECATION: libmagic is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559

[notice] A new release of pip is available: 23.0.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [27]:
data = loader.load()

Error fetching or processing https://www.independent.co.uk/news/world/americas/us-politics/ron-desantis-daniel-penny-jordan-neely-b2338438.html, exeption: libmagic is unavailable. Filetype detection on file-like objects requires libmagic. Please install libmagic and try again.
Error fetching or processing https://www.newsbreak.com/news/2239472382679/daniel-penny-surrenders-in-jordan-neely-chokehold-death, exeption: libmagic is unavailable. Filetype detection on file-like objects requires libmagic. Please install libmagic and try again.


In [28]:
data

