In [2]:
import os
import sys
from dotenv import load_dotenv

load_dotenv()

env_variables_names = ["NEWS_API_KEY"]
for env_variable in env_variables_names:
    if env_variable not in os.environ:
        raise Exception(f"{env_variable} env variable should be defined")

module_path = os.path.abspath(os.path.join('..', '..', 'news-bias-agent'))
if module_path not in sys.path:
    sys.path.append(module_path)

%load_ext autoreload
%autoreload 2

In [3]:
from newsapi import NewsApiClient
import pandas as pd

newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

sources = newsapi.get_sources(language='en', country='us')
sources_df = pd.DataFrame(sources["sources"])
print(f"Available sources: {len(sources['sources'])}")
sources_df.info()
sources_df.head()


Available sources: 55
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           55 non-null     object
 1   name         55 non-null     object
 2   description  55 non-null     object
 3   url          55 non-null     object
 4   category     55 non-null     object
 5   language     55 non-null     object
 6   country      55 non-null     object
dtypes: object(7)
memory usage: 3.1+ KB


Unnamed: 0,id,name,description,url,category,language,country
0,abc-news,ABC News,"Your trusted source for breaking news, analysi...",https://abcnews.go.com,general,en,us
1,al-jazeera-english,Al Jazeera English,"News, analysis from the Middle East and worldw...",https://www.aljazeera.com,general,en,us
2,ars-technica,Ars Technica,The PC enthusiast's resource. Power users and ...,https://arstechnica.com,technology,en,us
3,associated-press,Associated Press,The AP delivers in-depth coverage on the inter...,https://apnews.com/,general,en,us
4,axios,Axios,Axios are a new media company delivering vital...,https://www.axios.com,general,en,us


In [None]:
articles = newsapi.get_everything(
    q='climate change',
    sources='cnn,fox-news,abc-news',
    page_size=20,
    sort_by='relevancy' # Valid values: {"relevancy", "popularity", "publishedAt"}
)

for article in articles['articles']:
    print(f"{article['source']['name']}: {article['title']}")

CNN: Welcome to the planet’s newest oil frontier
ABC News: Drought linked to increased conflict between humans and wildlife in California
CNN: Taps may run dry in this country, where the water crisis is so severe it can be seen from space
ABC News: WATCH: Former ABC News senior correspondent Jim Avila dies at 69
ABC News: WATCH: 'Mission possible' for sweets-loving twins
ABC News: WATCH: Jury awards woman $1.75M over husband's alleged affair with TikTok influencer
ABC News: WATCH: Leonardo DiCaprio eulogizes primatologist Jane Goodall
ABC News: WATCH: First lady Melania Trump speaks at signing of 'Fostering the Future' executive order
ABC News: WATCH: White House dealing with fallout from newly released Epstein emails
ABC News: WATCH: Train station in Ukraine pummeled by overnight Russian strikes
ABC News: WATCH: James Cameron talks 'Avatar: Fire and Ash'
ABC News: WATCH: Big cats rescued around the world find home in England
ABC News: WATCH: New Arizona rep. says she'll sign Epstein f

In [9]:
articles_df = pd.json_normalize(articles["articles"]) # this instead of pd.DataFrame(...) to handle nested dicts