# Introduction
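In this notebook we explore the News API: we list the available news sources, query articles about the coronavirus, and write the article descriptions to S3.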

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project's .py files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [39]:
from covid.config import Config, Secrets
from covid.ingestion.newsapi import create_newsapi_client, query_articles
from covid.utils.io import write_text_to_s3
from covid.utils.aws import create_s3_resource

# Setup

In [40]:
# File locations, relative to this notebook: project configuration and the
# local secrets file that holds the API key.
path_to_config = '../config.json'
path_to_secrets = '../secrets.json'

# Load the configuration first, then the secrets.
config = Config(path_to_config)
secrets = Secrets(path_to_secrets)

# Build the news API client from the key stored in secrets.json.
api_key = secrets.GOOGLE_NEWS_API_KEY
client = create_newsapi_client(api_key)

# Explore available news sources

In [19]:
# Collect the ids of the news sources the API offers and preview the first few.
# Uses the `client` built in the setup cell: the name `newsapi` that this cell
# referenced before is never defined in this notebook, so the cell failed on a
# fresh kernel.
# NOTE(review): assumes create_newsapi_client returns a client exposing
# get_sources() (as newsapi-python's NewsApiClient does) - confirm.
sources = [source['id'] for source in client.get_sources()['sources']]
print(sources[:5])

['abc-news', 'abc-news-au', 'aftenposten', 'al-jazeera-english', 'ansa']


# Query articles

In [20]:
# Search expression: the individual terms combined with OR.
search_terms = ('corona', 'covid', 'coronavirus')
keywords = ' OR '.join(search_terms)

# Start and end of the date range passed to the query.
start_date, end_date = '2020-02-26', '2020-03-29'

# Fetch the matching articles through the client created in the setup cell.
articles = query_articles(client, keywords, start_date, end_date)

print(f"Retrieved {len(articles)} Articles")

Retrieved 100 Articles


# Write to S3

All data should be written to a single text file, with each article's description on a separate line.

In [51]:
# Destination of the upload: S3 resource, bucket from the project config,
# and the object key inside the bucket.
s3_resource = create_s3_resource(region_name='us-east-1')
bucket = config.DEFAULT_BUCKET
path = 'raw/articles.txt'

# Articles whose description is missing or empty are skipped: a None value
# would raise TypeError when concatenated to a str, and an empty one would
# only add a blank line.
descriptions = [
    article['description']
    for article in articles
    if article.get('description')
]

# One description per line, separated by ' \n' as before. Joining places the
# separator only BETWEEN entries, so no trailing separator has to be sliced
# off. The previous `text[:-3]` removed three characters although the
# separator is only two long, which cut the last character of the final
# description.
text = ' \n'.join(descriptions)

write_text_to_s3(s3_resource, bucket, text, path)