### Load libraries

In [1]:
import yaml
import json
import requests
from datetime import datetime,timedelta
import time
import math
from google.cloud import storage

### Configuration from a YAML file

In [2]:
config_file = open('news_parameter_request.yaml', 'r')
config = yaml.safe_load(config_file)

### Enviroment variables

API requests are limited to 100 per day, so we limit the topics to max_days * max_topics requests per code run.

In [3]:
bucket = config['bucket']
api_key = config['api_key']
language = config['language']
topics = config["topics"]
date_start = config["period"][0]
date_end = config["period"][1]
base_url = f'https://newsapi.org/v2/everything?apiKey={api_key}&language={language}&'
max_days = 1
max_topics = 1

days = (datetime.strptime(date_end, '%d-%m-%Y').date() - datetime.strptime(date_start, '%d-%m-%Y').date()).days
dates = [(datetime.strptime(date_start, '%d-%m-%Y').date() + timedelta(days = x)).strftime("%Y-%m-%d") for x in range(days + 1)]
dates = dates[:max_days]
topics = topics[:max_topics]

print(dates)
print(topics)

['2022-03-05']
['cryptocurrencies']


### API GET Request

In [4]:
data = []

for topic in topics:
    for date in dates:
        request_body = base_url + f'q={topic}&from={date}&sortBy=publishedAt'
        print(request_body)
        payload = requests.get(request_body).json()
        payload['topic'] = topic
        payload['date'] = date
        data.append(payload)

https://newsapi.org/v2/everything?apiKey=API_KEY&language=en&q=cryptocurrencies&from=2022-03-05&sortBy=publishedAt


In [5]:
print(data)

[{'status': 'error', 'code': 'apiKeyInvalid', 'message': 'Your API key is invalid or incorrect. Check your key, or go to https://newsapi.org to create a free API key.', 'topic': 'cryptocurrencies', 'date': '2022-03-05'}]


### Write into Google Cloud Storage

In [None]:
client = storage.Client()
gcs_bucket = client.get_bucket(bucket)

In [None]:
for row in data:
    path = f"news/{row['url']}/data_{row['title']}.json"
    blob = gcs_bucket.blob(path)
    with blob.open(mode = 'w') as file:
        json.dump(row, file)