In [1]:
import json
import requests
from os import makedirs
from os.path import join, exists
from datetime import date, timedelta


In [5]:

def get_news_by_date_range(query_terms, get_field, query_fields, show_fields, start_date, end_date):

    ARTICLES_DIR = join('data', 'articles-n')
    makedirs(ARTICLES_DIR, exist_ok=True)

    MY_API_KEY = open("creds_news_api.txt").read().strip()
    API_ENDPOINT = 'http://newsapi.org/v2/top-headlines'
    my_params = {
        'sources': 'fox-news',
        'from': "",
        'to': "",
        'orderBy': "popularity",
        'show-fields': show_fields,
        'q': query_terms,
        'query-fields': query_fields,
        'category': 'general',
        'pageSize': 100,
        'apiKey': MY_API_KEY
    }

    # day iteration from here:
    # http://stackoverflow.com/questions/7274267/print-all-day-dates-between-two-dates
    
    dayrange = range((end_date - start_date).days + 1)
    
    for daycount in dayrange:
        dt = start_date + timedelta(days=daycount)
        datestr = dt.strftime('%Y-%m-%d')
        fname = join(ARTICLES_DIR, datestr + '.json')
        if not exists(fname):
            # then let's download it
            print("Downloading", datestr)
            all_results = []
            my_params['from'] = datestr
            my_params['to'] = datestr
            current_page = 1
            total_pages = 1
            while current_page <= total_pages:
                print("...page", current_page)
                my_params['page'] = current_page
                resp = requests.get(API_ENDPOINT, my_params)
                data = resp.json()
                
                print(data)
                
                #all_results.extend(data['response']['results'])
                for result in data['response']['results']:

                    if(len(result['fields'][get_field]) > 0):
                        print(result['fields'][get_field])
                        all_results.append(result['fields'][get_field])

                # if there is more than one page
                current_page += 1
                total_pages = data['response']['pages']

            with open(fname, 'w') as f:
                print("Writing to", fname)

                # re-serialize it for pretty indentation
                f.write(json.dumps(all_results, indent=2))

In [6]:
get_news_by_date_range('(President OR Election) AND Trump', 'headline', 'headline', 'headline',
                       date(2020, 6, 2), date(2020, 6, 2))

Downloading 2020-06-02
...page 1
{'status': 'error', 'code': 'parametersIncompatible', 'message': 'You cannot mix the sources parameter with the country or category parameters.'}


KeyError: 'response'

In [2]:
MY_API_KEY = open("creds_news_api.txt").read().strip()

url = ('http://newsapi.org/v2/top-headlines?'
       'country=us&'
       'apiKey=' + MY_API_KEY)
response = requests.get(url)
print(response.json())

{'status': 'ok', 'totalResults': 38, 'articles': [{'source': {'id': 'cnn', 'name': 'CNN'}, 'author': 'Leah Asmelash, CNN', 'title': "Washington's new Black Lives Matter street mural is captured in satellite image - CNN", 'description': "Washington's new BLACK LIVES MATTER mural is so huge, you can see it from space.", 'url': 'https://www.cnn.com/2020/06/06/us/black-lives-matter-dc-street-mural-space-trnd/index.html', 'urlToImage': 'https://cdn.cnn.com/cnnnext/dam/assets/200606144518-black-lives-matter-dc-street-space-trnd-super-tease.jpg', 'publishedAt': '2020-06-06T20:03:00Z', 'content': "(CNN)Washington's new BLACK LIVES MATTER street mural is so huge, you can see it from space.\r\nNew satellite images from Planet Labs clearly show the bright yellow message amid the gray Lego-looking b… [+662 chars]"}, {'source': {'id': None, 'name': 'YouTube'}, 'author': None, 'title': 'Destiny 2 Almighty Explosion Live Event | Kotaku - Kotaku', 'description': "For real check out this huge explosion

In [3]:

url = ('http://newsapi.org/v2/everything?'
       'q=US Presidential Election&'
       'from=2020-05-30&'
       'sortBy=popularity&'
       'apiKey=' + MY_API_KEY)

response = requests.get(url)


In [4]:
print(response.json())

{'status': 'ok', 'totalResults': 1410, 'articles': [{'source': {'id': 'techcrunch', 'name': 'TechCrunch'}, 'author': 'Natasha Lomas', 'title': 'Zoom faces criticism for denying free users e2e encryption', 'description': 'What price privacy? Zoom is facing a fresh security storm after CEO Eric Yuan confirmed that a plan to reboot its battered security cred by (actually) implementing end-to-end encryption does not in fact extend to providing this level of security to non-paying…', 'url': 'http://techcrunch.com/2020/06/03/zooms-privacy-premium/', 'urlToImage': 'https://techcrunch.com/wp-content/uploads/2020/04/GettyImages-1208677869.jpg?w=600', 'publishedAt': '2020-06-03T13:16:53Z', 'content': 'What price privacy? Zoom is facing a fresh security storm after CEO Eric Yuan confirmed that a plan to reboot its battered security cred by (actually) implementing end-to-end encryption does not in f… [+7157 chars]'}, {'source': {'id': None, 'name': 'Gizmodo.com'}, 'author': 'George Dvorsky', 'titl