In [None]:
import csv
import time
import sys, json
import urllib2

In [None]:
def parse_articles(articles):
    '''
    Parse a decoded NYT Article Search API response into a list of flat
    dictionaries, one per article.
    Inspired by: http://dlab.berkeley.edu/blog/scraping-new-york-times-articles-python-tutorial

    articles: the JSON response already decoded into Python objects; the
        articles are read from articles['response']['docs'].

    Returns a list of dicts. Every dict carries the same keys:
        id, headline, date, section, snippet, source, type, url,
        word_count, locations, subjects.
    headline and snippet are UTF-8 encoded; snippet is empty when the API
    returned null or omitted it, so that every row has an identical key set
    (csv.DictWriter rejects rows whose keys are not all in its header).

    Raises KeyError when a document lacks any of the other fields.
    '''
    news = []
    for doc in articles['response']['docs']:
        dic = {}
        dic['id'] = doc['_id']
        dic['headline'] = doc['headline']['main'].encode("utf8")
        dic['date'] = doc['pub_date'][0:10] # cutting time of day.
        dic['section'] = doc['section_name']

        # Always emit the key, even for a null/missing snippet.
        snippet = doc.get('snippet')
        if snippet is None:
            snippet = u''
        dic['snippet'] = snippet.encode("utf8")

        dic['source'] = doc['source']
        dic['type'] = doc['type_of_material']
        dic['url'] = doc['web_url']
        dic['word_count'] = doc['word_count']

        # Split the keywords into locations and subjects in a single pass.
        # The checks are substring tests on the keyword name, and they are
        # independent: a keyword may land in both lists.
        locations = []
        subjects = []
        for keyword in doc['keywords']:
            if 'glocations' in keyword['name']:
                locations.append(keyword['value'])
            if 'subject' in keyword['name']:
                subjects.append(keyword['value'])
        dic['locations'] = locations
        dic['subjects'] = subjects

        news.append(dic)
    return news

In [None]:
def articles_fetch(beginDate, endDate):
    '''
    Inputs beginDate and endDate (YYYYMMDD), returns all articles with query terms: economy, American, market

    beginDate, endDate: converted with str() and sent as the begin_date /
        end_date query parameters.

    The API key is taken from the NYT_API_KEY environment variable when it
    is set; otherwise the key embedded below is used.

    Returns a list of article dictionaries as produced by parse_articles,
    requested with sort=oldest. Result pages after the first that fail with
    an HTTP error, or whose documents raise KeyError while being parsed, are
    skipped.

    Raises urllib2.HTTPError when the first page fails twice in a row.
    '''
    import os  # local import: only needed for the API-key override

    #Modify each aspect of API Request URL
    apiUrl = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?'
    # NOTE(review): a credential should not live in source control. The
    # literal is kept only as a backward-compatible fallback; prefer setting
    # NYT_API_KEY and rotating this key.
    key = 'api-key=' + os.environ.get('NYT_API_KEY', '12e8e79c0c184049bd19328c04daf1fb')
    query = '&q=economy+American+market'  # set the query word here (AND, not OR), terms delimited by +
    apiDate = '&begin_date=' + str(beginDate) + '&end_date=' + str(endDate)
    sort = "&sort=oldest"

    # Everything except the page number is identical for every request.
    baseUrl = ''.join([apiUrl, key, query, apiDate, sort])

    hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}

    reqUrl = baseUrl + '&page=0'

    #print URL to test (note: the printed URL contains the API key)
    print(reqUrl)

    time.sleep(1)

    req = urllib2.Request(reqUrl, headers=hdr)

    # The first page is mandatory (it carries the hit count), so retry once
    # after a short pause; a second failure propagates to the caller.
    try:
        first_page = _read_json(req)
    except urllib2.HTTPError:
        time.sleep(1)
        first_page = _read_json(req)

    #number of articles
    num_results = first_page['response']['meta']['hits']

    # Index of the last page that can hold results, assuming 10 results per
    # page. (num_results - 1) // 10 avoids requesting a trailing empty page
    # when the hit count is an exact multiple of 10, and yields -1 (no
    # extra pages) for zero hits.
    last_page = (num_results - 1) // 10

    #keep everything in all_articles
    all_articles = []
    all_articles.extend(parse_articles(first_page))

    for i in range(1, last_page + 1):
        #sleep for 1 sec between each request to prevent API Limit
        time.sleep(1)
        reqUrl = baseUrl + '&page=' + str(i)

        print(i)

        req = urllib2.Request(reqUrl, headers=hdr)

        try:
            payload = _read_json(req)
        except urllib2.HTTPError as e:
            # Report the failure before skipping this page.
            print(e.fp.read() if e.fp is not None else e)
            continue

        # parse_articles raises before anything is appended, so a page is
        # either added completely or not at all.
        try:
            all_articles.extend(parse_articles(payload))
        except KeyError:
            continue

    return all_articles


def _read_json(req):
    '''Open the request, decode the JSON body, and always close the connection.'''
    conn = urllib2.urlopen(req)
    try:
        return json.loads(conn.read())
    finally:
        conn.close()

In [None]:
def year_fetch(startYear, endYear):
    '''
    Accepts starting and finishing year as inputs, saves CSV of articles

    startYear, endYear: integers; every year in the inclusive range is
        fetched with articles_fetch (January 1st through December 31st).

    For each year that returns at least one article, writes
    'economy-mentions_<year>.csv' in the current directory with a header
    row. Years with no articles are reported and produce no file.
    '''
    for year in range(startYear, endYear + 1):
        time.sleep(1)
        print(year)
        fetched = articles_fetch(str(year) + "0101", str(year) + "1231")

        if not fetched:
            print('No articles found for ' + str(year) + '; no file written')
            continue

        # Build the header from every row, in order of first appearance.
        # Taking the keys of a single row fails when there are too few rows,
        # and makes DictWriter raise ValueError as soon as another row
        # carries a key the chosen row lacks.
        keys = []
        for row in fetched:
            for field in row:
                if field not in keys:
                    keys.append(field)

        # 'wb': Python 2's csv module expects files opened in binary mode.
        with open('economy-mentions_' + str(year) + '.csv', 'wb') as output_file:
            dict_writer = csv.DictWriter(output_file, keys)
            dict_writer.writeheader()
            dict_writer.writerows(fetched)
        time.sleep(1)

In [None]:
# Python 2's input() evaluates whatever is typed as Python code; read the raw
# text instead and convert it explicitly, so anything that is not a year
# raises ValueError rather than being executed.
startYr = int(raw_input("Starting year? \n"))
endYr = int(raw_input("Ending year? \n"))

# Fetch and save one CSV per year for the inclusive range startYr..endYr.
year_fetch(startYr, endYr)