In [1]:
import json
import os

import pandas as pd
import requests
from nytimesarticle import articleAPI

In [2]:
# The API key is a credential and must not be committed with the notebook:
# read it from the NYT_API_KEY environment variable instead.
# NOTE(review): the key that was previously hard-coded here should be
# treated as leaked and revoked.
NYT_API_KEY = os.environ.get('NYT_API_KEY')
if not NYT_API_KEY:
    raise RuntimeError('Set the NYT_API_KEY environment variable to your NYT API key.')
api = articleAPI(NYT_API_KEY)

In [37]:
def parse_articles(articles):
    '''
    Parse a NYT Article Search API response into a list of dictionaries.

    Each document found under articles['response']['docs'] becomes one
    dictionary with the keys: id, headline, desk, date, section, source,
    type, url, word_count, locations, subjects and, only when the API
    supplied a value, abstract and snippet.

    A field that is missing from a document is stored as None (or left
    out, for abstract/snippet) instead of aborting the whole page, so one
    incomplete document no longer discards every other article.

    Returns None when the payload has no 'response'/'docs' entry (for
    example an error message instead of results); callers can use that to
    tell "unusable response" apart from "no results" (an empty list).
    '''
    try:
        docs = articles['response']['docs']
    except (KeyError, TypeError):
        return None

    news = []
    for doc in docs or []:
        dic = {}
        dic['id'] = doc.get('_id')

        # abstract/snippet are only recorded when present, as before.
        for field in ('abstract', 'snippet'):
            text = doc.get(field)
            if text is not None:
                dic[field] = text.encode("utf8")

        # Only read 'main' when headline really is a mapping.
        headline = doc.get('headline')
        main = headline.get('main') if isinstance(headline, dict) else None
        dic['headline'] = main.encode("utf8") if main is not None else None

        dic['desk'] = doc.get('news_desk')
        pub_date = doc.get('pub_date')
        dic['date'] = pub_date[0:10] if pub_date else None  # cutting time of day.
        dic['section'] = doc.get('section_name')
        dic['source'] = doc.get('source')
        dic['type'] = doc.get('type_of_material')
        dic['url'] = doc.get('web_url')
        dic['word_count'] = doc.get('word_count')

        # Collect locations and subjects in a single pass over the keywords.
        locations = []
        subjects = []
        for keyword in doc.get('keywords') or []:
            name = keyword.get('name') or ''
            if 'glocations' in name:
                locations.append(keyword.get('value'))
            if 'subject' in name:
                subjects.append(keyword.get('value'))
        dic['locations'] = locations
        dic['subjects'] = subjects

        news.append(dic)
    return news

In [73]:
def get_articles(date, query=None, max_pages=100):
    '''
    Fetch and parse the Politics-desk articles for one year.

    date      -- the year as a string (e.g. '1980'); the search covers
                 January 1st through December 31st of that year.
    query     -- optional free-text query (e.g. 'Amnesty International');
                 when None (the default) no query term is sent.
    max_pages -- maximum number of result pages to request. The default
                 of 100 is the pager limit noted in the original code.

    Returns a list of parsed articles (dictionaries), oldest first.
    '''
    search_args = {
        'fq': {'source': ['Reuters', 'AP', 'The New York Times'],
               'news_desk': ['Politics']},
        'begin_date': date + '0101',
        'end_date': date + '1231',
        'sort': 'oldest',
    }
    if query is not None:
        search_args['q'] = query

    all_articles = []
    for page in range(max_pages):
        response = api.search(page=str(page), **search_args)
        articles = parse_articles(response)
        if articles is None:
            # Unusable page: skip it and keep going, as before.
            continue
        if not articles:
            # An empty page means the results are exhausted; stop instead of
            # spending the remaining requests on pages that will also be empty.
            break
        all_articles.extend(articles)
    return all_articles

In [74]:
# Download and parse the 2016 articles (one API request per result page).
nyt_pol = get_articles(date='2016')

In [75]:
# One row per parsed article; the dictionary keys become the columns.
nyt_pol_df = pd.DataFrame(data=nyt_pol)

In [76]:
# Preview the first five articles.
nyt_pol_df.head(n=5)

Unnamed: 0,abstract,date,desk,headline,id,locations,section,snippet,source,subjects,type,url,word_count
0,,2016-01-12,U.S. / Politics,The State(s) of the Union,54be6f5038f0d807ab72dfab,[],U.S.,Beginning with his 2009 address to a joint ses...,The New York Times,[State of the Union Message (US)],Video,http://www.nytimes.com/video/us/politics/10000...,28.0
1,,2016-04-18,U.S. / Politics,Courting Favor,5505e2b838f0d85e90e0002e,[],U.S.,"In a series of articles, Eric Lipton of The Ne...",The New York Times,"[Attorneys General, Campaign Finance, Lobbying...",Interactive Feature,http://www.nytimes.com/interactive/2015/us/pol...,
2,,2016-01-01,U.S. / Politics,Donald Trump’s Taxes: What We Know and Don’t Know,57f05f8a95d0e021d798993d,[],U.S.,In the absence of any disclosures from Mr. Tru...,The New York Times,"[Presidential Election of 2016, Federal Taxes ...",Interactive Feature,http://www.nytimes.com/interactive/2016/us/pol...,
3,Hillary Clinton’s presidential campaign raised...,2016-01-01,Politics,Hillary Clinton Raised $37 Million in Last 3 M...,5686ced038f0d82225326f1e,[],U.S.,Hillary Clinton’s presidential campaign raised...,The New York Times,"[Campaign Finance, Presidential Election of 2016]",Blog,http://www.nytimes.com/politics/first-draft/20...,374.0
4,Senator Bernie Sanders’s presidential campaign...,2016-01-02,Politics,Bernie Sanders Took In $33 Million in Last Qua...,5688079638f0d8190470389d,[],U.S.,Senator Bernie Sanders’s presidential campaign...,The New York Times,"[Campaign Finance, Presidential Election of 2016]",Blog,http://www.nytimes.com/politics/first-draft/20...,408.0


In [92]:
# Print up to the first 20 snippets, each followed by its length.
# dropna() skips articles without a snippet (a NaN cannot be concatenated
# with a string) and head(20) copes with frames shorter than 20 rows.
for text in nyt_pol_df['snippet'].dropna().head(20):
    print(text + str(len(text)) + '\n')

 Beginning with his 2009 address to a joint session of Congress, President Obama has focused on several themes that remain hallmarks of his State of the Union speeches....170

In a series of articles, Eric Lipton of The New York Times examines the explosion in lobbying of state attorneys general by corporate interests and the millions in campaign donations they now provide....203

In the absence of any disclosures from Mr. Trump, The New York Times and other news outlets have attempted to fill in the gaps....130

Hillary Clinton’s presidential campaign raised $37 million in the three-month period that ended Dec. 31, bringing her total war chest for use in the Democratic presidential contest to more than $112 million....212

Senator Bernie Sanders’s presidential campaign took in more than $33 million in the final three months of the 2015, as his wide base of small donors enabled him to raise just $4 million less than his rival Hillary Clinton in their fight for the ...250

Donald J. Tru

In [85]:
# Number of articles that have a snippet (count() ignores missing values).
snippet_column = nyt_pol_df['snippet']
snippet_column.count()

239