In [1]:
import requests

API_ROOT = 'http://api.nytimes.com/svc/search/v2/articlesearch.'

API_SIGNUP_PAGE = 'http://developer.nytimes.com/docs/reference/keys'


class NoAPIKeyException(Exception):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class articleAPI(object):
    def __init__(self, key=None):
        """
        Initializes the articleAPI class with a developer key. Raises an exception if a key is not given.
        Request a key at http://developer.nytimes.com/docs/reference/keys
        :param key: New York Times Developer Key
        """
        self.key = key
        self.response_format = 'json'

        if self.key is None:
            raise NoAPIKeyException('Warning: Missing API Key. Please visit ' + API_SIGNUP_PAGE + ' to register for a key.')

    def _bool_encode(self, d):
        """
        Converts bool values to lowercase strings
        """
        for k, v in d.items():
            if isinstance(v, bool):
                d[k] = str(v).lower()

        return d

    def _options(self, **kwargs):
        """
        Formats search parameters/values for use with API
        :param \*\*kwargs: search parameters/values
        """
        def _format_fq(d):
            for k, v in d.items():
                if isinstance(v, list):
                    d[k] = ' '.join(map(lambda x: '"' + x + '"', v))
                else:
                    d[k] = '"' + str(v) + '"'
            values = []
            for k, v in d.items():
                value = '%s:(%s)' % (k, v)
                values.append(value)
            values = ' AND '.join(values)
            return values

        kwargs = self._bool_encode(kwargs)

        values = ''

        for k, v in kwargs.items():
            if k is 'fq' and isinstance(v, dict):
                v = _format_fq(v)
            elif isinstance(v, list):
                v = ','.join(v)
            values += '%s=%s&' % (k, v)

        return values

    def search(self,
               response_format=None,
               key=None,
               **kwargs):
        """
        Calls the API and returns a dictionary of the search results
        :param response_format: the format that the API uses for its response,
                                includes JSON (.json) and JSONP (.jsonp).
                                Defaults to '.json'.
        :param key: a developer key. Defaults to key given when the articleAPI class was initialized.
        """
        if response_format is None:
            response_format = self.response_format
        if key is None:
            key = self.key

        url = '%s%s?%sapi-key=%s' % (
            API_ROOT, response_format, self._options(**kwargs), key
        )

        r = requests.get(url)
        return r.json()

In [2]:
api = articleAPI('234342221c0c4f2fa969f69d92a6f700')

In [3]:
def parse_articles(articles):
    '''
    This function takes in a response to the NYT api and parses
    the articles into a list of dictionaries
    '''
    news = []
    for i in articles['response']['docs']:
        dic = {}
        dic['id'] = i['_id']
        if i['abstract'] is not None:
            dic['abstract'] = i['abstract'].encode("utf8")
        dic['headline'] = i['headline']['main'].encode("utf8")
        dic['desk'] = i['news_desk']
        dic['date'] = i['pub_date'][0:10] # cutting time of day.
        dic['section'] = i['section_name']
        if i['snippet'] is not None:
            dic['snippet'] = i['snippet'].encode("utf8")
        dic['source'] = i['source']
        dic['type'] = i['type_of_material']
        dic['url'] = i['web_url']
        dic['word_count'] = i['word_count']
        # locations
        locations = []
        for x in range(0,len(i['keywords'])):
            if 'glocations' in i['keywords'][x]['name']:
                locations.append(i['keywords'][x]['value'])
        dic['locations'] = locations
        # subject
        subjects = []
        for x in range(0,len(i['keywords'])):
            if 'subject' in i['keywords'][x]['name']:
                subjects.append(i['keywords'][x]['value'])
        dic['subjects'] = subjects   
        news.append(dic)
    return(news)

In [4]:
def get_articles(query):
    '''
    This function needs to change  begin_date and number of pages 10 artilces per page
    '''
    all_articles = []
    for i in range(0,20): #NYT limits pager to first 100 pages. But rarely will you find over 100 pages of results anyway.
        articles = api.search(q = query,
#               fq = {'source':['Reuters','AP', 'The New York Times']},
               begin_date = 20160101,
               sort='newest',
               page = str(i))
        articles = parse_articles(articles)
        all_articles = all_articles + articles
    return(all_articles)

# GIVE ME A KEY WORD OR COMPANY NAME TO RUN 

In [5]:
UPSall = get_articles("United Parcel Service")
len(UPSall)

126

In [6]:
import pandas as pd
ups = pd.DataFrame(UPSall)

In [23]:
ups

Unnamed: 0,abstract,date,desk,headline,id,locations,section,snippet,source,subjects,type,url,word_count
0,,2017-05-05,,b'Two Die in Cargo Plane Crash at West Virgini...,590c782a7c459f24986ddea5,[],U.S.,b'Two people died when an incoming cargo plane...,Reuters,[],News,https://www.nytimes.com/reuters/2017/05/05/us/...,260
1,,2017-04-30,,b'UPS Air Maintenance Workers Threaten Strike ...,590607567c459f24986dcf3c,[],Business Day,"b'A union representing 1,200 U.S. air maintena...",Reuters,[],News,https://www.nytimes.com/reuters/2017/04/30/bus...,408
2,,2017-04-27,,b'Markets Right Now: Meager Gains Are Enough f...,5901f5707c459f24986dc718,[],U.S.,b'The latest on developments in financial mark...,AP,[],News,https://www.nytimes.com/aponline/2017/04/27/us...,340
3,,2017-04-27,,b'UPS First-Quarter Profit Tops Estimates as R...,5901df937c459f24986dc6c0,[],Business Day,b'United Parcel Service Inc reported a higher-...,Reuters,[],News,https://www.nytimes.com/reuters/2017/04/27/bus...,380
4,,2017-04-27,,b'UPS Tops Street 1Q Forecasts',5901ded77c459f24986dc6be,[],Business Day,b'United Parcel Service Inc. is reporting firs...,AP,[],News,https://www.nytimes.com/aponline/2017/04/27/bu...,161
5,,2017-04-26,,b'UPS Wins $2.35 Billion U.S. Defense Contract...,590110d47c459f24986dc4b9,[],Business Day,b'United Parcel Service Co was awarded a five-...,Reuters,[],News,https://www.nytimes.com/reuters/2017/04/26/bus...,56
6,,2017-04-21,,b'Cops: 10 Pounds of Pot Wrongly Sent to Penns...,58fa060f7c459f24986db5e7,[],U.S.,b'Police are trying to determine who shipped 1...,AP,[],News,https://www.nytimes.com/aponline/2017/04/21/us...,134
7,,2017-04-17,,"b""The Long, Rough Ride Ahead for 'Made in Amer...",58f4a27c7c459f24986da87c,[],Business Day,b'Mini motorcycle and go-kart maker Monster Mo...,Reuters,[],News,https://www.nytimes.com/reuters/2017/04/17/bus...,1105
8,,2017-04-07,SundayBusiness,"b'Hey Hey, My My: Aging Rock Fans Still Hold T...",58e801727c459f24986d9172,[Indio (Calif)],Business Day,b'Rock \xe2\x80\x99n\xe2\x80\x99 roll will nev...,The New York Times,"[Festivals, Pop and Rock Music, Retirement, El...",News,https://www.nytimes.com/2017/04/07/business/re...,1159
9,,2017-04-07,,b'Dog Attacks on Mail Carriers Rise Again as O...,58e745127c459f24986d8ee3,[],U.S.,"b""Letter carrier James Solomon thought he knew...",AP,[],News,https://www.nytimes.com/aponline/2017/04/07/us...,738


In [21]:
ups

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,b'Two Die in Cargo Plane Crash at West Virgini...,[],b'Two people died when an incoming cargo plane...,,News,https://www.nytimes.com/reuters/2017/05/05/us/...,2017-05-05,Reuters,590c782a7c459f24986ddea5,U.S.,[],260,
1,b'UPS Air Maintenance Workers Threaten Strike ...,[],"b'A union representing 1,200 U.S. air maintena...",,News,https://www.nytimes.com/reuters/2017/04/30/bus...,2017-04-30,Reuters,590607567c459f24986dcf3c,Business Day,[],408,
2,b'Markets Right Now: Meager Gains Are Enough f...,[],b'The latest on developments in financial mark...,,News,https://www.nytimes.com/aponline/2017/04/27/us...,2017-04-27,AP,5901f5707c459f24986dc718,U.S.,[],340,
3,b'UPS First-Quarter Profit Tops Estimates as R...,[],b'United Parcel Service Inc reported a higher-...,,News,https://www.nytimes.com/reuters/2017/04/27/bus...,2017-04-27,Reuters,5901df937c459f24986dc6c0,Business Day,[],380,
4,b'UPS Tops Street 1Q Forecasts',[],b'United Parcel Service Inc. is reporting firs...,,News,https://www.nytimes.com/aponline/2017/04/27/bu...,2017-04-27,AP,5901ded77c459f24986dc6be,Business Day,[],161,
5,b'UPS Wins $2.35 Billion U.S. Defense Contract...,[],b'United Parcel Service Co was awarded a five-...,,News,https://www.nytimes.com/reuters/2017/04/26/bus...,2017-04-26,Reuters,590110d47c459f24986dc4b9,Business Day,[],56,
6,b'Cops: 10 Pounds of Pot Wrongly Sent to Penns...,[],b'Police are trying to determine who shipped 1...,,News,https://www.nytimes.com/aponline/2017/04/21/us...,2017-04-21,AP,58fa060f7c459f24986db5e7,U.S.,[],134,
7,"b""The Long, Rough Ride Ahead for 'Made in Amer...",[],b'Mini motorcycle and go-kart maker Monster Mo...,,News,https://www.nytimes.com/reuters/2017/04/17/bus...,2017-04-17,Reuters,58f4a27c7c459f24986da87c,Business Day,[],1105,
8,"b'Hey Hey, My My: Aging Rock Fans Still Hold T...",[Indio (Calif)],b'Rock \xe2\x80\x99n\xe2\x80\x99 roll will nev...,SundayBusiness,News,https://www.nytimes.com/2017/04/07/business/re...,2017-04-07,The New York Times,58e801727c459f24986d9172,Business Day,"[Festivals, Pop and Rock Music, Retirement, El...",1159,
9,b'Dog Attacks on Mail Carriers Rise Again as O...,[],"b""Letter carrier James Solomon thought he knew...",,News,https://www.nytimes.com/aponline/2017/04/07/us...,2017-04-07,AP,58e745127c459f24986d8ee3,U.S.,[],738,
