In [22]:
import requests
import datetime
import uuid

from secrets import api_key

In [None]:
# Notebook-level setting; no cell visible in this notebook excerpt reads it.
numArticlesForDay = 10

In [38]:
def getArticlesForDate(searchDate, timeout=30):
    ''' Gets the articles for the date provided from the NY Times website
    
    Accepts : searchDate (datetime) date to query for records for
              timeout (float or tuple) passed to requests as the request timeout,
                      in seconds; defaults to 30
            
    Returns : (array) list of articles, in dictionary, found for that date
                title: (string) title of article
                category: (string) news desk that the article is from
                id: (string) unique identifier of the news article; created when downloaded
                sourceurl: (string) Url of the news article on the NY Times website
                content: (string) Text of the news article
                imageurl: (string) Url to image from the news article; could be empty
                publishdate: (int) Date when the article was published, yyyyMMdd
    
    Raises  : Exception when the API answers with a status code other than OK
              requests exceptions (for example a timeout) propagate from the HTTP call
    '''
    
    #- Create Parameters
    searchDateString = convertDateToString(searchDate)
    
    filterQuery = f'news_desk:("Business" "Business Day" "Technology" "Personal Tech" "Politics" ) ' \
                f'AND pub_date:({searchDateString})'

    parameters = {
        'fq' : filterQuery,
        'api-key' : api_key
        }
    
    
    #- Query API
    print(f"Requesting data from API: {searchDateString}")
    
    baseNytUrl = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
    
    # Without a timeout requests waits forever on a stalled connection,
    # which would freeze the notebook cell.
    response = requests.get(baseNytUrl, params=parameters, timeout=timeout)
    
    
    #- Check Response: fail fast so the happy path below stays un-nested
    if response.status_code != requests.codes.ok:
        print(f'Unable to get data. Date: {searchDateString} Code: {response.status_code}')
    
        raise Exception("Unable to get data")
    
    
    #- Build Results
    # Every article is stamped with the date that was searched for, so the
    # conversion is done once instead of once per article.
    publishDate = convertDateToInt(searchDate)
    
    return [
        {
            "title" : doc['headline']['main'],
            "category": doc['news_desk'],
            # the id is generated locally; it is not an identifier from the API
            "id": str(uuid.uuid4()),
            "sourceurl": doc['web_url'],
            "publishdate": publishDate,
            "imageurl": getArticleImageUrl(doc),
            "content": doc['lead_paragraph']
        }
        for doc in response.json()['response']['docs']
    ]

In [6]:
def convertDateToString(searchDate):
    ''' Renders a date as text using the '%Y-%m-%d' pattern, e.g. "2018-08-01"
    
    Accepts : searchDate (datetime) the date to render
    
    Returns : (string) year, month and day separated by dashes
    '''
    
    # a non-empty format spec on a date/datetime is handed to strftime
    return f'{searchDate:%Y-%m-%d}'

In [34]:
def getArticleImageUrl(doc):
    ''' Gets the url of the first image attached to the article
    
    Accepts : doc (dictionary) the article; its "multimedia" entry is scanned
                  for the first media item whose type is "image"
    
    Returns : (string) Url to the image; empty when the article has no
                  "multimedia" entry, the entry is empty/null, or none of
                  the media items is an image
    '''
    
    # A missing or null "multimedia" entry is treated the same as an empty list
    # so that an article without media yields "" instead of raising.
    for media in doc.get("multimedia") or []:
        
        # media items without a "type" are skipped rather than raising
        if media.get('type') == 'image':
            # media urls are relative to the static host
            return f'https://static01.nyt.com/{media["url"]}'
            
    return ""

In [33]:
def convertDateToInt(searchDate):
    ''' Packs a date into a single number laid out as year, month, day
        (2018-08-01 becomes 20180801)
    
    Accepts : searchDate (datetime) the date to pack
    
    Returns : (int) the packed date
    '''

    # shift the year left by four decimal digits and the month by two
    yearPart = searchDate.year * 10000
    monthPart = searchDate.month * 100

    return yearPart + monthPart + searchDate.day

In [39]:
# Smoke run: fetch the articles for a single, fixed day.
searchDate = datetime.datetime(2018,8,1)

# Performs a live API request; getArticlesForDate raises if the status code is not OK.
result = getArticlesForDate(searchDate)

# Only reached when the call above did not raise.
print('completed')

Requesting data from API: 2018-08-01
completed


In [40]:
# Bare last expression: the notebook renders the list returned by getArticlesForDate.
result

[{'title': 'Stumbles? What Stumbles? Big Tech Is as Strong as Ever',
  'category': 'Business',
  'id': '313d8400-10c3-496d-85a2-7d5ded6fde38',
  'sourceurl': 'https://www.nytimes.com/2018/08/01/technology/big-tech-earnings-stumbles.html',
  'publishdate': 20180801,
  'imageurl': 'https://static01.nyt.com/images/2018/08/02/business/02State-illo/02State.illo-articleLarge.gif',
  'content': 'You may have heard that the tech giants are on their heels.'},
 {'title': 'Gaza and Google Translate: Covering the Conflict When You Don’t Speak the Language',
  'category': 'Business',
  'id': 'f1230c8b-e05f-4663-ae0f-4163d077d0c5',
  'sourceurl': 'https://www.nytimes.com/2018/08/01/technology/personaltech/israel-gaza-google-translate.html',
  'publishdate': 20180801,
  'imageurl': 'https://static01.nyt.com/images/2018/08/02/business/02techusing1/merlin_141937026_d649cf4c-e5be-4a10-8b14-3782498fcfef-articleLarge.jpg',
  'content': 'How do New York Times journalists use technology in their jobs and in

In [32]:
# Scratch cell. dateValue is a sample timestamp string that nothing in this cell reads.
dateValue = "2019-07-01T03:27:07+0000"

# NOTE: rebinds the notebook-level searchDate (set to 2018-08-01 by the smoke-run cell).
searchDate = datetime.datetime(2018,7,1)

# Same strftime-then-int expression that convertDateToInt wraps; shown as the cell output.
int(searchDate.strftime('%Y%m%d'))

20180701