In [4]:
import json
import requests
import datetime
import time
import pandas as pd
from collections import defaultdict
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [29]:
SLEEP_DURATION = 7

def find_news_articles(begindate, nytimes_section):
    base_url = 'https://api.nytimes.com/svc/search/v2/articlesearch.json?'
    api_key = 'INSERT_KEY_HERE'
    facet_str = f'&facet=true&begin_date={begindate}&end_date={begindate}'
    
    page = 0
    count = 0
    
    ret_list = []
    while True:
        url = base_url+nytimes_section+facet_str+f'&page={page}'+api_key
        r = requests.get(url)
        if r.status_code != 200:
            print(r.status_code)
        data = json.loads(r.content)
        time.sleep(SLEEP_DURATION)
        if page == 0:
            tot_articles = data['response']['meta']['hits']
            print(begindate, nytimes_section, 'tot_articles', tot_articles)
        for i, doc in enumerate(data['response']['docs']):
            ret_list.append((doc['headline']['main'], doc['lead_paragraph'], doc['web_url']))
            count += 1
        if count >= tot_articles:
            break
        page += 1
    #print(len(ret_list))
    return ret_list

In [30]:
BATCH_SIZE = 30

today = datetime.datetime.today()
begin_date = datetime.datetime.strptime('25/03/2007', '%d/%m/%Y')

while begin_date <= today:

    daily_sentiment = defaultdict(defaultdict)
    sid_obj = SentimentIntensityAnalyzer()
    
    i = 0
    while i < BATCH_SIZE:
        days_sentiment_pos, days_sentiment_neg, days_sentiment_neu, days_sentiment_comp = 0, 0, 0, 0
        date_str = str(begin_date.year) + str(begin_date.month).zfill(2) + str(begin_date.day).zfill(2)

        news_desk_str = 'fq=news_desk:("Financial" "Business" "Business Day")'
        section_str = 'fq=section_name:("Your Money" "Business" "Business Day")'

        news_desk_list = find_news_articles(date_str, news_desk_str)
        section_list = find_news_articles(date_str, section_str)

        final_urls = set()
        for news in news_desk_list: # tuple of 3: headline, lead_paragraph, web_url
            if news[2] not in final_urls:
                # print('adding news desk article', news[0])
                final_urls.add(news[2])
                sentiment_dict = sid_obj.polarity_scores(news[0] + news[1])
                days_sentiment_pos += sentiment_dict['pos']
                days_sentiment_neg += sentiment_dict['neg']
                days_sentiment_neu += sentiment_dict['neu']
                days_sentiment_comp += sentiment_dict['compound']
        for news in section_list: # tuple of 3: headline, lead_paragraph, web_url
            if news[2] not in final_urls:
                # print('adding section article', news[0])
                final_urls.add(news[2])
                sentiment_dict = sid_obj.polarity_scores(news[0] + news[1])
                days_sentiment_pos += sentiment_dict['pos']
                days_sentiment_neg += sentiment_dict['neg']
                days_sentiment_neu += sentiment_dict['neu']
                days_sentiment_comp += sentiment_dict['compound']

        num_news_items = len(final_urls)
        if num_news_items > 0:
            daily_sentiment[date_str]['pos'] = days_sentiment_pos/num_news_items
            daily_sentiment[date_str]['neg'] = days_sentiment_neg/num_news_items
            daily_sentiment[date_str]['neu'] = days_sentiment_neu/num_news_items
            daily_sentiment[date_str]['compound'] = days_sentiment_comp/num_news_items
        else:
            daily_sentiment[date_str]['pos'] = 0
            daily_sentiment[date_str]['neg'] = 0
            daily_sentiment[date_str]['neu'] = 0
            daily_sentiment[date_str]['compound'] = 0
        i += 1
        begin_date += datetime.timedelta(days=1)
    df = pd.DataFrame(daily_sentiment).T
    df.to_csv('sentiment_data/' + date_str + '.csv')

20070325 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 4
20070325 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 36
20070326 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 59
20070326 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 86
20070327 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 61
20070327 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 77
20070328 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 63
20070328 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 79
20070329 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 69
20070329 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 90
20070330 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 57
20070330 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 80
20070331 fq=news_desk:("Financial" "Busin

20070515 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 90
20070516 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 60
20070516 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 90
20070517 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 67
20070517 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 73
20070518 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 64
20070518 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 82
20070519 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 25
20070519 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 25
20070520 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 1
20070520 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 41
20070521 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 62
20070521 fq=section_name:("Your Money" "B

20070706 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 62
20070706 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 80
20070707 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 29
20070707 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 24
20070708 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 2
20070708 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 47
20070709 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 67
20070709 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 96
20070710 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 72
20070710 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 88
20070711 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 69
20070711 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 81
20070712 fq=news_desk:("Financial" "Busin

20070826 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 33
20070827 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 67
20070827 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 81
20070828 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 58
20070828 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 82
20070829 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 54
20070829 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 73
20070830 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 66
20070830 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 71
20070831 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 57
20070831 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 68
20070901 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 21
20070901 fq=section_name:("Your Money" "

20071017 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 66
20071017 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 88
20071018 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 78
20071018 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 84
20071019 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 59
20071019 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 78
20071020 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 18
20071020 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 21
20071021 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 0
20071021 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 29
20071022 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 73
20071022 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 98
20071023 fq=news_desk:("Financial" "Busin

20071207 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 80
20071208 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 30
20071208 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 35
20071209 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 2
20071209 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 21
20071210 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 64
20071210 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 78
20071211 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 77
20071211 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 100
20071212 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 69
20071212 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 89
20071213 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 91
20071213 fq=section_name:("Your Money" "

20080128 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 71
20080128 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 109
20080129 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 68
20080129 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 104
20080130 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 71
20080130 fq=section_name:("Your Money" "Business" "Business Day") tot_articles 137
20080131 fq=news_desk:("Financial" "Business" "Business Day") tot_articles 84
429


KeyError: 'response'