In [1]:
from textblob import TextBlob
from datetime import datetime
import requests as re
import pandas as pd
import time

In [2]:
def archives(year, month, key = '2E0ntEO9AFP3tUkfnbdyhs4h5Y2AHc0x'):
    ''' extract archive response for a given year and month from NYT from:
        https://api.nytimes.com/svc/archive/v1/
    
    Args: 
        year (int): integer representing year
        month (int): integer representing months from 1-12
        key (str): API access key
    
    Returns: 
        dict: the decoded json response for all articles in a given year and month
    
    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status code
    '''
    # NOTE(review): the default key is a credential committed to the notebook.
    # It is kept so existing calls keep working, but it should be rotated and
    # passed in by the caller (e.g. read from an environment variable).
    link = f"https://api.nytimes.com/svc/archive/v1/{year}/{month}.json"
    headers = {"Accept": "application/json"}
    
    # the key travels as a query parameter; the timeout stops a stalled
    # connection from blocking the notebook forever
    response = re.get(link, params={"api-key": key}, headers=headers, timeout=60)
    
    # NYT API requires sleeping for 6 seconds between each call
    # (done before the status check so a retry after an error is throttled too)
    time.sleep(6)
    
    # fail here with the HTTP status instead of returning an error payload
    # that has no 'response' entry
    response.raise_for_status()
    
    return response.json()

In [3]:
def news_df(years, months):
    ''' Extract business news headlines, abstract and date for a given year and month
    
    Args: 
        years (list): list of years used for this project 2015-2019
        months (list): list of months from 1-12
    
    Returns: 
        pd.DataFrame: one row per 'Business Day' article with the columns
            'headline', 'abstract' and 'date'; the columns are present even
            when no article matched. A field missing from an article is
            stored as a missing value.
    '''
    columns = ['headline', 'abstract', 'date']
    news_list = []
    
    for year in years:
        for month in months:
            archive = archives(year, month)
            
            # only save headline, abstract and date
            for article in archive['response']['docs']:
                # .get: an article without a section is skipped, not a KeyError
                if article.get('section_name') != 'Business Day':
                    continue
                
                # 'headline' is a nested mapping; tolerate it being absent or null
                headline = article.get('headline') or {}
                news_list.append({'headline': headline.get('main'),
                                  'abstract': article.get('abstract'),
                                  'date': article.get('pub_date')})

    # explicit columns keep the schema stable for an empty result
    return pd.DataFrame(news_list, columns=columns)

In [4]:
# This code was run in segmented timeframes so as not to overwhelm the system
# months = list(range(1,13))
# years = list(range(2015,2020))
# df_news = news_df(years, months)

# eventually saved and appended to the csv
# df_news.to_csv('news.csv')

In [5]:
# For the sake of demonstrating our sentiment score generation, we will load the saved csv.
# Rows with any missing value are removed, the rows are renumbered from 0
# (drop=True discards the old index instead of adding it as an 'index' column),
# and the unnamed first column read from the csv ('Unnamed: 0') is discarded.
nyt_df = (
    pd.read_csv('news.csv')
    .dropna()
    .reset_index(drop=True)
    .drop(columns=['Unnamed: 0'])
)
nyt_df

Unnamed: 0,headline,abstract,date
0,A Guide to Minimum Wage Increases at the State...,"By Jan. 1, 29 states and the District of Colum...",2015-01-01T01:45:03+0000
1,"States’ Minimum Wages Rise, Helping Millions o...",Minimum wage increases go into effect in 20 st...,2015-01-01T01:45:10+0000
2,"Government Spending, Edging Up, Is a Stimulus",State and local governments are spending on pr...,2015-01-01T18:16:34+0000
3,Digital Tax Increase to Take Effect in Europe,New rules are coming into force for services l...,2015-01-01T18:22:47+0000
4,"By ‘Editing’ Plant Genes, Companies Avoid Regu...",Critics of bioengineered crops are concerned t...,2015-01-01T23:02:04+0000
...,...,...,...
22727,"Carlos Ghosn, Fugitive but a Favorite Son, Ret...","For many residents, he represents the Lebanese...",2019-12-31T13:04:30+0000
22728,"Carlos Ghosn’s Escape: A Lawyer in Beirut, a F...",The prominent former auto executive eluded the...,2019-12-31T18:00:12+0000
22729,Recent Commercial Real Estate Transactions,Recent commercial real estate transactions in ...,2019-12-31T19:40:38+0000
22730,"The Warriors Get a New Home, and New Neighbors","Chase Center, in San Francisco’s Mission Bay, ...",2019-12-31T21:26:24+0000


In [6]:
def str_to_dt(date_string):
    ''' parse the calendar date found at the start of a date string
    Args:
        date_string (str): text starting with a 'YYYY-MM-DD' date; everything
            after the first 10 characters is ignored
        
    Returns:
        dt (datetime.date): the parsed date, without a time component
    '''
    day_part = date_string[:10]
    parsed = datetime.strptime(day_part, '%Y-%m-%d')
    
    return parsed.date()

# astype(str) keeps this cell re-runnable: after a first run the column holds
# date objects, which str_to_dt cannot slice; their string form parses again
nyt_df['date'] = nyt_df['date'].astype(str).map(str_to_dt)
nyt_df

Unnamed: 0,headline,abstract,date
0,A Guide to Minimum Wage Increases at the State...,"By Jan. 1, 29 states and the District of Colum...",2015-01-01
1,"States’ Minimum Wages Rise, Helping Millions o...",Minimum wage increases go into effect in 20 st...,2015-01-01
2,"Government Spending, Edging Up, Is a Stimulus",State and local governments are spending on pr...,2015-01-01
3,Digital Tax Increase to Take Effect in Europe,New rules are coming into force for services l...,2015-01-01
4,"By ‘Editing’ Plant Genes, Companies Avoid Regu...",Critics of bioengineered crops are concerned t...,2015-01-01
...,...,...,...
22727,"Carlos Ghosn, Fugitive but a Favorite Son, Ret...","For many residents, he represents the Lebanese...",2019-12-31
22728,"Carlos Ghosn’s Escape: A Lawyer in Beirut, a F...",The prominent former auto executive eluded the...,2019-12-31
22729,Recent Commercial Real Estate Transactions,Recent commercial real estate transactions in ...,2019-12-31
22730,"The Warriors Get a New Home, and New Neighbors","Chase Center, in San Francisco’s Mission Bay, ...",2019-12-31


In [7]:
def abstract_pol(abstract):
    ''' score the polarity of a news abstract with TextBlob
    Args:
        abstract (str): the abstract of the given news article; it is
            converted with str() before being analysed
    
    Returns:
        pol (float): polarity sentiment score of the abstract
    '''
    sentiment = TextBlob(str(abstract)).sentiment
    return sentiment.polarity

# score every abstract and store the result in a new 'nyt_pol' column
nyt_df['nyt_pol'] = nyt_df['abstract'].apply(abstract_pol)
nyt_df

Unnamed: 0,headline,abstract,date,nyt_pol
0,A Guide to Minimum Wage Increases at the State...,"By Jan. 1, 29 states and the District of Colum...",2015-01-01,0.000000
1,"States’ Minimum Wages Rise, Helping Millions o...",Minimum wage increases go into effect in 20 st...,2015-01-01,0.000000
2,"Government Spending, Edging Up, Is a Stimulus",State and local governments are spending on pr...,2015-01-01,0.000000
3,Digital Tax Increase to Take Effect in Europe,New rules are coming into force for services l...,2015-01-01,0.318182
4,"By ‘Editing’ Plant Genes, Companies Avoid Regu...",Critics of bioengineered crops are concerned t...,2015-01-01,0.000000
...,...,...,...,...
22727,"Carlos Ghosn, Fugitive but a Favorite Son, Ret...","For many residents, he represents the Lebanese...",2019-12-31,0.416667
22728,"Carlos Ghosn’s Escape: A Lawyer in Beirut, a F...",The prominent former auto executive eluded the...,2019-12-31,0.250000
22729,Recent Commercial Real Estate Transactions,Recent commercial real estate transactions in ...,2019-12-31,0.084091
22730,"The Warriors Get a New Home, and New Neighbors","Chase Center, in San Francisco’s Mission Bay, ...",2019-12-31,0.200000


In [8]:
def abstract_sub(abstract):
    ''' score the subjectivity of a news abstract with TextBlob
    Args:
        abstract (str): the abstract of the given news article; it is
            converted with str() before being analysed
    
    Returns:
        sub (float): subjectivity sentiment score of the abstract
    '''
    sentiment = TextBlob(str(abstract)).sentiment
    return sentiment.subjectivity

# score every abstract and store the result in a new 'nyt_sub' column
nyt_df['nyt_sub'] = nyt_df['abstract'].apply(abstract_sub)
nyt_df

Unnamed: 0,headline,abstract,date,nyt_pol,nyt_sub
0,A Guide to Minimum Wage Increases at the State...,"By Jan. 1, 29 states and the District of Colum...",2015-01-01,0.000000,0.100000
1,"States’ Minimum Wages Rise, Helping Millions o...",Minimum wage increases go into effect in 20 st...,2015-01-01,0.000000,0.000000
2,"Government Spending, Edging Up, Is a Stimulus",State and local governments are spending on pr...,2015-01-01,0.000000,0.000000
3,Digital Tax Increase to Take Effect in Europe,New rules are coming into force for services l...,2015-01-01,0.318182,0.477273
4,"By ‘Editing’ Plant Genes, Companies Avoid Regu...",Critics of bioengineered crops are concerned t...,2015-01-01,0.000000,0.100000
...,...,...,...,...,...
22727,"Carlos Ghosn, Fugitive but a Favorite Son, Ret...","For many residents, he represents the Lebanese...",2019-12-31,0.416667,0.450000
22728,"Carlos Ghosn’s Escape: A Lawyer in Beirut, a F...",The prominent former auto executive eluded the...,2019-12-31,0.250000,0.500000
22729,Recent Commercial Real Estate Transactions,Recent commercial real estate transactions in ...,2019-12-31,0.084091,0.251136
22730,"The Warriors Get a New Home, and New Neighbors","Chase Center, in San Francisco’s Mission Bay, ...",2019-12-31,0.200000,0.300000


In [9]:
# daily mean of each score: one Series per score, indexed by date
nyt_pol, nyt_sub = (
    nyt_df.groupby('date')[score].mean() for score in ('nyt_pol', 'nyt_sub')
)

In [10]:
# build a new dataframe holding just the two daily scores, matched on date
nyt_score_df = nyt_pol.to_frame().merge(nyt_sub, on='date')
nyt_score_df.head()

Unnamed: 0_level_0,nyt_pol,nyt_sub
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01,0.045455,0.149134
2015-01-02,0.072037,0.32154
2015-01-03,0.059295,0.465729
2015-01-04,0.11875,0.608333
2015-01-05,0.181212,0.396078


In [11]:
# write the daily nyt sentiment scores to csv so they can be combined with other platform scores
nyt_score_df.to_csv(path_or_buf='nyt_scores.csv')