In [1]:
import pandas as pd
import numpy as np
import requests
import json
import time
import os

from dotenv import load_dotenv
from datetime import datetime

In [2]:
# Load the local .env file so the os.getenv() lookups for the Watson
# credentials further down can find their values.
load_dotenv()

from ibm_watson import IAMTokenManager
from ibm_cloud_sdk_core.authenticators import BearerTokenAuthenticator
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions, SentimentOptions, EmotionOptions

In [3]:
# Build the Natural Language Understanding client.
#
# Authenticate with the API key through IAMAuthenticator instead of fetching
# one bearer token up front: a fixed bearer token expires, and the scoring
# loop later in this notebook issues two requests per article, so a long run
# could outlive the token and start failing part-way through.
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

api_key = os.getenv("ASSISTANT_API_KEY")
service_url = os.getenv("ASSISTANT_URL")

# Fail early with a readable message rather than deep inside the SDK.
if not api_key or not service_url:
    raise RuntimeError(
        "ASSISTANT_API_KEY and ASSISTANT_URL must both be set "
        "(for example in the .env file)"
    )

authenticator = IAMAuthenticator(apikey=api_key)

service = NaturalLanguageUnderstandingV1(version='2020-08-01',
                        authenticator=authenticator)
service.set_service_url(service_url)

In [4]:
# Smoke-test the client: analyse one short sample with entities, keywords,
# sentiment and emotion all requested, and keep the parsed result.
sample_text = ('Bruce Banner is the Hulk and Bruce Wayne is BATMAN! '
               'Superman fears not Banner, but Wayne.')
sample_features = Features(
    entities=EntitiesOptions(),
    keywords=KeywordsOptions(),
    sentiment=SentimentOptions(),
    emotion=EmotionOptions(),
)
detailed_response = service.analyze(text=sample_text, features=sample_features)
response = detailed_response.get_result()

In [5]:
# Document-level sentiment score of the sample response.
response['sentiment']['document']['score']

-0.550253

In [6]:
# Document-level emotion block of the sample response.
response['emotion']['document']

{'emotion': {'sadness': 0.101967,
  'joy': 0.304163,
  'fear': 0.065787,
  'disgust': 0.083401,
  'anger': 0.121029}}

In [7]:
# Read the New York Times article dump into a DataFrame.
nyt_csv_path = 'nyt_full_data.csv'
nyt_df = pd.read_csv(nyt_csv_path)

In [8]:
# Parse the Date column, make it the index, and order the rows by date.
nyt_df = (
    nyt_df
    .assign(Date=pd.to_datetime(nyt_df["Date"]))
    .set_index("Date")
    .sort_index(axis=0)
)

In [9]:
# Discard every row that has a missing value in any column.
nyt_df = nyt_df.dropna(axis=0, how="any")

In [10]:
# One empty list per (text field, score) pair; the scoring cell below
# appends one value per article to each of these lists.
d = {
    "{} {}".format(field, score): []
    for field in ("headline", "lead")
    for score in ("sentiment", "sadness", "joy", "fear", "disgust", "anger")
}

In [11]:
# Score every headline and every lead paragraph with Watson NLU
# (sentiment + emotion) and collect the values in the lists of `d`.
#
# Each text contributes exactly one value to each of its six lists, so the
# lists always stay the same length as nyt_df and can be assigned as columns.

EMOTION_NAMES = ("sadness", "joy", "fear", "disgust", "anger")
SCORE_NAMES = ("sentiment",) + EMOTION_NAMES
FALLBACK_SCORE = 0  # value recorded when a text cannot be scored


def analyze_scores(text):
    """Return the sentiment and emotion scores of ``text``.

    Parameters
    ----------
    text : str
        The text to send to the NLU service.

    Returns
    -------
    dict
        A mapping with the keys in ``SCORE_NAMES``. A score missing from the
        response is reported as ``FALLBACK_SCORE``. If the request fails or
        the response cannot be read, every score is ``FALLBACK_SCORE``.
    """
    scores = dict.fromkeys(SCORE_NAMES, FALLBACK_SCORE)
    try:
        response = service.analyze(
            text=text,
            features=Features(
                sentiment=SentimentOptions(),
                emotion=EmotionOptions()
            )
        ).get_result()

        if 'sentiment' in response:
            scores["sentiment"] = response['sentiment']['document']['score']

        if 'emotion' in response:
            emotion = response['emotion']['document']['emotion']
            scores.update({name: emotion[name] for name in EMOTION_NAMES})

    # Best effort: texts the service rejects (too short, unsupported
    # language, ...) are scored with the fallback. `Exception` rather than a
    # bare `except` so that interrupting the kernel still stops the loop.
    except Exception:
        return dict.fromkeys(SCORE_NAMES, FALLBACK_SCORE)

    return scores


for prefix, column in (("headline", "Headline"), ("lead", "Lead Paragraph")):
    # Start from empty lists so re-running this cell does not double them.
    for name in SCORE_NAMES:
        d[prefix + " " + name].clear()

    for text in nyt_df[column]:
        scores = analyze_scores(text)
        # Append all six values together, only after scoring has finished.
        for name in SCORE_NAMES:
            d[prefix + " " + name].append(scores[name])

ERROR:root:not enough text for language id
Traceback (most recent call last):
  File "C:\Users\mathl\anaconda3\envs\project2env\lib\site-packages\ibm_cloud_sdk_core\base_service.py", line 246, in send
    response.status_code, http_response=response)
ibm_cloud_sdk_core.api_exception.ApiException: Error: not enough text for language id, Code: 422 , X-global-transaction-id: 74619f11-f319-494b-b9d8-6fe04053b1c0
ERROR:root:not enough text for language id
Traceback (most recent call last):
  File "C:\Users\mathl\anaconda3\envs\project2env\lib\site-packages\ibm_cloud_sdk_core\base_service.py", line 246, in send
    response.status_code, http_response=response)
ibm_cloud_sdk_core.api_exception.ApiException: Error: not enough text for language id, Code: 422 , X-global-transaction-id: 1f2fcfe8-c744-43b0-896d-4a66b9f0b928
ERROR:root:unsupported text language: la
Traceback (most recent call last):
  File "C:\Users\mathl\anaconda3\envs\project2env\lib\site-packages\ibm_cloud_sdk_core\base_service.

In [12]:
# Show which score lists were collected.
d.keys()

dict_keys(['headline sentiment', 'headline sadness', 'headline joy', 'headline fear', 'headline disgust', 'headline anger', 'lead sentiment', 'lead sadness', 'lead joy', 'lead fear', 'lead disgust', 'lead anger'])

In [13]:
# Attach each collected score list to the frame as its own column:
# all headline scores first, then all lead-paragraph scores.
for list_prefix, column_prefix in (("headline", "Headline"),
                                   ("lead", "Lead Paragraph")):
    for score_label in ("Sentiment", "Sadness", "Joy", "Fear", "Disgust", "Anger"):
        source_key = list_prefix + " " + score_label.lower()
        nyt_df[column_prefix + " " + score_label] = d[source_key]

In [14]:
# Display the frame, now including the sentiment and emotion columns.
nyt_df

Unnamed: 0_level_0,Source,Headline,Lead Paragraph,URL,Headline Sentiment,Headline Sadness,Headline Joy,Headline Fear,Headline Disgust,Headline Anger,Lead Paragraph Sentiment,Lead Paragraph Sadness,Lead Paragraph Joy,Lead Paragraph Fear,Lead Paragraph Disgust,Lead Paragraph Anger
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-10-01,The New York Times,"Italy’s daily new cases topped 2,000 for the f...",Italy registered a number of infections exceed...,https://www.nytimes.com/2020/10/01/world/italy...,0.000000,0.223045,0.653405,0.074037,0.028137,0.019463,-0.474495,0.213385,0.629091,0.102553,0.024029,0.019349
2020-10-01,The New York Times,"Citing security, Texas governor limits countie...",Gov. Greg Abbott of Texas issued a proclamatio...,https://www.nytimes.com/2020/10/01/us/election...,0.000000,0.322699,0.091246,0.145323,0.148526,0.143236,-0.865934,0.470811,0.035511,0.020349,0.136890,0.118237
2020-10-01,The New York Times,A day after refusing to condemn white supremac...,A day after he refused to condemn white suprem...,https://www.nytimes.com/2020/10/01/us/election...,-0.710345,0.139055,0.008897,0.357865,0.565104,0.289598,-0.874280,0.064722,0.026939,0.231966,0.549263,0.493651
2020-10-01,The New York Times,Membership of Anti-Mask Facebook Groups Jumps ...,It’s no surprise that people pushing anti-mask...,https://www.nytimes.com/2020/10/01/technology/...,0.000000,0.365323,0.225491,0.037281,0.193551,0.326865,-0.813675,0.560665,0.139525,0.057090,0.132173,0.333184
2020-10-01,The New York Times,"Despite Billions in Fees, Banks Predict Meager...",Loath to be seen profiting from the economic d...,https://www.nytimes.com/2020/10/01/business/pp...,-0.772085,0.470490,0.154046,0.017529,0.068231,0.188812,0.389565,0.198431,0.301770,0.056589,0.206573,0.106570
2020-10-01,The New York Times,"After 2 Years of Paralysis, Belgium Forms a (V...","BRUSSELS — For nearly two years, Belgium has b...",https://www.nytimes.com/2020/10/01/world/europ...,-0.597614,0.228999,0.177824,0.032788,0.108682,0.063435,-0.773971,0.651144,0.024095,0.284036,0.172001,0.145083
2020-10-01,The New York Times,"Facing Grim Polls, Trump Leans Into Playing th...",[Follow our latest coverage of the Biden vs. T...,https://www.nytimes.com/2020/10/01/us/politics...,-0.674420,0.368872,0.130213,0.089112,0.492832,0.072533,0.000000,0.227488,0.065503,0.112038,0.022493,0.147882
2020-10-01,The New York Times,The Trump Debate Fallout,Senate Republicans do the old dance around Tru...,https://www.nytimes.com/2020/10/01/us/politics...,0.000000,0.494645,0.049587,0.123372,0.341944,0.271970,-0.581098,0.239862,0.119414,0.065171,0.542714,0.168471
2020-10-01,The New York Times,"Airlines, Facing a Painfully Slow Recovery, Be...",When Congress gave passenger airlines a $50 bi...,https://www.nytimes.com/2020/10/01/business/ai...,-0.923459,0.589948,0.170951,0.100936,0.059937,0.218463,-0.745318,0.403561,0.319805,0.161091,0.061766,0.065882
2020-10-01,The New York Times,China’s ‘Golden Week’ Kicks Off in Boost to Ba...,"Along the Great Wall, extra security guards ha...",https://www.nytimes.com/2020/10/01/world/asia/...,0.543077,0.080705,0.688356,0.028989,0.031417,0.167604,0.280929,0.283372,0.461656,0.190409,0.028025,0.074882


In [15]:
# Write the scored frame to disk; the Date index is kept as the first column.
scored_csv_path = "raw_nyt_watson.csv"
nyt_df.to_csv(scored_csv_path, index=True)