### **Obtain Sentiment Analysis of Articles**

This notebook uses IBM Watson's Natural Language Understanding [API](https://cloud.ibm.com/apidocs/natural-language-understanding) to analyze the general sentiment of article content, or of specific target phrases within that content. The API returns a sentiment score ranging from -1 (negative) to 1 (positive) and a polarity classification based on the score.

In [None]:
import os
import sys

# Add the local ./watson_developer_cloud/ directory to the module search path.
sys.path.append('./watson_developer_cloud/')

In [None]:
# from watson_developer_cloud import NaturalLanguageUnderstandingV1

In [None]:
# Show the notebook's current working directory.
os.getcwd()

In [None]:
from platform import python_version
# Print the version of the Python interpreter running this notebook.
print(python_version())

In [None]:
# !pip install watson_developer_cloud
# import watson_developer_cloud

In [5]:
import json
import time
import pandas as pd

# from watson_developer_cloud import NaturalLanguageUnderstandingV1
# from watson_developer_cloud.natural_language_understanding_v1 import Features, SentimentOptions

In [None]:
# Load the Watson NLU API key from the local credentials file. The file is
# parsed as JSON and the key is read from its 'nat_lang' entry. A context
# manager is used so the file handle is closed as soon as the key is read.
with open("../API/IBM_Cloud/api_key.txt") as key_file:
    api_key = json.load(key_file)['nat_lang']
# Service URL passed to the Natural Language Understanding client.
endpoint = 'https://gateway.watsonplatform.net/natural-language-understanding/api'

In [26]:
pwd

'/Users/lhe/Nielsen /Fellowship_Spring2019'

In [1]:
from library.data_dict import *

In [16]:
# comments['wzN0z3cKtYQ']

In [22]:
# comments.decode('utf8','surrogateescape')
# pd.DataFrame.from_dict(comments).transpose()

In [25]:
# NOTE: this JSON-encodes the path string itself (see the cell output below);
# it does not open or read library/data_dict.json.
json.dumps('library/data_dict.json')

'"library/data_dict.json"'

#### **1. Obtain Analysis from Watson API for list of URLs**

##### **1a. Get Sentiment Analysis for Single URL**

*If the sentiment can be obtained, an analysis dictionary is returned; otherwise `None` is returned.*

In [None]:
import json
# from watson_developer_cloud import NaturalLanguageUnderstandingV1
from watson_developer_cloud.natural_language_understanding_v1 import Features, RelationsOptions

def obtain_sentiment_analysis_from_string(txt):
    """Send ``txt`` to Watson NLU and pretty-print the JSON response.

    NOTE(review): despite the function name, the request asks for the
    *relations* feature (``RelationsOptions``), not sentiment. Confirm which
    feature is intended before relying on the output.

    Args:
        txt: The text to analyze.

    Returns:
        None. The response is printed to stdout as indented JSON.
    """
    # Imported locally: the notebook-level imports of this class are commented
    # out, so without this the constructor call below raises NameError.
    from watson_developer_cloud import NaturalLanguageUnderstandingV1

    natural_language_understanding = NaturalLanguageUnderstandingV1(  # API client
        version='2018-11-16',
        iam_apikey=api_key,
        url=endpoint
    )

    response = natural_language_understanding.analyze(
        text=txt,
        features=Features(relations=RelationsOptions())).get_result()

    print(json.dumps(response, indent=2))

In [None]:
# Try the helper on a short sample sentence; the response is printed.
obtain_sentiment_analysis_from_string("Leonardo DiCaprio just won best actor!!")

In [None]:
def obtain_sentiment_analysis_from_url(url, target_str=''):
    """Return Watson NLU sentiment for the article at ``url``.

    This function can:

        1) Perform a document-level sentiment analysis, or,

        2) Take in target phrases, ``target_str``, and additionally return
           the sentiment of the first target reported in the response.

    NOTE: If passing in target_str, input as list!

    Args:
        url: Address of the article to analyze.
        target_str: Optional list of target phrases. When empty (the
            default) only document-level sentiment is requested.

    Returns:
        A dict with ``source``, ``url``, ``article_sentiment_label`` and
        ``article_sentiment_score``; when ``target_str`` is given it also
        holds ``target``, ``target_sentiment_label`` and
        ``target_sentiment_score``. Returns ``None`` if the request fails or
        the response lacks the expected fields.
    """
    # Imported locally: the notebook-level imports of these names are
    # commented out. Without them the lookups below raise NameError, which
    # the old bare ``except`` silently turned into a ``None`` result.
    from watson_developer_cloud import NaturalLanguageUnderstandingV1
    from watson_developer_cloud.natural_language_understanding_v1 import (
        Features,
        SentimentOptions,
    )

    natural_language_understanding = NaturalLanguageUnderstandingV1(
        version='2018-11-16',
        iam_apikey=api_key,
        url=endpoint
    )

    try:
        if target_str:
            sentiment_options = SentimentOptions(targets=target_str)
        else:
            sentiment_options = SentimentOptions(document=True)

        response = natural_language_understanding.analyze(
            url=url,
            features=Features(sentiment=sentiment_options)).get_result()

        doc_level = response['sentiment']['document']
        # Third '/'-separated piece of the retrieved URL, i.e. the host part
        # of an address shaped like "scheme://host/...".
        source = response['retrieved_url'].split('/')[2]

        if target_str:
            # Only the first target entry in the response is reported.
            target_level = response['sentiment']['targets'][0]
            response_dict = {
                'source': source,
                'target': target_str,
                'url': url,
                'article_sentiment_label': doc_level['label'],
                'article_sentiment_score': doc_level['score'],
                'target_sentiment_label': target_level['label'],
                'target_sentiment_score': target_level['score']
            }
        else:
            response_dict = {
                'source': source,
                'url': url,
                'article_sentiment_label': doc_level['label'],
                'article_sentiment_score': doc_level['score']
            }
        return response_dict

    except Exception:
        # Best effort: a URL that cannot be analyzed yields None so a batch
        # run keeps going. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        return None

##### **1b. Apply *obtain_sentiment_analysis_from_url* to all URLs**

In [None]:
# Load the headlines CSV into a DataFrame; later cells read its 'url' and
# 'title' columns.
df_path = '../../Downloads/readability_headlines_test_20190204.csv'
df = pd.read_csv(df_path)

In [None]:
def retrieve_analysis_df(df, limit=250):
    """Build a table of URLs, headlines and their sentiment results.

    Args:
        df: DataFrame with ``url`` and ``title`` columns.
        limit: Maximum number of leading rows to analyze (default 250, the
            previously hard-coded value). ``None`` processes every row.

    Returns:
        A DataFrame with columns ``url``, ``headline`` and
        ``sentiment_analysis``; the last column holds whatever
        ``obtain_sentiment_analysis_from_url`` returns for each URL.
    """
    # Positional slice of the first ``limit`` rows, taken once and reused.
    subset = df.iloc[:limit]

    analysis = pd.DataFrame()
    analysis['url'] = subset['url']
    analysis['headline'] = subset['title']
    # The original referenced ``obtain_sentiment_analysis``, which is not
    # defined anywhere in the notebook; the URL-based helper is the one meant.
    analysis['sentiment_analysis'] = subset['url'].apply(
        obtain_sentiment_analysis_from_url)

    return analysis

# Build the analysis table for the loaded DataFrame.
analysis = retrieve_analysis_df(df)

##### **1c. Convert each sentiment-analysis dictionary in `analysis` into a row of a new DataFrame**

In [None]:
def get_final_df(analysis):
    """Expand the per-row sentiment dictionaries into their own DataFrame.

    Args:
        analysis: DataFrame with a ``sentiment_analysis`` column whose
            entries are dicts, or missing / non-dict values for rows that
            could not be analyzed.

    Returns:
        A DataFrame with one row per dict entry and one column per dict key.
        Rows of ``analysis`` containing any missing value are dropped first.
    """
    analysis = analysis.dropna()
    # The original iterated over an undefined name ``temp``; the filtered
    # ``analysis`` frame is the intended source.
    valid_sentiment_analysis = [
        result for result in analysis['sentiment_analysis']
        if isinstance(result, dict)
    ]

    return pd.DataFrame(valid_sentiment_analysis)

# Turn the collected sentiment dictionaries into the final table.
final_analysis = get_final_df(analysis)

In [None]:
# Display the final table.
final_analysis