# Imports

In [1]:
import json
import os
import sys
import unicodedata

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_watson.natural_language_understanding_v1 import Features, SentimentOptions, EmotionOptions

# Authentication

In [2]:
# Service credentials. The environment variables follow the IBM Cloud SDK naming
# convention (<SERVICE_NAME>_APIKEY / <SERVICE_NAME>_URL) and take precedence
# when they are set.
# NOTE(review): the literal fallbacks only keep existing runs working. An API key
# committed to source should be treated as leaked -- rotate it and remove the
# fallback once the environment variables are in place.
authenticator = IAMAuthenticator(
    os.environ.get(
        'NATURAL_LANGUAGE_UNDERSTANDING_APIKEY',
        'ZtAL7sIQ58MB8UrjOeYR_fvko36zVObg57OQ8y3FftDn',
    )
)
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    authenticator=authenticator
)
natural_language_understanding.set_service_url(
    os.environ.get(
        'NATURAL_LANGUAGE_UNDERSTANDING_URL',
        'https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/90eb04a6-053b-4fdd-a09a-2005d9fe421b',
    )
)

# Analysis

In [3]:
# writes a json to fpath
def write_json(fpath, data):
    """Serialize ``data`` as JSON into the file at ``fpath``.

    The file is opened in write mode, so any previous content is replaced.
    """
    with open(fpath, 'w') as json_file:
        json.dump(data, fp=json_file)

In [4]:
# removes control characters from a string
def remove_control_characters(s):
    """Return ``s`` without characters of the Unicode "C" (Other) categories.

    A character is dropped when its ``unicodedata.category`` code starts with
    "C"; every other character is kept in its original order.
    """
    kept = [char for char in s if not unicodedata.category(char).startswith("C")]
    return "".join(kept)

In [5]:
# takes the raw content of an article (an array of text paragraphs with unicode control characters) and returns a sanitized complete text
def sanatize_content(raw_content):
    """Join the paragraphs of an article into one cleaned text.

    Args:
        raw_content: Iterable of paragraph strings. Any iterable works; it does
            not need to support ``len()`` or indexing.

    Returns:
        One string in which every paragraph has been passed through
        ``remove_control_characters`` and is preceded by a single space. A
        non-empty result therefore starts with a space; empty input yields ``''``.
    """
    return ''.join(
        ' ' + remove_control_characters(paragraph) for paragraph in raw_content
    )

In [32]:
# takes a json of one newssource and calls sentiment and emotion analysis for each article and writes them directly to the given json file
def analyze_newssource(fpath, json):
    """Analyze every not-yet-analyzed article of one news source.

    Each successfully analyzed article gets a ``'watson_analysis'`` dict with
    the keys ``sentiment``, ``sadness``, ``joy``, ``fear``, ``disgust`` and
    ``anger``. The complete article list is written back to ``fpath`` after
    every successful analysis, so finished work survives an interruption.

    Args:
        fpath: Path of the JSON file the article list is written to.
        json: List of article dicts; modified in place. NOTE: this name shadows
            the ``json`` module inside the function. It is kept unchanged so
            that existing keyword callers keep working.

    Returns:
        None.
    """
    articles = json
    no_articles = 0

    for article in articles:
        # Skip articles that were already analyzed or that have no content
        # (missing key, None, or empty).
        if 'watson_analysis' in article or not article.get('content'):
            continue

        # Progress message every 10 articles.
        if no_articles % 10 == 0:
            print('Analyzing article ' + str(no_articles) + ' to ' + str(no_articles + 10))

        no_articles += 1

        sanatized_content = sanatize_content(article['content'])

        result = analyze_text(sanatized_content, 0)

        # A failed analysis is reported either as None or as a tuple that
        # contains None; check before unpacking so neither form can crash here.
        if result is None or any(value is None for value in result):
            continue

        sentiment, sadness, joy, fear, disgust, anger = result

        article['watson_analysis'] = {
            'sentiment': sentiment,
            'sadness': sadness,
            'joy': joy,
            'fear': fear,
            'disgust': disgust,
            'anger': anger,
        }

        # Persist after every article rather than once at the end.
        write_json(fpath, articles)

    print('Analysis done. All changes written.')

In [29]:
# takes a text, requests analysis and returns sentiment and emotion results

def analyze_text(text, trial_count):
    """Request sentiment and emotion analysis for ``text`` and return the scores.

    The request is retried whenever it raises, until the number of failed
    trials reaches five.

    Args:
        text: The text to analyze.
        trial_count: Number of trials that have already failed; callers
            normally pass ``0``.

    Returns:
        A 6-tuple ``(sentiment, sadness, joy, fear, disgust, anger)``.
        A value that is missing from the response is replaced by ``'N/A'``.
        If every trial failed, all six values are ``None``.
    """
    max_trials = 5
    failed = (None, None, None, None, None, None)

    for _ in range(trial_count, max_trials):
        try:
            response = natural_language_understanding.analyze(
                text=text,
                features=Features(sentiment=SentimentOptions(), emotion=EmotionOptions())
            ).get_result()
        except Exception:
            # Boundary to a remote service: any failure counts as one failed trial.
            print('Analysis exception, retrying...')
        else:
            break
    else:
        # Reached when the trial budget is used up without a successful request.
        print('Analysis failed 5 times. Skipping.')
        return failed

    try:
        sentiment = response['sentiment']['document']['score']
    except KeyError:
        sentiment = 'N/A'
        print('No value for sentiment.')

    try:
        emotions = response['emotion']['document']['emotion']
        sadness, joy, fear, disgust, anger = [
            emotions[name] for name in ('sadness', 'joy', 'fear', 'disgust', 'anger')
        ]
    except KeyError:
        # One missing emotion invalidates the whole emotion group.
        sadness = joy = fear = disgust = anger = 'N/A'
        print('No value for emotions.')

    return sentiment, sadness, joy, fear, disgust, anger

In [30]:
def analyze_newssources(newssources):
    """Analyze the dataset file of every news source named in ``newssources``.

    For each name the article list is loaded from ``./dataset/<name>.json``
    and handed to ``analyze_newssource`` together with that same path.
    """
    for source_name in newssources:
        dataset_path = './dataset/' + source_name + '.json'

        # load the article list of this news source
        with open(dataset_path) as dataset_file:
            print('Opening ' + dataset_path)
            articles = json.load(dataset_file)

        # run the analysis on the loaded articles
        print('Analyzing newssource ' + source_name)
        analyze_newssource(dataset_path, articles)

# Start

In [31]:
# news sources to process; each name is resolved to './dataset/<name>.json'
newssources = ['Washington Post']

# runs the analysis for every listed news source as soon as this cell/script executes
analyze_newssources(newssources)

mal.', 'MOSCOW — China has handed over the genome of the coronavirus to Russia in a joint effort between the two countries to create a vaccine, the Russian Consulate in Guangzhou said in a statement Wednesday.', 'Russia has not had any confirmed cases of the new virus, but its consumer safety regulator, Rospotrebnadzor, said it started work on a vaccine a week ago.', '“Yes, of course, the development of a vaccine is underway. Every time we have a mutation (of a virus), we start developing a vaccine immediately,” Anna Popova, the head of Rospotrebnadzor, told Russia’s state-run RIA Novosti news agency.', 'German Shipulin, the deputy director of the Health Ministry’s strategic planning center, told the Izvestia newspaper that a vaccine will require a significant allocation of funds and take at least six months to develop.', '“We do not know how this virus will behave among our population. … The susceptibility of the population to it depends on genetics. But if the virus does get to Russi