In [1]:
from requests.exceptions import RequestException
from contextlib import closing
import re
from lxml import html
import requests
from datetime import datetime
import Algorithmia
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

import os
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r"./climate_sentiment_key.json"

In [2]:
client = Algorithmia.client('sim0ds3RRSQiux4Q9vb47cqyuHe1')
algo = client.algo('nlp/SentimentAnalysis/1.0.5')
def get_algorithmia_sentiment(text):
    algo_in = {
          "document": text
        }
    return algo.pipe(algo_in).result[0]['sentiment']

In [3]:
# Instantiates a client
client = language.LanguageServiceClient()

def get_gcp_sentiment(text):
    document = types.Document(
        content=text,
        type=enums.Document.Type.PLAIN_TEXT)
    # Detects the sentiment of the text
    sentiment = client.analyze_sentiment(document=document).document_sentiment
    return sentiment.score

In [4]:
def date_from_url(url):
    date_search = r"(\d+)\/(\d+)\/(\d+)"
    return datetime(*[int(x) for x in re.search(date_search, url).groups()])

In [121]:
def get_mean_sentiment(sentiments):
    return sum([x[0] for x in sentiments]) / len(sentiments)

In [141]:
def analyze_sentiment_by_sentences(url, verbose=False):
    # download HTML from simplified page
    base_url = "https://www.textise.net/showText.aspx?strURL="
    print("Downloading HTML page...")
    page = requests.get(base_url + url.replace(":", "%253A"))
    tree = html.fromstring(page.content)
    print("Finished downloading. Parsing...")
    
    divs = []
    for div in tree.xpath('//div/text()'):
        divs.append(div.rstrip())
        
    # special economist filter
    delete = ["Sections", "Here are some options:", "Get our daily newsletter", "a day ago", "Latest stories", "Upgrade your inbox and get our Daily Dispatch and Editor's Picks.", "Apps & Digital Editions", "Blogs", "From The Economist Group", "Media", "\r\nDid you know that you can easily add text-only links to your own web site? For more information, visit the",
             ]
    divs = filter(lambda x: x != "<div>", divs)
    divs = filter(lambda x: x != "</div>", divs)
    divs = filter(lambda x: x != "\xa0", divs)
    divs = filter(lambda x: x != ".", divs)
    divs = filter(lambda x: x[-9:] != "hours ago", divs)    
    divs = filter(lambda x: x != " |", divs)
    divs = filter(lambda x: x != "", divs)
    divs = filter(lambda x: x != "hours ago", divs)
    divs = filter(lambda x: x not in delete, divs)
    
    divs = list(divs)[5:-25]
    divs = " ".join(list(divs))
    sentences = [x.rstrip() + "." for x in divs.split(".")]
    sentiments = []
    for i, sentence in enumerate(sentences):
        sentiment = get_gcp_sentiment(sentence)
        sentiments.append((sentiment, sentence))
        if verbose:
            print("Finished analyzing sentence {} of {}".format(i + 1, len(sentences)))
            print("Sentiment: {}".format(sentiment))
        
    return sentiments

In [150]:
sentiments = analyze_sentiment_by_sentences("https://www.economist.com/business/2018/08/09/why-oil-firms-should-worry-more-about-climate-change", verbose=True)

Downloading HTML page...
Finished downloading. Parsing...
Finished analyzing sentence 1 of 22
Sentiment: -0.10000000149011612
Finished analyzing sentence 2 of 22
Sentiment: 0.0
Finished analyzing sentence 3 of 22
Sentiment: 0.0
Finished analyzing sentence 4 of 22
Sentiment: 0.0
Finished analyzing sentence 5 of 22
Sentiment: -0.4000000059604645
Finished analyzing sentence 6 of 22
Sentiment: 0.0
Finished analyzing sentence 7 of 22
Sentiment: -0.20000000298023224
Finished analyzing sentence 8 of 22
Sentiment: -0.699999988079071
Finished analyzing sentence 9 of 22
Sentiment: -0.4000000059604645
Finished analyzing sentence 10 of 22
Sentiment: -0.800000011920929
Finished analyzing sentence 11 of 22
Sentiment: 0.0
Finished analyzing sentence 12 of 22
Sentiment: 0.0
Finished analyzing sentence 13 of 22
Sentiment: 0.10000000149011612
Finished analyzing sentence 14 of 22
Sentiment: 0.10000000149011612
Finished analyzing sentence 15 of 22
Sentiment: -0.5
Finished analyzing sentence 16 of 22
Senti

In [151]:
sentiments

[(-0.10000000149011612,
  'THE oil industry has much to fear from the Paris climate deal of 2015, which aims to limit temperature rises to less than 2°C above the pre-industrial era.'),
 (0.0,
  ' To curb carbon emissions, demand for fossil fuels will have to drop in coming decades.'),
 (0.0,
  ' That is likely to push down oil prices and the value of investments that firms have made based upon them.'),
 (0.0,
  ' A report published on August 6th by Sarasin & Partners, an asset manager in London, suggests that oil firms are assuming that decarbonisation will be limited and are thus overstating their assets.'),
 (-0.4000000059604645,
  ' Sarasin notes that eight European oil giants all used long-term oil price assumptions of $70-80 a barrel, rising by 2% a year with inflation to $127-145 by 2050, to price their assets.'),
 (0.0, ' But that does not appear to assume any drop in demand.'),
 (-0.20000000298023224,
  ' The International Energy Agency predicts a price of just $60 by 2060; Oi

In [152]:
get_mean_sentiment(sentiments)

-0.14090909199281174