In [1]:
import scrape_reuters as scrape
import pickle

Initializing Algorithmia...
Initializing GCP...


In [2]:
OUT_FILE = "output_data/output_reuters.pkl"

In [8]:
results = {}

links = set()
with open('sources/reuters_sources.txt', 'r') as sources:
    for line in sources:
        url = line.rstrip()
        links.add(url)

In [9]:
links

{'http://www.reuters.com/article/idUSL5N0LG30R20140211',
 'https://in.reuters.com/article/climate-change-environment-doha-idINDEE8AS08620121129',
 'https://uk.reuters.com/article/climate-emissions/emerging-economies-nearing-half-of-global-warming-emissions-idUKL5N0IL47J20131031',
 'https://uk.reuters.com/article/uk-britain-climate-puffins-idUKBRE97M0O220130823',
 'https://www.reuters.com/article/2011/04/07/us-climate-cities-idUSTRE7367DH20110407',
 'https://www.reuters.com/article/africa-climatechange-agriculture-idUSL5N16F1PV',
 'https://www.reuters.com/article/climate-monsoon/indian-monsoons-may-fail-more-often-due-to-climate-change-study-idUSL5E8M5COP20121106',
 'https://www.reuters.com/article/climatechange-impacts-idUSL6N0TD0DA20141123',
 'https://www.reuters.com/article/climatechange-summit-fashion/top-fashion-ceos-fear-rising-costs-from-climate-change-idUSL8N13S2EN20151203',
 'https://www.reuters.com/article/india-film-climatechange/love-in-the-time-of-climate-change-indian-film

In [11]:
try:
    with open(OUT_FILE, 'rb') as handle:
        avg_sentiments = pickle.load(handle)
        print("Loaded from file")
except FileNotFoundError:
    avg_sentiments = {}
    print("Existing output not found. Creating new output file...")

Loaded from file


In [12]:
for i, url in enumerate(links):
    print(url)

    try:
        date = scrape.get_date(url)
    except AttributeError:
        print("Missing date: {}".format(url))
        continue
    if date in avg_sentiments:
        print("Already have article from {}. Skipping {}".format(str(date)[:10], i))
        continue
    sentiments, date = scrape.analyze_sentiment_by_sentences(url, verbose=False)
    score = scrape.get_mean_sentiment(sentiments)
    print("Finished source {} out of {}. Average score: {}".format(i + 1, len(links), score))
    avg_sentiments[date] = score

https://www.reuters.com/article/us-malaria-climate/climate-change-could-mean-more-malaria-in-africa-south-america-idUSBREA251V620140306
Already have article from 2014-03-06. Skipping 0
https://www.reuters.com/article/us-usa-climatechange-lawsuit-rally/children-activists-rally-in-support-of-climate-change-lawsuit-idUSKCN1N401P
Already have article from 2018-10-30. Skipping 1
https://www.reuters.com/article/us-climate-ipcc/global-warming-threat-heightened-in-latest-u-n-report-idUSBREA2U00E20140331
Already have article from 2014-03-31. Skipping 2
https://www.reuters.com/article/us-science-lightning/bolt-from-the-blue-warming-climate-may-fuel-more-lightning-idUSKCN0IX2B020141113
Already have article from 2014-11-13. Skipping 3
https://www.reuters.com/article/markets-ratings-climatechange-idUSL8N1D841H
Already have article from 2016-11-07. Skipping 4
https://www.reuters.com/article/us-climate-slowdown/climate-scientists-struggle-to-explain-warming-slowdown-idUSBRE93F0AJ20130416
Already have

Finished downloading. Parsing...
Already have article from 2014-08-15. Skipping 118
https://www.reuters.com/article/us-kiribati-climate/tide-of-humanity-as-well-as-rising-seas-lap-at-kiribatis-future-idUSBRE95C04L20130613
Downloading HTML page...
Finished downloading. Parsing...
Already have article from 2013-06-13. Skipping 119
https://www.reuters.com/article/climate-monsoon/indian-monsoons-may-fail-more-often-due-to-climate-change-study-idUSL5E8M5COP20121106
Downloading HTML page...
Finished downloading. Parsing...
Already have article from 2012-11-06. Skipping 120
https://www.reuters.com/article/us-storm-irma-pope-climatechange/pope-says-humanity-will-go-down-if-it-does-not-address-climate-change-idUSKCN1BM1M4
Downloading HTML page...
Finished downloading. Parsing...
Already have article from 2017-09-11. Skipping 121
https://www.reuters.com/article/us-global-climatechange-crypto/producing-bitcoin-currency-could-void-climate-change-efforts-scientists-idUSKCN1N32RK
Downloading HTML pa

In [13]:
with open(OUT_FILE, 'wb') as f:
    pickle.dump(avg_sentiments, f)

In [14]:
x = []
y = []
for item in avg_sentiments:
    x.append(str(item)[:10])
    y.append(avg_sentiments[item])
details = zip(x, y)
for x, y in sorted(details, key=lambda x: x[0]):
    print("{},{}".format(x, y))

2011-01-12,0.04000000059604645
2011-02-14,0.05000000142238357
2011-03-02,0.015000000223517418
2011-04-07,0.03125000139698386
2011-04-26,-0.2000000016654239
2011-04-29,-0.06296296279739451
2011-05-27,-0.021428572280066355
2011-06-02,-0.08571428592715945
2011-06-06,0.04705882247756509
2011-06-09,-0.038461541900267966
2011-06-18,-0.03125000139698386
2011-06-20,-0.14736842324859217
2011-09-05,0.11818181881398866
2011-09-12,-0.10000000074505806
2011-09-28,3.0678861281451056e-09
2011-11-21,0.01999999980131785
2011-12-02,-0.012500000186264515
2011-12-04,-0.125
2012-01-27,-0.09310344966321156
2012-03-26,-0.1285714287133444
2012-04-13,0.1888888919795001
2012-04-26,0.1083333349476258
2012-04-29,0.1105263170443083
2012-06-27,-0.06923077083550967
2012-07-03,-0.14000000009934108
2012-07-22,0.11052631900498741
2012-09-26,-0.12500000260770322
2012-10-26,0.05833333171904087
2012-11-06,-0.0631578960701039
2012-11-09,-0.0733333354194959
2012-11-22,-0.03846154018090321
2012-11-29,0.08421052482567336
2012