In [59]:
import os
from datetime import datetime
import requests
import time
import pandas as pd
import spacy
nlp = spacy.load('en_core_web_sm')
from collections import Counter

In [60]:
# Load the pickled frame and discard the columns this analysis does not need.
# NOTE(review): unpickling executes arbitrary code from the file, so this
# assumes the .pkl was produced locally by a trusted step -- confirm.
unneeded_columns = [
    'web_url',
    'snippet',
    'date',
    '_id',
    'article',
    'VADER snippet neg',
    'VADER snippet neu',
    'VADER snippet pos',
    'VADER snippet compound',
]
loaded_frame = pd.read_pickle("vader_picke_file.pkl")
spacy_analysis = loaded_frame.drop(columns=unneeded_columns)

In [61]:
# Preview the loaded frame after the unused columns were dropped.
spacy_analysis

Unnamed: 0,abstract,pub_date
0,"It was a year full of big environmental news, ...",2010-12-31T23:00:27+0000
1,The first federal regulations on climate-alter...,2010-12-31T01:47:00+0000
2,About $11 billion in oil and natural gas trade...,2010-12-30T20:21:30+0000
3,"Recent years have been humbling for oil bulls,...",2010-12-30T02:34:46+0000
4,Crude oil traded over $90 for a fifth consecut...,2010-12-29T14:06:12+0000
...,...,...
1515,Weakened demand for semiconductors is hurting ...,2019-02-04T11:36:40+0000
1516,"Innovation, investment and inviting geology ha...",2019-02-03T23:17:02+0000
1517,On one side of a barricade in remote British C...,2019-01-27T23:45:27+0000
1518,The United States’ recognition of Venezuela’s ...,2019-01-25T00:29:52+0000


In [62]:
# Work on an independent copy of the loaded frame. Wrapping an existing
# DataFrame in pd.DataFrame(...) does not copy its data by default, so the
# in-place 'pub_date' rewrites in later cells could also alter
# `spacy_analysis`; .copy() keeps the loaded frame untouched.
master_df = spacy_analysis.copy()
master_df

Unnamed: 0,abstract,pub_date
0,"It was a year full of big environmental news, ...",2010-12-31T23:00:27+0000
1,The first federal regulations on climate-alter...,2010-12-31T01:47:00+0000
2,About $11 billion in oil and natural gas trade...,2010-12-30T20:21:30+0000
3,"Recent years have been humbling for oil bulls,...",2010-12-30T02:34:46+0000
4,Crude oil traded over $90 for a fifth consecut...,2010-12-29T14:06:12+0000
...,...,...
1515,Weakened demand for semiconductors is hurting ...,2019-02-04T11:36:40+0000
1516,"Innovation, investment and inviting geology ha...",2019-02-03T23:17:02+0000
1517,On one side of a barricade in remote British C...,2019-01-27T23:45:27+0000
1518,The United States’ recognition of Venezuela’s ...,2019-01-25T00:29:52+0000


In [63]:
# Parse the 'pub_date' column into pandas datetimes (overwrites the column in place).
master_df['pub_date'] = pd.to_datetime(master_df['pub_date'])

In [64]:
# NOTE(review): the returned timestamp is neither assigned nor used by any
# visible cell; presumably a leftover run marker -- confirm before removing.
datetime.now()

datetime.datetime(2020, 2, 11, 16, 56, 58, 269566)

In [65]:
# Reduce 'pub_date' to a plain 'YYYY-MM-DD' day string so rows can be grouped
# per calendar day. The column is re-parsed with pd.to_datetime first so the
# cell is idempotent: on a second run the column already holds strings, and
# calling .dt directly on it would raise instead of reproducing the result.
master_df['pub_date'] = pd.to_datetime(master_df['pub_date']).dt.strftime('%Y-%m-%d')
master_df

Unnamed: 0,abstract,pub_date
0,"It was a year full of big environmental news, ...",2010-12-31
1,The first federal regulations on climate-alter...,2010-12-31
2,About $11 billion in oil and natural gas trade...,2010-12-30
3,"Recent years have been humbling for oil bulls,...",2010-12-30
4,Crude oil traded over $90 for a fifth consecut...,2010-12-29
...,...,...
1515,Weakened demand for semiconductors is hurting ...,2019-02-04
1516,"Innovation, investment and inviting geology ha...",2019-02-03
1517,On one side of a barricade in remote British C...,2019-01-27
1518,The United States’ recognition of Venezuela’s ...,2019-01-25


In [66]:
# Build one (combined text, day) tuple per distinct publication day.
# A single groupby pass replaces re-filtering the whole frame once per day;
# sort=False keeps the days in order of first appearance, which is the order
# .unique() produced in the loop-based version.
# NOTE: groupby leaves out rows whose 'pub_date' is missing.
news_by_day = master_df.groupby('pub_date', sort=False)['abstract'].agg(' '.join)
# Each entry: (that day's abstracts joined with single spaces, day string).
all_news_per_day = [(combined_news, day) for day, combined_news in news_by_day.items()]

In [67]:
# Turn the (combined text, day) tuples into a two-column frame.
# The label 'combined_absract' keeps its existing spelling because the
# tokenization cell indexes the frame by exactly that name.
per_day_columns = ['combined_absract', 'Date']
combined_news_day = pd.DataFrame(data=all_news_per_day, columns=per_day_columns)

In [68]:
# Preview the per-day frame (one row per distinct 'Date').
combined_news_day

Unnamed: 0,combined_absract,Date
0,"It was a year full of big environmental news, ...",2010-12-31
1,About $11 billion in oil and natural gas trade...,2010-12-30
2,Crude oil traded over $90 for a fifth consecut...,2010-12-29
3,PetroChina is selling its stake in a gas pipel...,2010-12-28
4,Over the last three presidential administratio...,2010-12-24
...,...,...
965,Weakened demand for semiconductors is hurting ...,2019-02-04
966,"Innovation, investment and inviting geology ha...",2019-02-03
967,On one side of a barricade in remote British C...,2019-01-27
968,The United States’ recognition of Venezuela’s ...,2019-01-25


In [69]:
# Tokenization and lemmatization
# Run every combined daily abstract through the spaCy pipeline and collect,
# per day:
#   tokens - the text of every token in the document
#   lemma  - the lemma of every token that is neither a stop word nor punctuation
tokens = []
lemma = []
# astype(str) guarantees nlp.pipe only ever receives Python strings.
abstract_texts = combined_news_day['combined_absract'].astype(str).values
# `n_threads` is intentionally not passed: it has been a deprecated no-op
# since spaCy 2.1 and spaCy 3's Language.pipe no longer accepts it.
for doc in nlp.pipe(abstract_texts, batch_size=50):
    if doc.is_parsed:
        tokens.append([n.text for n in doc])
        lemma.append([n.lemma_ for n in doc
                        if not n.is_stop
                        and not n.is_punct])
    else:
        # Keep both lists the same length as the DataFrame: add a placeholder
        # for any document the pipeline did not parse.
        tokens.append(None)
        lemma.append(None)
# One list (or None) per row, aligned with combined_news_day's row order.
combined_news_day['abstract_tokens'] = tokens
combined_news_day['abstract_lemma'] = lemma

In [70]:
# Preview the frame with the 'abstract_tokens' and 'abstract_lemma' columns added.
combined_news_day

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma
0,"It was a year full of big environmental news, ...",2010-12-31,"[It, was, a, year, full, of, big, environmenta...","[year, big, environmental, news, BP, oil, spil..."
1,About $11 billion in oil and natural gas trade...,2010-12-30,"[About, $, 11, billion, in, oil, and, natural,...","[$, 11, billion, oil, natural, gas, trade, imp..."
2,Crude oil traded over $90 for a fifth consecut...,2010-12-29,"[Crude, oil, traded, over, $, 90, for, a, fift...","[crude, oil, trade, $, 90, fifth, consecutive,..."
3,PetroChina is selling its stake in a gas pipel...,2010-12-28,"[PetroChina, is, selling, its, stake, in, a, g...","[PetroChina, sell, stake, gas, pipeline, opera..."
4,Over the last three presidential administratio...,2010-12-24,"[Over, the, last, three, presidential, adminis...","[presidential, administration, Office, Foreign..."
...,...,...,...,...
965,Weakened demand for semiconductors is hurting ...,2019-02-04,"[Weakened, demand, for, semiconductors, is, hu...","[weakened, demand, semiconductor, hurt, financ..."
966,"Innovation, investment and inviting geology ha...",2019-02-03,"[Innovation, ,, investment, and, inviting, geo...","[Innovation, investment, invite, geology, give..."
967,On one side of a barricade in remote British C...,2019-01-27,"[On, one, side, of, a, barricade, in, remote, ...","[barricade, remote, British, Columbia, member,..."
968,The United States’ recognition of Venezuela’s ...,2019-01-25,"[The, United, States, ’, recognition, of, Vene...","[United, States, recognition, Venezuela, oppos..."


In [71]:
# For each day, rank its lemmas by frequency and keep the ten most common
# as a list of (lemma, count) pairs.
counts = [
    Counter(day_lemmas).most_common(10)
    for day_lemmas in combined_news_day['abstract_lemma']
]

In [73]:
# Store each day's list of (lemma, count) pairs in a 'counts' column.
combined_news_day['counts'] = counts

In [74]:
# The top word of a day is the lemma in the first (lemma, count) pair of its
# 'counts' list. A day can have an empty list (no lemmas survived filtering,
# or its lemma entry was the None placeholder); indexing [0][0] would raise
# IndexError there, so such days get None instead.
combined_news_day['top_word'] = combined_news_day['counts'].apply(
    lambda ranked: ranked[0][0] if ranked else None
)

In [76]:
# Preview the top-ranked lemma of each day.
combined_news_day['top_word']

0           climate
1               oil
2             crude
3            energy
4      presidential
           ...     
965        weakened
966             oil
967       barricade
968          United
969            2070
Name: top_word, Length: 970, dtype: object

In [77]:
# Days whose top-ranked lemma is exactly 'gas' (case-sensitive match).
is_gas_day = combined_news_day['top_word'].eq('gas')
combined_news_day[is_gas_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
32,Environmental concerns and other issues could ...,2010-10-13,"[Environmental, concerns, and, other, issues, ...","[environmental, concern, issue, complicate, ex...","[(gas, 2), (environmental, 1), (concern, 1), (...",gas
54,A sewage treatment in England has produced a d...,2010-08-05,"[A, sewage, treatment, in, England, has, produ...","[sewage, treatment, England, produce, demo, ca...","[(gas, 2), (Reliance, 2), (sewage, 1), (treatm...",gas
145,Alaskan natural gas could provide an alternati...,2011-12-10,"[Alaskan, natural, gas, could, provide, an, al...","[alaskan, natural, gas, provide, alternative, ...","[(gas, 2), (oil, 2), (alaskan, 1), (natural, 1...",gas
148,"The actor, who lives in upstate New York, is a...",2011-12-02,"[The, actor, ,, who, lives, in, upstate, New, ...","[actor, live, upstate, New, York, odd, neighbo...","[(gas, 2), (actor, 1), (live, 1), (upstate, 1)...",gas
294,The Russian gas export monopoly said that it w...,2012-09-05,"[The, Russian, gas, export, monopoly, said, th...","[russian, gas, export, monopoly, say, strongly...","[(gas, 2), (russian, 1), (export, 1), (monopol...",gas
301,"Gas and coal power won't disappear soon, but w...",2012-08-16,"[Gas, and, coal, power, wo, n't, disappear, so...","[gas, coal, power, will, disappear, soon, carb...","[(gas, 1), (coal, 1), (power, 1), (will, 1), (...",gas
340,"Are gas prices soaring, or collapsing? It depe...",2012-03-30,"[Are, gas, prices, soaring, ,, or, collapsing,...","[gas, price, soar, collapse, depend, gas, talk...","[(gas, 3), (price, 3), (soar, 1), (collapse, 1...",gas
405,If ENI and its partners succeed in exporting l...,2013-10-03,"[If, ENI, and, its, partners, succeed, in, exp...","[ENI, partner, succeed, export, liquefy, natur...","[(gas, 2), (ENI, 1), (partner, 1), (succeed, 1...",gas
465,As gas starts to flow from wells drilled into ...,2013-03-12,"[As, gas, starts, to, flow, from, wells, drill...","[gas, start, flow, well, drill, Utica, Shale, ...","[(gas, 1), (start, 1), (flow, 1), (well, 1), (...",gas
579,A Russian gas embargo might be just what the w...,2014-04-12,"[A, Russian, gas, embargo, might, be, just, wh...","[russian, gas, embargo, world, need, seriously...","[(gas, 2), (russian, 1), (embargo, 1), (world,...",gas


In [78]:
# Days whose top-ranked lemma is exactly 'Energy'. The match is
# case-sensitive, so days whose top word is lower-case 'energy' are excluded.
is_energy_day = combined_news_day['top_word'].eq('Energy')
combined_news_day[is_energy_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
10,An Energy Department forecast projects that th...,2010-12-16,"[An, Energy, Department, forecast, projects, t...","[Energy, Department, forecast, project, price,...","[(Energy, 1), (Department, 1), (forecast, 1), ...",Energy
137,PetroBakken Energy said that it had paid $262 ...,2010-01-05,"[PetroBakken, Energy, said, that, it, had, pai...","[PetroBakken, Energy, say, pay, $, 262, millio...","[(Energy, 2), (PetroBakken, 1), (say, 1), (pay...",Energy
208,Energy Secretary Chu names a panel to study th...,2011-05-06,"[Energy, Secretary, Chu, names, a, panel, to, ...","[Energy, Secretary, Chu, name, panel, study, c...","[(Energy, 1), (Secretary, 1), (Chu, 1), (name,...",Energy
251,The Energy Department has offered a large loan...,2011-02-03,"[The, Energy, Department, has, offered, a, lar...","[Energy, Department, offer, large, loan, guara...","[(Energy, 1), (Department, 1), (offer, 1), (la...",Energy
377,The French oil giant Total is acquiring a big ...,2012-01-03,"[The, French, oil, giant, Total, is, acquiring...","[french, oil, giant, Total, acquire, big, piec...","[(Energy, 2), (shale, 2), (operation, 2), ($, ...",Energy
588,The Energy Department detailed how much more i...,2014-03-12,"[The, Energy, Department, detailed, how, much,...","[Energy, Department, detail, expect, consumer,...","[(Energy, 1), (Department, 1), (detail, 1), (e...",Energy


In [79]:
# Days whose top-ranked lemma is exactly 'United' (case-sensitive match).
is_united_day = combined_news_day['top_word'].eq('United')
combined_news_day[is_united_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
266,The United States may soon wipe out its need f...,2012-12-13,"[The, United, States, may, soon, wipe, out, it...","[United, States, soon, wipe, need, import, oil...","[(United, 1), (States, 1), (soon, 1), (wipe, 1...",United
279,After an attempt to take over Unocal was block...,2012-11-12,"[After, an, attempt, to, take, over, Unocal, w...","[attempt, Unocal, block, United, States, say, ...","[(United, 3), (States, 3), (attempt, 2), (say,...",United
318,The United States is now a much more significa...,2012-06-09,"[The, United, States, is, now, a, much, more, ...","[United, States, significant, energy, producer...","[(United, 1), (States, 1), (significant, 1), (...",United
527,The United States government took aim at Exxon...,2014-09-13,"[The, United, States, government, took, aim, a...","[United, States, government, take, aim, Exxon,...","[(United, 1), (States, 1), (government, 1), (t...",United
531,United States refinery production in recent we...,2014-08-28,"[United, States, refinery, production, in, rec...","[United, States, refinery, production, recent,...","[(United, 2), (States, 2), (production, 2), (r...",United
574,"The United States, Canada and Mexico — for dif...",2014-04-21,"[The, United, States, ,, Canada, and, Mexico, ...","[United, States, Canada, Mexico, different, re...","[(United, 1), (States, 1), (Canada, 1), (Mexic...",United
590,The United States’s new gas abundance offers a...,2014-03-07,"[The, United, States, ’s, new, gas, abundance,...","[United, States, new, gas, abundance, offer, c...","[(United, 1), (States, 1), (new, 1), (gas, 1),...",United
669,A United States Geological Survey report ident...,2015-04-23,"[A, United, States, Geological, Survey, report...","[United, States, Geological, Survey, report, i...","[(United, 1), (States, 1), (Geological, 1), (S...",United
795,The United States was supposed to be a L.N.G. ...,2017-10-16,"[The, United, States, was, supposed, to, be, a...","[United, States, suppose, L.N.G., importer, pl...","[(United, 1), (States, 1), (suppose, 1), (L.N....",United
879,"The United States, Saudi Arabia and Russia see...",2018-06-13,"[The, United, States, ,, Saudi, Arabia, and, R...","[United, States, Saudi, Arabia, Russia, common...","[(United, 1), (States, 1), (Saudi, 1), (Arabia...",United


In [80]:
# Days whose top-ranked lemma is exactly 'China' (case-sensitive match).
is_china_day = combined_news_day['top_word'].eq('China')
combined_news_day[is_china_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
34,China’s deal with Chesapeake Energy involves a...,2010-10-11,"[China, ’s, deal, with, Chesapeake, Energy, in...","[China, deal, Chesapeake, Energy, involve, pot...","[(China, 1), (deal, 1), (Chesapeake, 1), (Ener...",China
487,China is turning the Xinjiang region into a na...,2014-12-20,"[China, is, turning, the, Xinjiang, region, in...","[China, turn, Xinjiang, region, national, hub,...","[(China, 1), (turn, 1), (Xinjiang, 1), (region...",China
521,Russian officials say that negotiations are he...,2014-09-25,"[Russian, officials, say, that, negotiations, ...","[russian, official, negotiation, heat, China, ...","[(China, 2), (russian, 1), (official, 1), (neg...",China
561,China has an insatiable appetite for energy an...,2014-05-22,"[China, has, an, insatiable, appetite, for, en...","[China, insatiable, appetite, energy, deal, na...","[(China, 1), (insatiable, 1), (appetite, 1), (...",China
624,"In China and other emerging markets, growth is...",2015-10-23,"[In, China, and, other, emerging, markets, ,, ...","[China, emerge, market, growth, wane, demand, ...","[(China, 1), (emerge, 1), (market, 1), (growth...",China
779,"For years, China gobbled up metals, crops and ...",2016-01-09,"[For, years, ,, China, gobbled, up, metals, ,,...","[year, China, gobble, metal, crop, fuel, rapid...","[(China, 2), (year, 1), (gobble, 1), (metal, 1...",China
789,China’s goal is to dominate Eurasia by relegat...,2017-11-03,"[China, ’s, goal, is, to, dominate, Eurasia, b...","[China, goal, dominate, Eurasia, relegate, Rus...","[(China, 1), (goal, 1), (dominate, 1), (Eurasi...",China


In [81]:
# Days whose top-ranked lemma is exactly 'Shell' (case-sensitive match).
is_shell_day = combined_news_day['top_word'].eq('Shell')
combined_news_day[is_shell_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
184,Shell was under pressure after it emerged that...,2011-08-16,"[Shell, was, under, pressure, after, it, emerg...","[Shell, pressure, emerge, company, wait, day, ...","[(Shell, 1), (pressure, 1), (emerge, 1), (comp...",Shell
324,"Despite lively opposition, Shell will start te...",2012-05-23,"[Despite, lively, opposition, ,, Shell, will, ...","[despite, lively, opposition, Shell, start, te...","[(Shell, 2), (despite, 1), (lively, 1), (oppos...",Shell
689,"Shell is dismantling its North Sea project, a ...",2015-02-18,"[Shell, is, dismantling, its, North, Sea, proj...","[Shell, dismantle, North, Sea, project, step, ...","[(Shell, 1), (dismantle, 1), (North, 1), (Sea,...",Shell


In [44]:
# Days whose top-ranked lemma is exactly 'Exxon' (case-sensitive match).
is_exxon_day = combined_news_day['top_word'].eq('Exxon')
combined_news_day[is_exxon_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
179,Exxon Mobil struck an agreement to explore for...,2011-08-30,"[Exxon, Mobil, struck, an, agreement, to, expl...","[Exxon, Mobil, strike, agreement, explore, oil...","[(Exxon, 1), (Mobil, 1), (strike, 1), (agreeme...",Exxon
188,Exxon’s earnings were a bit lower than analyst...,2011-07-28,"[Exxon, ’s, earnings, were, a, bit, lower, tha...","[Exxon, earning, bit, low, analyst, expect, de...","[(Exxon, 1), (earning, 1), (bit, 1), (low, 1),...",Exxon
701,Exxon’s chief has extensive international busi...,2016-12-14,"[Exxon, ’s, chief, has, extensive, internation...","[Exxon, chief, extensive, international, busin...","[(Exxon, 1), (chief, 1), (extensive, 1), (inte...",Exxon


In [82]:
# Days whose top-ranked lemma is exactly 'President' (case-sensitive match).
is_president_day = combined_news_day['top_word'].eq('President')
combined_news_day[is_president_day]

Unnamed: 0,combined_absract,Date,abstract_tokens,abstract_lemma,counts,top_word
112,President Obama set out to attract bipartisan ...,2010-04-01,"[President, Obama, set, out, to, attract, bipa...","[President, Obama, set, attract, bipartisan, s...","[(President, 1), (Obama, 1), (set, 1), (attrac...",President
288,President Obama’s claim about pipeline constru...,2012-10-17,"[President, Obama, ’s, claim, about, pipeline,...","[President, Obama, claim, pipeline, constructi...","[(President, 1), (Obama, 1), (claim, 1), (pipe...",President
319,President Hu Jintao of China and President Ham...,2012-06-08,"[President, Hu, Jintao, of, China, and, Presid...","[President, Hu, Jintao, China, President, Hami...","[(President, 2), (China, 2), (Afghanistan, 2),...",President
336,President Obama calls for the creation of an i...,2012-04-13,"[President, Obama, calls, for, the, creation, ...","[President, Obama, call, creation, interagency...","[(President, 1), (Obama, 1), (call, 1), (creat...",President
351,President Obama proposed new incentives for al...,2012-03-08,"[President, Obama, proposed, new, incentives, ...","[President, Obama, propose, new, incentive, al...","[(President, 1), (Obama, 1), (propose, 1), (ne...",President
415,President Enrique Peña Nieto proposed opening ...,2013-08-12,"[President, Enrique, Peña, Nieto, proposed, op...","[President, Enrique, Peña, Nieto, propose, ope...","[(President, 1), (Enrique, 1), (Peña, 1), (Nie...",President
454,President Obama will be doing Canada a favor i...,2013-03-31,"[President, Obama, will, be, doing, Canada, a,...","[President, Obama, Canada, favor, block, Keyst...","[(President, 1), (Obama, 1), (Canada, 1), (fav...",President
567,President Obama has a legacy opportunity in Uk...,2014-05-07,"[President, Obama, has, a, legacy, opportunity...","[President, Obama, legacy, opportunity, Ukrain...","[(President, 1), (Obama, 1), (legacy, 1), (opp...",President
718,"At an energy conference in Istanbul, President...",2016-10-10,"[At, an, energy, conference, in, Istanbul, ,, ...","[energy, conference, Istanbul, President, Vlad...","[(President, 2), (energy, 1), (conference, 1),...",President
757,President Obama includes an overdue domestic p...,2016-03-10,"[President, Obama, includes, an, overdue, dome...","[President, Obama, include, overdue, domestic,...","[(President, 1), (Obama, 1), (include, 1), (ov...",President
