# The Media has neglected Puerto Rico during their Hurricane Crisis (Redesign)

In [2]:
import pandas as pd
import numpy as np

## Cleaning TV States data (to show activity spike before landfall)

Change dates in TV_States to days before/after landfall 

Texas Landfall - 8/26  
Florida Landfall - 9/10  
Maria Landfall - 9/20  

In [22]:
states = pd.read_csv("tv_states.csv")

In [23]:
states['Days before Landfall'] = np.nan 
               # Some dates will be left nan. Those dates will be unneeded in Tableau

We will look at the 5 days before landfall, and 2 days after

In [24]:
n = list(reversed(range(7)))
n.append(-1)
n.append(-2)
n

[6, 5, 4, 3, 2, 1, 0, -1, -2]

### Texas (Harvey) Landfall countdown:

Hurricane Harvey happened the soonest. So lets see the 9 days starting from 2 days after landfall

In [25]:
states.loc[states.Date <= '8/28/17', 'Days before Landfall'] = n

In [26]:
states[states.Date <= '8/28/17']

Unnamed: 0,Date,Florida,Texas,Puerto Rico,Days before Landfall
0,8/20/17,0.0923,0.0286,0.0,6.0
1,8/21/17,0.0441,0.0904,0.0099,5.0
2,8/22/17,0.0359,0.0334,0.0,4.0
3,8/23/17,0.0431,0.0839,0.0103,3.0
4,8/24/17,0.0492,0.3978,0.0184,2.0
5,8/25/17,0.0442,1.4639,0.0,1.0
6,8/26/17,0.033,2.3203,0.0391,0.0
7,8/27/17,0.0825,1.6419,0.0459,-1.0
8,8/28/17,0.0346,2.5069,0.0096,-2.0


### Florida (Irma) Landfall Countdown:

Need tens digit for date in September 1-9 (index 12-20 inclusive), otherwise conditional boolean mask will not see those dates.

In [27]:
dates = states.Date.values   # Pointer to dates column in dataframe

f = dates[20]
f[2:]

'9/17'

In [28]:
# RUN THIS CELL ONLY ONCE, or it will add unneeded 0's in date entries

for i in range(12, 21):
    s = dates[i].split('/')
    dates[i] = dates[i][:2] + '0' + dates[i][2:]

In [29]:
states.loc[(states.Date >= '9/04/17') & (states.Date <= '9/12/17'), 'Days before Landfall'] = n

In [30]:
states[(states.Date >= '9/04/17') & (states.Date <= '9/12/17')]

Unnamed: 0,Date,Florida,Texas,Puerto Rico,Days before Landfall
15,9/04/17,0.1194,0.4616,0.0363,6.0
16,9/05/17,0.5327,0.3904,0.1865,5.0
17,9/06/17,1.3598,0.3831,0.496,4.0
18,9/07/17,2.161,0.314,0.3286,3.0
19,9/08/17,3.2173,0.3867,0.1086,2.0
20,9/09/17,3.6556,0.202,0.0481,1.0
21,9/10/17,2.0631,0.0776,0.0084,0.0
22,9/11/17,3.0005,0.295,0.113,-1.0
23,9/12/17,2.228,0.1923,0.0936,-2.0


### Puerto Rico (Maria) Landfall countdown

In [31]:
states.loc[(states.Date >= '9/14/17') & (states.Date <= '9/22/17'), 'Days before Landfall'] = n

In [32]:
states[(states.Date >= '9/14/17') & (states.Date <= '9/22/17')]

Unnamed: 0,Date,Florida,Texas,Puerto Rico,Days before Landfall
25,9/14/17,1.1859,0.1452,0.0408,6.0
26,9/15/17,0.2983,0.0807,0.0331,5.0
27,9/16/17,0.2434,0.0906,0.0,4.0
28,9/17/17,0.2607,0.0534,0.0245,3.0
29,9/18/17,0.1238,0.0645,0.2057,2.0
30,9/19/17,0.0751,0.0352,0.7063,1.0
31,9/20/17,0.1086,0.0648,1.3539,0.0
32,9/21/17,0.1569,0.1449,0.9296,-1.0
33,9/22/17,0.0693,0.0663,0.4819,-2.0


In [33]:
states.to_csv("tv_rate_spikes.csv")

## Show cumulative sentences (or number of mentions) spoken after every day (Hurricane):

We will need "mediacloud_hurricanes" and "tv_hurricanes by network" datasets.  
Join them so we can compute number of sentences each network has spoken about each hurricane.

In [4]:
News_Percents = pd.read_csv("tv_hurricanes_by_network.csv")

In [5]:
hurricane_sentences = pd.read_csv("mediacloud_hurricanes.csv")

Need a Totals column for later computation

In [6]:
News_Percents['Total_%'] = News_Percents['BBC News'] + News_Percents['FOX News'] \
           + News_Percents['CNN'] + News_Percents['MSNBC']

### Hurricane Harvey Joining

In [7]:
HarveyTV = News_Percents[News_Percents['Query'] == 'Hurricane Harvey']

Collect list of dates for comparison:

In [8]:
Valid_Dates = HarveyTV.Date.values
Valid_Dates

array(['8/24/17', '8/25/17', '8/26/17', '8/27/17', '8/28/17', '8/29/17',
       '8/30/17', '8/31/17', '9/1/17', '9/2/17', '9/3/17', '9/4/17',
       '9/5/17', '9/6/17', '9/7/17', '9/8/17', '9/9/17', '9/10/17',
       '9/11/17', '9/12/17', '9/13/17', '9/14/17', '9/15/17', '9/16/17',
       '9/17/17', '9/18/17', '9/19/17', '9/20/17', '9/21/17', '9/22/17',
       '9/23/17', '9/24/17', '9/25/17'], dtype=object)

Remove dates that don't exist in both datasets (Sentences dataset contains more dates than TV set)

In [160]:
HarveySentence = hurricane_sentences[['Date', 'Harvey']]\
[hurricane_sentences.Date.isin(Valid_Dates)]  # Date must exist in list of dates

In [161]:
HarveySentence.head()

Unnamed: 0,Date,Harvey
4,8/24/17,309
5,8/25/17,1348
6,8/26/17,995
7,8/27/17,960
8,8/28/17,1956


In [162]:
HarveySentence = HarveySentence.rename(columns={'Harvey':'Num_Sentences'})

Join the two datasets:

In [163]:
HarveyTV.set_index(['Date'], inplace=True)   # Change both sets' indexes so they match
HarveySentence.set_index(['Date'], inplace=True)

In [164]:
join = [HarveyTV, HarveySentence]
HarveyJoin = pd.concat(join, axis=1, join_axes=[HarveyTV.index])

Currently 'Num_Sentences' = 'Total_%' of all sentences spoken in news.  
So 'Num_Sentences' / 'Total_%' = 'all sentences spoken in news'  
We need # of all sentences to compute # of acutal sentences spoken by each news network.

In [165]:
HarveyJoin.Num_Sentences = HarveyJoin.Num_Sentences / HarveyJoin['Total_%']

Turn News network percentages into Number of Sentences.  
Percentages currently out of 100, so divide all by 100

In [166]:
HarveyJoin['BBC News'] = HarveyJoin['BBC News']/100 * HarveyJoin.Num_Sentences
HarveyJoin['CNN'] = HarveyJoin['CNN']/100 * HarveyJoin.Num_Sentences
HarveyJoin['FOX News'] = HarveyJoin['FOX News']/100 * HarveyJoin.Num_Sentences
HarveyJoin['MSNBC'] = HarveyJoin['MSNBC']/100 * HarveyJoin.Num_Sentences

Turn those numbers into cumulative sums (Total sentences spoken increase each day)

In [167]:
HarveyJoin['BBC News'] = HarveyJoin['BBC News'].cumsum(axis=0)
HarveyJoin['CNN'] = HarveyJoin['CNN'].cumsum(axis=0)
HarveyJoin['FOX News'] = HarveyJoin['FOX News'].cumsum(axis=0)
HarveyJoin['MSNBC'] = HarveyJoin['MSNBC'].cumsum(axis=0)

In [168]:
HarveyJoin

Unnamed: 0_level_0,Query,BBC News,CNN,FOX News,MSNBC,Total_%,Num_Sentences
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
8/24/17,Hurricane Harvey,0.0,1.200204,1.273901,0.615894,0.2935,1052.810903
8/25/17,Hurricane Harvey,1.809039,5.846971,4.871942,4.042048,2.5566,527.262771
8/26/17,Hurricane Harvey,3.875965,8.187318,7.816906,6.639811,3.6682,271.250204
8/27/17,Hurricane Harvey,5.106474,10.835906,10.906171,9.271449,0.8496,1129.943503
8/28/17,Hurricane Harvey,7.59923,17.4864,14.571018,16.023352,0.8294,2358.331324
8/29/17,Hurricane Harvey,12.488408,21.830895,22.681608,21.509088,0.8802,2593.728698
8/30/17,Hurricane Harvey,14.476671,25.48096,29.031856,25.550513,0.8151,1966.629861
8/31/17,Hurricane Harvey,17.354033,29.38906,33.530113,29.296794,1.1053,1359.811816
9/1/17,Hurricane Harvey,18.346267,32.342268,37.643931,32.327534,1.4161,783.136784
9/2/17,Hurricane Harvey,19.204499,34.066458,40.381916,34.007126,1.9934,351.158824


In [169]:
HarveyJoin.to_csv("Harvey_cumula.csv")

### Hurricane Irma Joining

Same steps as above, but w/ different hurricane and variable names

In [15]:
IrmaTV = News_Percents[News_Percents['Query'] == 'Hurricane Irma']

In [16]:
Valid_Dates = IrmaTV.Date.values
Valid_Dates

array(['8/31/17', '9/1/17', '9/2/17', '9/3/17', '9/4/17', '9/5/17',
       '9/6/17', '9/7/17', '9/8/17', '9/9/17', '9/10/17', '9/11/17',
       '9/12/17', '9/13/17', '9/14/17', '9/15/17', '9/16/17', '9/17/17',
       '9/18/17', '9/19/17', '9/20/17', '9/21/17', '9/22/17', '9/23/17',
       '9/24/17', '9/25/17'], dtype=object)

In [17]:
IrmaSentence = hurricane_sentences[['Date', 'Irma']]\
[hurricane_sentences.Date.isin(Valid_Dates)]  # Date must exist in list of dates

IrmaSentence.head()

Unnamed: 0,Date,Irma
11,8/31/17,61
12,9/1/17,66
13,9/2/17,36
14,9/3/17,52
15,9/4/17,221


In [18]:
IrmaSentence = IrmaSentence.rename(columns={'Irma':'Num_Sentences'})

In [19]:
IrmaTV.set_index(['Date'], inplace=True)
IrmaSentence.set_index(['Date'], inplace=True)

In [20]:
join = [IrmaTV, IrmaSentence]
IrmaJoin = pd.concat(join, axis=1, join_axes=[IrmaTV.index])

In [21]:
IrmaJoin.Num_Sentences = IrmaJoin.Num_Sentences / IrmaJoin['Total_%']

In [22]:
IrmaJoin['BBC News'] = IrmaJoin['BBC News']/100 * IrmaJoin.Num_Sentences
IrmaJoin['CNN'] = IrmaJoin['CNN']/100 * IrmaJoin.Num_Sentences
IrmaJoin['FOX News'] = IrmaJoin['FOX News']/100 * IrmaJoin.Num_Sentences
IrmaJoin['MSNBC'] = IrmaJoin['MSNBC']/100 * IrmaJoin.Num_Sentences

In [23]:
IrmaJoin['BBC News'] = IrmaJoin['BBC News'].cumsum(axis=0)
IrmaJoin['CNN'] = IrmaJoin['CNN'].cumsum(axis=0)
IrmaJoin['FOX News'] = IrmaJoin['FOX News'].cumsum(axis=0)
IrmaJoin['MSNBC'] = IrmaJoin['MSNBC'].cumsum(axis=0)

In [24]:
IrmaJoin

Unnamed: 0_level_0,Query,BBC News,CNN,FOX News,MSNBC,Total_%,Num_Sentences
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
8/31/17,Hurricane Irma,0.0,0.113488,0.496512,0.0,0.0387,1576.22739
9/1/17,Hurricane Irma,0.0,0.450943,0.774631,0.044426,0.2169,304.28769
9/2/17,Hurricane Irma,0.0,0.487566,1.043945,0.098488,0.1445,249.134948
9/3/17,Hurricane Irma,0.0,0.524745,1.362191,0.263064,0.1049,495.7102
9/4/17,Hurricane Irma,0.362502,1.340723,2.113787,0.542988,0.3158,699.810006
9/5/17,Hurricane Irma,2.82727,3.696512,4.10301,1.663208,1.1206,707.656613
9/6/17,Hurricane Irma,9.827124,7.900069,7.489057,3.753751,2.7463,607.362633
9/7/17,Hurricane Irma,17.851622,13.538019,10.890625,6.629734,3.5852,556.175388
9/8/17,Hurricane Irma,24.475062,19.112651,15.681028,10.131259,4.0968,500.146456
9/9/17,Hurricane Irma,28.453726,22.607745,19.786107,14.502421,4.6936,339.824442


In [25]:
IrmaJoin.to_csv("Irma_cumula.csv")

### Hurricane Maria Joining

In [28]:
MariaTV = News_Percents[News_Percents['Query'] == 'Hurricane Maria']

In [29]:
Valid_Dates = MariaTV.Date.values
Valid_Dates

array(['9/17/17', '9/18/17', '9/19/17', '9/20/17', '9/21/17', '9/22/17',
       '9/23/17', '9/24/17', '9/25/17'], dtype=object)

In [30]:
MariaSentence = hurricane_sentences[['Date', 'Maria']]\
[hurricane_sentences.Date.isin(Valid_Dates)]  # Date must exist in list of dates

MariaSentence.head()

Unnamed: 0,Date,Maria
28,9/17/17,41
29,9/18/17,304
30,9/19/17,518
31,9/20/17,545
32,9/21/17,528


In [31]:
MariaSentence = MariaSentence.rename(columns={'Maria':'Num_Sentences'})

In [32]:
MariaTV.set_index(['Date'], inplace=True)
MariaSentence.set_index(['Date'], inplace=True)

In [33]:
join = [MariaTV, MariaSentence]
MariaJoin = pd.concat(join, axis=1, join_axes=[MariaTV.index])

In [34]:
MariaJoin.Num_Sentences = MariaJoin.Num_Sentences / MariaJoin['Total_%']

In [35]:
MariaJoin['BBC News'] = MariaJoin['BBC News']/100 * MariaJoin.Num_Sentences
MariaJoin['CNN'] = MariaJoin['CNN']/100 * MariaJoin.Num_Sentences
MariaJoin['FOX News'] = MariaJoin['FOX News']/100 * MariaJoin.Num_Sentences
MariaJoin['MSNBC'] = MariaJoin['MSNBC']/100 * MariaJoin.Num_Sentences

In [36]:
MariaJoin['BBC News'] = MariaJoin['BBC News'].cumsum(axis=0)
MariaJoin['CNN'] = MariaJoin['CNN'].cumsum(axis=0)
MariaJoin['FOX News'] = MariaJoin['FOX News'].cumsum(axis=0)
MariaJoin['MSNBC'] = MariaJoin['MSNBC'].cumsum(axis=0)

In [37]:
MariaJoin

Unnamed: 0_level_0,Query,BBC News,CNN,FOX News,MSNBC,Total_%,Num_Sentences
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
9/17/17,Hurricane Maria,0.0,0.206066,0.056135,0.147799,0.0577,710.571924
9/18/17,Hurricane Maria,1.043639,1.089655,0.944933,0.371774,0.642,473.520249
9/19/17,Hurricane Maria,3.455228,2.665953,1.426925,1.081894,1.9119,270.934672
9/20/17,Hurricane Maria,5.372343,4.215362,2.472219,2.020077,2.8087,194.039947
9/21/17,Hurricane Maria,6.867709,6.244607,3.471121,2.776564,1.6793,314.416721
9/22/17,Hurricane Maria,8.158574,7.543421,4.037306,3.710699,0.8748,467.535437
9/23/17,Hurricane Maria,8.16453,7.545573,4.038191,3.711706,0.6699,1.49276
9/24/17,Hurricane Maria,8.16453,7.545573,4.038191,3.711706,0.1905,0.0
9/25/17,Hurricane Maria,8.16453,7.545573,4.038191,3.711706,0.506,0.0


In [38]:
MariaJoin.to_csv("Maria_cumula.csv")