In [1]:
import pandas as pd
import matplotlib.pyplot as plt

### Global Variables

In [2]:
start, end = '2020-03-06', '2021-01-27'
start_lockdown1, start_lockdown2, start_lockdown3 = '2020-03-16', '2020-11-17', '2020-12-26'
end_lockdown1, end_lockdown2, end_lockdown3 = '2020-04-20', '2020-12-06', '2021-02-07'
input_postings = r'../data/preprocessing/combined_postings_final.pickle'
input_summer_gap = r'../data/preprocessing/year_20_final.pickle'
input_survey = r'../data/survey_data_covid_variables.csv'

### Load datasets

Load and analyize survey dataset:

In [3]:
survey = pd.read_csv(input_survey)
survey_sub = survey[['welle', 'co05_01', 'co06_01', 'co06_02', 'co06_03', 'co06_04', 'co06_05', 'co06_06', 'co08_01', 'co19_01']]
survey_sub.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,welle,co05_01,co06_01,co06_02,co06_03,co06_04,co06_05,co06_06,co08_01,co19_01
0,1,3,2,4,4,4,4,4,3,
1,1,3,1,1,-1,-1,-1,1,1,
2,1,1,1,2,-1,3,4,2,3,
3,1,2,2,2,1,2,1,2,1,
4,1,3,4,3,4,3,4,3,5,


In [4]:
survey_wave_map = {
    1: pd.to_datetime('5.5.2020'),
    2: pd.to_datetime('5.28.2020'),
    3: pd.to_datetime('6.17.2020'),
    4: pd.to_datetime('7.8.2020'),
    5: pd.to_datetime('7.30.2020'),
    6: pd.to_datetime('8.22.2020'),
    7: pd.to_datetime('9.14.2020'),
    8: pd.to_datetime('9.29.2020'),
    9: pd.to_datetime('10.21.2020'),
    10: pd.to_datetime('11.11.2020'),
    11: pd.to_datetime('11.28.2020'),
    12: pd.to_datetime('12.22.2020'),
}

In [5]:
survey_sub['wave_end'] = pd.to_datetime(survey_sub['welle'].map(survey_wave_map))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  survey_sub['wave_end'] = pd.to_datetime(survey_sub['welle'].map(survey_wave_map))


In [6]:
survey_sub = survey_sub.set_index('wave_end')

Load and analyze postings dataset:

In [7]:
cols_to_drop = ['rid', 'ppid', 'o', 'hl', 'tx', 'cn', '13', '16', '17', '19', '31', '200', 'pid_liwc']
postings = pd.read_pickle(input_postings).drop(cols_to_drop, axis=1)
postings.index = pd.to_datetime(postings.cd, utc=True)
postings['anger'] = postings['18']/postings['number_tokens']
postings.head()

Unnamed: 0_level_0,oid,pid,cd,vp,vn,allText,18,number_tokens,anger
cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-12-31 19:04:57.643000+00:00,2000122865130,1064710786,2020-12-31 20:04:57.643000+01:00,4,0,Herr Bundespräsidentnach Artikel 70 B-VG könne...,0,28,0.0
2021-01-01 05:00:14.350000+00:00,2000122865130,1064724063,2021-01-01 06:00:14.350000+01:00,2,0,Der Flash wird noch eine weitere Amtszeit dafü...,0,9,0.0
2020-12-31 19:57:02.017000+00:00,2000122865130,1064712838,2020-12-31 20:57:02.017000+01:00,2,0,Das ist wahrscheinlich sogar unserem HBP zu pe...,0,8,0.0
2020-12-31 18:09:04.760000+00:00,2000122865130,1064708391,2020-12-31 19:09:04.760000+01:00,3,0,Ausnahmegesetze für unseren Ausnahmenkanzler ...,0,4,0.0
2020-12-31 17:43:57.973000+00:00,2000122865130,1064707195,2020-12-31 18:43:57.973000+01:00,3,0,Anschober will...Anschober kann aber nicht.,0,5,0.0


In [8]:
#postings['anger'].hist()

Load and analyze dataset of articles to fill gap in the summer

In [None]:
postings_summer_gap = pd.read_pickle(input_summer_gap).sort_index()
postings_summer_gap.index = pd.to_datetime(postings_summer_gap.index, utc=True)
postings_summer_gap.tail()

In [None]:
postings_summer_gap['anger2'] = postings_summer_gap['18'] / postings_summer_gap['number_tokens']

In [None]:
p1 = pd.DataFrame(postings['anger'])
p1.index = postings['pid']

p2 = pd.DataFrame(postings_summer_gap['anger2'])
p2.index = postings_summer_gap['postid']

Create dataframe containing liveticker (anger) and articles (anger2) counts as well as the sum

In [None]:
combined = pd.concat([postings['anger'].resample('1d').sum(), postings_summer_gap['anger2'].resample('1d').sum()], axis=1)
combined['anger_total'] = combined['anger'].fillna(0) + combined['anger2'].fillna(0)
combined

Create baseline difference for selected surey questions

In [None]:
survey_grouped = survey_sub.groupby('welle').resample('1d').mean()
survey_grouped.index = survey_grouped.index.droplevel('welle')
survey_grouped['baseline_diff_05'] = (survey_grouped['co05_01'] - survey_grouped['co05_01'][0])/survey_grouped['co05_01'][0]
survey_grouped['baseline_diff_06_1'] = (survey_grouped['co06_01'] - survey_grouped['co06_01'][0])/survey_grouped['co06_01'][0]
survey_grouped['baseline_diff_06_2'] = (survey_grouped['co06_02'] - survey_grouped['co06_02'][0])/survey_grouped['co06_02'][0]
survey_grouped['baseline_diff_06_3'] = (survey_grouped['co06_03'] - survey_grouped['co06_03'][0])/survey_grouped['co06_03'][0]
survey_grouped['baseline_diff_06_4'] = (survey_grouped['co06_04'] - survey_grouped['co06_04'][0])/survey_grouped['co06_04'][0]
survey_grouped['baseline_diff_06_5'] = (survey_grouped['co06_05'] - survey_grouped['co06_05'][0])/survey_grouped['co06_05'][0]
survey_grouped['baseline_diff_06_6'] = (survey_grouped['co06_06'] - survey_grouped['co06_06'][0])/survey_grouped['co06_06'][0]
survey_grouped['baseline_diff_08'] = (survey_grouped['co08_01'] - survey_grouped['co08_01'][0])/survey_grouped['co08_01'][0]
survey_grouped['baseline_diff_19'] = (survey_grouped['co19_01'] - survey_grouped['co19_01'][9])/survey_grouped['co19_01'][9]
survey_grouped

In [None]:
survey_grouped['baseline_diff_05'].plot()

### Plots

In [None]:
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(combined.loc[start:end, 'anger_total'], linestyle='-', linewidth=1, alpha=0.4, label='DERSTANDARD livetickers & articles: LIWC Anger-count')

ax.set_ylabel('DERSTANDARD: LIWC Anger-count')

ax.axvspan(pd.to_datetime(start_lockdown1), pd.to_datetime(end_lockdown1), color='red', alpha=0.05, label="Lockdowns")
ax.axvspan(pd.to_datetime(start_lockdown2), pd.to_datetime(end_lockdown2), color='red', alpha=0.05)
ax.axvspan(pd.to_datetime(start_lockdown3), pd.to_datetime(end_lockdown3), color='red', alpha=0.05)

ax.axvline(pd.to_datetime('2020-04-06'), linewidth=0.5, color='red', alpha=0.5, label='2020-04-06: Mandatory face-masks in grocery stores')
ax.axvline(pd.to_datetime('2020-11-02'), linewidth=0.5, color='red', alpha=0.5, label='2020-11-02: Terrorist attack in Vienna')

ax2 = ax.twinx()
ax2.set_ylim([-1,1])
lables = [item.get_text() for item in ax2.get_yticklabels()]
lables[1] = 'no'
lables[len(lables)-2] = 'yes'
lables[int(len(lables)/2)] = 'base\nline'
ax2.set_yticklabels(lables)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_05'],label="Q5: Should the actions be increased?", linestyle='-', marker='o', linewidth=0.5, color='blue')
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_06_1'],label="Q6_1: Do you feel restricted in your family life?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_06_2'],label="Q6_2: Do you feel restricted in your work life?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_06_3'],label="Q6_3: Do you feel restricted in your school/uni life?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_06_4'],label="Q6_4: Do you feel restricted in your social life?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_06_5'],label="Q6_5: Do you feel restricted in your cultural life?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_06_6'],label="Q6_6: Do you feel restricted in your shopping experience?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_08'],label="Q8: Do you feel angry in the last weeks?", linestyle='-', marker='o', linewidth=0.5)
ax2.plot(survey_grouped.index, survey_grouped['baseline_diff_19'],label="Q19: Has the government a good communication strategy?", linestyle='-', marker='o', linewidth=0.5)

ax2.tick_params(axis='y', labelcolor='gray')

fig.autofmt_xdate()
ax2.set_ylabel("Baseline difference", color='gray')
ax.set_title("Public sentiment during the first three lockdowns in Austria")

lines, labels = ax.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc=0)

In [None]:
combined = pd.concat([postings['pid'].resample('1d').count(), postings_summer_gap['postid'].resample('1d').count()], axis=1)
combined['pid'] = combined['pid'].fillna(0)
combined['postid'] = combined['postid'].fillna(0)
combined['postings_total'] = combined['pid'] + combined['postid']
combined.index = pd.to_datetime(combined.index, utc=True)

fig, ax = plt.subplots(figsize=(20,10))

plt.title('Total number of LWIC anger counts from DERSTANDARD livetickers and articles')
ax.plot(combined.loc[start:end,'postings_total'], label='DERSTANDARD: Combined number', linestyle='-', linewidth=1, alpha=0.4)
ax.plot(combined.loc[start:end,'pid'], label='DERSTANDARD: Only Livetickers', linestyle='-', linewidth=1, alpha=0.4)
ax.plot(combined.loc[start:end,'postid'], label='DERSTANDARD: Only Articles', linestyle='-', linewidth=1, alpha=0.4)
plt.legend(loc='upper right')

In [None]:
fig, ax = plt.subplots(figsize=(20,10))

ax.axvspan(pd.to_datetime(start_lockdown1), pd.to_datetime(end_lockdown1), color='red', alpha=0.05, label="Lockdowns")
ax.axvspan(pd.to_datetime(start_lockdown2), pd.to_datetime(end_lockdown2), color='red', alpha=0.05)
ax.axvspan(pd.to_datetime(start_lockdown3), pd.to_datetime(end_lockdown3), color='red', alpha=0.05)

ax.axvline(pd.to_datetime('2020-04-06'), linewidth=0.5, color='red', alpha=0.5, label='2020-04-06: Mandatory face-masks in grocery stores')
ax.axvline(pd.to_datetime('2020-11-02'), linewidth=0.5, color='red', alpha=0.5, label='2020-11-02: Terrorist attack in Vienna')

plt.title('Total number of LWIC anger counts from DERSTANDARD livetickers and articles')
ax.plot(combined.loc[start:end,'postings_total'], label='DERSTANDARD: Combined number', linestyle='-', linewidth=1, alpha=0.4)
plt.legend(loc='upper right')

In [None]:
postings[['18', 'number_tokens']].resample('1d').sum()
combined = pd.concat(
    [postings[['18', 'number_tokens']].resample('1d').sum().rename(columns={'18': 'anger_livetickers', 'number_tokens': 'number_tokens_liveticker'}),
     postings_summer_gap[['18', 'number_tokens']].resample('1d').sum().rename(columns={'18': 'anger_articles', 'number_tokens': 'number_tokens_articles'})
    ], axis=1).fillna(0)
combined['anger_both'] = combined['anger_livetickers'] + combined['anger_articles']
combined['number_tokens_both'] = combined['number_tokens_liveticker'] + combined['number_tokens_articles']
combined['anger_rel'] = combined['anger_both'] / combined['number_tokens_both']

combined.head()

In [None]:
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(combined.loc[start:end,'anger_rel'], alpha=0.4, label='Anger per day relative to total number of tokens')
ax.axvspan(pd.to_datetime(start_lockdown1), pd.to_datetime(end_lockdown1), color='red', alpha=0.05, label="Lockdowns")
ax.axvspan(pd.to_datetime(start_lockdown2), pd.to_datetime(end_lockdown2), color='red', alpha=0.05)
ax.axvspan(pd.to_datetime(start_lockdown3), pd.to_datetime(end_lockdown3), color='red', alpha=0.05)
ax.axvline(pd.to_datetime('2020-04-06'), linewidth=0.5, color='red', alpha=0.5, label='2020-04-06: Mandatory face-masks in grocery stores')
ax.axvline(pd.to_datetime('2020-11-02'), linewidth=0.5, color='red', alpha=0.5, label='2020-11-02: Terrorist attack in Vienna')

fig.autofmt_xdate()
plt.title('Normalized number of LWIC anger counts from DERSTANDARD livetickers and articles')

lines, labels = ax.get_legend_handles_labels()
ax.legend(loc='upper right')