In [None]:
from datetime import datetime

import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, pearsonr
import statsmodels.api as sm

import numpy as np
%matplotlib inline

In [None]:
# Load the dataset from a Parquet file located in the working directory.
df = pd.read_parquet("DataEnhanced_LV.parquet")
# Sanity check: print the dimensions, then preview the first rows.
print(df.shape)
df.head()

In [None]:
#df[df["year"]<2005].cabinet_ents.value_counts()

In [None]:
# Optional filter: drop Vaira Vīķe-Freiberga's rows because coverage is poor.
# The filter is currently DISABLED (commented out), so df is unchanged by this cell.
#df = df[df["president"]!= 'Vaira Vīķe-Freiberga']
print(df.shape)
df.head()

In [None]:
# List the column names available in df.
df.columns

In [None]:
# Ministry / cabinet-post column name (as it appears in the data) -> coarse policy-area label.
_ministry_areas = {
    'Izglītības un zinātnes': "Research-Education",
    'Aizsardzības': "Defense",
    'Premjers': "Prime-Minister",
    'Reģionālās attīstības un pašvaldību lietu': "Other",
    'Ekonomikas': "Economy",
    'Labklājības': "Social-Affairs",
    'Ārlietu': "Foreign-Affairs",
    'Iekšlietu': "Internal-Affairs",
    'Veselības': "Healthcare",
    'Kultūras': "Culture",
    'Bērnu un ģimenes lietu': "Other",
    'Tieslietu': "Justice",
    'Vides': "Environment",
    'Satiksmes': "Other",
    'Zemkopības': "Agriculture",
    'Finanšu': "Finance",
    'Vides aizsardzības un reģionālās attīstības': "Environment",
    'Bērnu, ģimenes un sabiedrības integrācijas lietu': "Other",
}

# Full lookup: the plain ministry columns plus their "president_"-prefixed twins.
# The prefixed entries are derived from the base mapping so that column X and column
# "president_X" always land in matching areas ("A" and "President-A"). The hand-written
# version had drifted: 'president_Vides aizsardzības un reģionālās attīstības' pointed
# to "President-Other" while its unprefixed twin pointed to "Environment", which skews
# any comparison of an area against its "President-" counterpart.
cabinet_map = dict(_ministry_areas)
cabinet_map.update(
    {"president_" + ministry: "President-" + area for ministry, area in _ministry_areas.items()}
)

In [None]:
# Collapse the per-ministry columns into one boolean column per policy area.
# new_cols is sorted so the added columns appear in the same order on every kernel
# start (iterating a set of strings is not order-stable across interpreter runs).
new_cols = sorted(set(cabinet_map.values()))
old_cols = list(cabinet_map.keys())

# Group the source columns that actually exist in df by the area they feed into.
sources_by_area = {area: [] for area in new_cols}
for source, area in cabinet_map.items():
    if source in df.columns:
        sources_by_area[area].append(source)

# An area is flagged for a row when the row-wise total of its source columns is
# non-zero. Computed column-wise instead of with a Python loop over df.iterrows().
# skipna=False keeps the earlier treatment of missing values: a NaN total compares
# as "not equal to 0" and is therefore flagged True.
new_col_data = {}
for area, sources in sources_by_area.items():
    if sources:
        new_col_data[area] = df[sources].sum(axis=1, skipna=False).ne(0).tolist()
    else:
        # No source column present in df for this area: never flagged.
        new_col_data[area] = [False] * len(df)

# All flags are computed before any column is written back to df.
for key, value in new_col_data.items():
    df[key] = value

df.head()

In [None]:
# Show the policy-area column names in alphabetical order.
sorted(new_cols)

# Question 1: How is presidential attention distributed across policy spheres?

In [None]:
columns = [
    'President-Agriculture',
    'President-Culture',
    'President-Defense',
    'President-Economy',
    'President-Environment',
    'President-Finance',
    'President-Foreign-Affairs',
    'President-Healthcare',
    'President-Internal-Affairs',
    'President-Justice',
    'President-Other',
    'President-Prime-Minister',
    'President-Research-Education',
    'President-Social-Affairs',
]

# Mean of every listed column, keyed by the label with its "President-" prefix removed.
data = {}
for col in columns:
    data[col.replace("President-", "")] = df[col].mean()

# Ascending by mean, so the largest bar is drawn at the top of the horizontal chart.
data_s = sorted(data.items(), key=lambda pair: pair[1])
label_vals = [label for label, _ in data_s]
values = [mean_value for _, mean_value in data_s]

# Note: this sets the default figure size globally, not only for this figure.
plt.rcParams["figure.figsize"] = (12, 12)
y_pos = np.arange(len(values))

plt.barh(y_pos, values, color="grey")
plt.yticks(y_pos, label_vals, fontsize=14)
plt.xticks(fontsize=14)
plt.show()

In [None]:
# Print the overall means of the Defense and Foreign-Affairs columns for reference.
print(df["President-Defense"].mean())
print(df["President-Foreign-Affairs"].mean())

# Q2: How does the attention pattern differ over time?

In [None]:
# NOTE: `columns` is reassigned here to a five-item subset; any later cell that reads
# `columns` without redefining it will see this shorter list.
columns = [
    'President-Prime-Minister',
    'President-Foreign-Affairs',
    'President-Defense',
    'President-Internal-Affairs',
    'President-Healthcare',
]

# Yearly mean of each selected column.
agg_dict = dict.fromkeys(columns, "mean")
df2 = df.groupby(["year"]).agg(agg_dict)

years = df2.index.tolist()
data = [df2[col].tolist() for col in columns]

fig, ax = plt.subplots(figsize=(18, 10))

# One distinct black line style per series, indexed by the series' position in `columns`.
style_dict = dict(enumerate(["k-", "k--", "k-.", "k:", "k-x"]))

for idx, col in enumerate(columns):
    ax.plot(years, data[idx], style_dict[idx], label=col)
plt.legend()

## Is Attention Higher For FP/DP than for others?

In [None]:
# Compare the Foreign-Affairs column against the pooled values of the "other" columns.
target = 'President-Foreign-Affairs'
# Columns kept out of the comparison pool (the target itself is one of them).
nincl = ['President-Foreign-Affairs', 'President-Prime-Minister', 'President-Defense']
target_dist = df[target].tolist()

# NOTE(review): `columns` is whatever list the most recently executed cell assigned.
# When the notebook runs top to bottom that is the five-item list from the time-trend
# cell, so the pool holds only the entries of that list that are not in `nincl`.
# Confirm this is the intended comparison group.
other_dist = [value for c in columns if c not in nincl for value in df[c]]

# Sample sizes, then sample means, then the two-sample t-test.
for sample in (target_dist, other_dist):
    print(len(sample))

for sample in (target_dist, other_dist):
    print(sum(sample) / len(sample))

ttest_ind(target_dist, other_dist)

In [None]:
# Compare the Defense column against the pooled values of the "other" columns.
target = 'President-Defense'
# Columns kept out of the comparison pool (the target itself is one of them).
nincl = ['President-Foreign-Affairs', 'President-Prime-Minister', 'President-Defense']
target_dist = df[target].tolist()

# NOTE(review): `columns` is whatever list the most recently executed cell assigned.
# When the notebook runs top to bottom that is the five-item list from the time-trend
# cell, so the pool holds only the entries of that list that are not in `nincl`.
# Confirm this is the intended comparison group.
other_dist = [value for c in columns if c not in nincl for value in df[c]]

# Sample sizes, then sample means, then the two-sample t-test.
for sample in (target_dist, other_dist):
    print(len(sample))

for sample in (target_dist, other_dist):
    print(sum(sample) / len(sample))

ttest_ind(target_dist, other_dist)

# Does presidential attention mirror media attention?

In [None]:
# Restore the full list of "President-<area>" columns (alphabetical by area).
columns = [
    "President-" + area
    for area in (
        "Agriculture",
        "Culture",
        "Defense",
        "Economy",
        "Environment",
        "Finance",
        "Foreign-Affairs",
        "Healthcare",
        "Internal-Affairs",
        "Justice",
        "Other",
        "Prime-Minister",
        "Research-Education",
        "Social-Affairs",
    )
]

In [None]:
# Area names without the "President-" prefix, paired with the prefixed columns.
columns2 = [name.replace("President-", "") for name in columns]
ministries_full = columns + columns2

# Monthly mean of every area column; for `president`, keep the last value in the month.
agg_dict = dict.fromkeys(ministries_full, "mean")
agg_dict["president"] = "last"

dfg = df.groupby(["year", "month"]).agg(agg_dict)
# 0/1 dummy: 1 when the month's president is Valdis Zatlers or Andris Bērziņš.
# NOTE(review): the column name "grybauskaite" does not describe these presidents —
# it looks like a leftover; renaming it requires updating the regression cell too.
dfg["grybauskaite"] = dfg["president"].isin(["Valdis Zatlers", "Andris Bērziņš"]).astype("int64")
print(dfg.shape)
dfg.head()

In [None]:
# Number of rows per president.
df.president.value_counts()

In [None]:
# Pearson correlation between each area column and its "President-" counterpart,
# computed on the monthly aggregates. Each result is followed by a blank line.
for m in columns2:
    print(m, pearsonr(dfg[m], dfg[f"President-{m}"]), "", sep="\n")

In [None]:
# One OLS fit per area: "President-<area>" regressed on the area column and the
# "grybauskaite" dummy, with an intercept added by add_constant.
for m in columns2:
    print(m)
    y = dfg[[f"President-{m}"]]
    X = sm.add_constant(dfg[[m, "grybauskaite"]])

    results = sm.OLS(y, X).fit()
    print(results.summary())

    print()

# Is attention higher in the first year?

In [None]:
# Begin / end of six twelve-month windows, each opening on 8 July.
# NOTE(review): presumably the first year of each presidential term — confirm the dates.
vvf1b, vvf1e = datetime(1999, 7, 8), datetime(2000, 7, 8)
vvf2b, vvf2e = datetime(2003, 7, 8), datetime(2004, 7, 8)
vzb, vze = datetime(2007, 7, 8), datetime(2008, 7, 8)
abb, abe = datetime(2011, 7, 8), datetime(2012, 7, 8)
rvb, rve = datetime(2015, 7, 8), datetime(2016, 7, 8)
elb, ele = datetime(2019, 7, 8), datetime(2020, 7, 8)

first_year_windows = [
    (vvf1b, vvf1e),
    (vvf2b, vvf2e),
    (vzb, vze),
    (abb, abe),
    (rvb, rve),
    (elb, ele),
]

# 1 when the row's date falls inside any window (start inclusive, end exclusive), else 0.
first_year = [
    int(any(start <= stamp < end for start, end in first_year_windows))
    for stamp in df.date
]

df["first_year"] = first_year
print(df.first_year.mean())

In [None]:
def get_pres_att(ents):
    """Return 1 if `ents` names the president alongside something else, otherwise 0.

    The result is 1 when `ents` contains "Prezidents" or "Prezidente" and its length
    is greater than one; in every other case it is 0.
    NOTE(review): `ents` is assumed to be a collection of entity labels — confirm.
    """
    names_president = "Prezidents" in ents or "Prezidente" in ents
    return int(names_president and len(ents) > 1)

# Flag each row by applying get_pres_att to its cabinet_ents value.
df["PresAtt"] = df["cabinet_ents"].apply(get_pres_att)

In [None]:
# Split the rows by the first_year flag: dfa = outside the windows, dfb = inside.
dfa = df.loc[df["first_year"] == 0]
dfb = df.loc[df["first_year"] == 1]

In [None]:
# For every column in `columns`: group means and a two-sample t-test between the
# rows with first_year == 0 and those with first_year == 1.
for m in columns:
    normal_sample, first_year_sample = dfa[m], dfb[m]
    print(m)
    print("Normal: ", normal_sample.mean())
    print("First year: ", first_year_sample.mean())
    print(ttest_ind(normal_sample, first_year_sample))
    print()

In [None]:
# Same comparison as for the area columns, applied to the PresAtt flag.
m = "PresAtt"
normal_sample, first_year_sample = dfa[m], dfb[m]
print(m)
print("Normal: ", normal_sample.mean())
print("First year: ", first_year_sample.mean())
print(ttest_ind(normal_sample, first_year_sample))
print()

# Latvia PM Strength

In [None]:
# Cabinet identifier of the form "<cabinet>_<cabinet_no>", built row by row.
df["cab_id"] = [
    cabinet + "_" + str(number)
    for cabinet, number in zip(df["cabinet"], df["cabinet_no"])
]
print(df.shape)
df.head()

In [None]:
# Strength score per cabinet, keyed by "<cabinet>_<cabinet_no>" — the same format as
# the cab_id column built from the `cabinet` and `cabinet_no` columns.
# NOTE(review): the source and scale of these scores are not recorded in this
# notebook — add a citation.
cab_strength_dict = {'Laimdota Straujuma_37': 2.4, 
                     'Laimdota Straujuma_38': 1.95, 
                     'Valdis Dombrovskis_36': 2.7, 
                     'Valdis Dombrovskis_35': 3, 
                     'Valdis Dombrovskis_34': 3.3,
                     'Einars Repše_29': 2.25,  
                     'Andris Bērziņš_28': 2.3, 
                     'Aigars Kalvītis_31': 2.15, 
                     'Aigars Kalvītis_32': 1.35, 
                     'Indulis Emsis_30': 1.35, 
                     'Ivars Godmanis_33': 1.2,
                     'Andris Šķēle_27': 2}

In [None]:
# Columns to average per cabinet: the PresAtt flag plus every "President-<area>" column.
cols = [
    'PresAtt',
    'President-Prime-Minister',
    'President-Culture',
    'President-Defense',
    'President-Research-Education',
    'President-Finance',
    'President-Economy',
    'President-Justice',
    'President-Healthcare',
    'President-Foreign-Affairs',
    'President-Internal-Affairs',
    'President-Agriculture',
    'President-Social-Affairs',
    'President-Environment',
    'President-Other',
]

agg_dict = dict.fromkeys(cols, "mean")

# One row per cab_id, holding the mean of each selected column.
dfg2 = df.groupby(["cab_id"]).agg(agg_dict)
print(dfg2.shape)
dfg2.head()

In [None]:
# Copy the cab_id index into a regular column, then look up each cabinet's score.
# Cabinets with no entry in cab_strength_dict get a missing value.
dfg2["cab_ids"] = dfg2.index.tolist()
dfg2["pm_strength"] = [cab_strength_dict.get(cab_id) for cab_id in dfg2["cab_ids"]]
print(dfg2.shape)
dfg2.head()

In [None]:
# dropna() with no arguments removes every row that has a missing value in ANY
# column: cabinets without a pm_strength value are dropped, and so is any cabinet
# whose aggregated columns contain a missing value.
dfg2 = dfg2.dropna()
print(dfg2.shape)
dfg2.head(20)

In [None]:
# Pearson correlation between the per-cabinet mean of PresAtt and pm_strength.
pearsonr(dfg2["PresAtt"], dfg2["pm_strength"])

In [None]:
# Show the two correlated columns side by side, one row per cabinet.
dfg2[["PresAtt", "pm_strength"]]

In [None]:
# Mean of the PresAtt flag for each president.
plh = df.groupby(["president"])[["PresAtt"]].mean()
plh

In [None]:
# Total of the PresAtt flag over the rows whose year is before 2005.
plh = df[df["year"]<2005]
plh["PresAtt"].sum()