In [None]:
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, pearsonr
import statsmodels.api as sm


%matplotlib inline

In [None]:
# Load the dataset from parquet, then show its (rows, columns) shape and
# the first rows as a quick check that the read worked.
df = pd.read_parquet("DataEnhanced.parquet")
print(df.shape)
df.head()

In [None]:
# List every column available in the loaded frame.
df.columns

## Standardising Ministries

In [None]:
# Raw (Lithuanian) ministry column name -> standardised policy sphere.
# Several raw names deliberately share one sphere (e.g. "Other", "Economy").
_sphere_by_ministry = {
    'Krašto apsaugos': "Defense",
    'Premjeras': "Prime-Minister",
    'Susisiekimo': "Other",
    'Žemės Ūkio': "Agriculture",
    'Kultūros': "Culture",
    'Ūkio': "Economy",
    'Aplinkos ': "Environment",  # the trailing space is part of the key
    'Energetikos': "Other",
    'Ekonomikos ir inovacijų': "Economy",
    'Teisingumo': "Justice",
    'Finansų': "Finance",
    'Užsienio reikalų': "Foreign-Affairs",
    'Prezidentas': "President",
    'Socialinės apsaugos ir darbo': "Social-Security",
    'Sveikatos apsaugos': "Healthcare",
    'Švietimo ir mokslo': "Research-Education",
    'Švietimo, mokslo ir sporto': "Research-Education",
    'Vidaus reikalų': "Internal-Affairs",
}

# Full mapping: the plain ministry names first, followed by their
# "president_"-prefixed counterparts, which map to "President-<sphere>".
# 'Prezidentas' itself has no prefixed counterpart.
cabinet_map = dict(_sphere_by_ministry)
cabinet_map.update(
    ("president_" + ministry, "President-" + sphere)
    for ministry, sphere in _sphere_by_ministry.items()
    if ministry != 'Prezidentas'
)

In [None]:
# Collapse the raw per-ministry columns into one boolean flag per
# standardised policy sphere.
#
# Sorting makes the order of the generated columns reproducible between
# kernel restarts (iteration order of a set of strings is not stable
# across interpreter runs).
new_cols = sorted(set(cabinet_map.values()))
old_cols = sorted(set(cabinet_map.keys()))

# For every standardised sphere, collect the raw columns that feed into it
# and are actually present in df; raw names missing from df are skipped.
sources_by_sphere = {sphere: [] for sphere in new_cols}
for raw_col in old_cols:
    if raw_col in df.columns:
        sources_by_sphere[cabinet_map[raw_col]].append(raw_col)

# A sphere is flagged True for a row when the sum of its source columns is
# non-zero. skipna=False keeps the behaviour of a plain running sum, where
# a NaN in any source yields NaN and NaN != 0 evaluates to True.
# Spheres without any source column in df are False for every row.
new_col_data = {}
for sphere, sources in sources_by_sphere.items():
    if sources:
        flags = df[sources].sum(axis=1, skipna=False).ne(0).tolist()
    else:
        flags = [False] * len(df)
    new_col_data[sphere] = flags

In [None]:
# Attach every standardised sphere flag to df as a column of the same name.
for sphere_name in new_col_data:
    df[sphere_name] = new_col_data[sphere_name]

In [None]:
# Preview df again to inspect the columns it now holds.
df.head()

In [None]:
# Alphabetical list of the standardised sphere names.
sorted(new_cols)

# Question 1: How is presidential attention distributed across policy spheres?

In [None]:

# Mean of each presidential sphere flag over all rows, drawn as a
# horizontal bar chart ordered from the lowest to the highest mean.
columns = [
    'President-Agriculture',
    'President-Culture',
    'President-Defense',
    'President-Economy',
    'President-Environment',
    'President-Finance',
    'President-Foreign-Affairs',
    'President-Healthcare',
    'President-Internal-Affairs',
    'President-Justice',
    'President-Other',
    'President-Prime-Minister',
    'President-Research-Education',
    'President-Social-Security',
]

# Bar labels drop the "President-" prefix.
data = {item.replace("President-", ""): df[item].mean() for item in columns}
data_s = sorted(data.items(), key=lambda x: x[1])

label_vals = [label for label, share in data_s]
values = [share for label, share in data_s]

# NOTE: rcParams is global state, so this also sets the default size of
# every figure created later in the session.
plt.rcParams["figure.figsize"] = (12, 12)
y_pos = np.arange(len(values))

plt.barh(y_pos, values, color="grey")
plt.yticks(y_pos, label_vals, fontsize=14)
plt.xticks(fontsize=14)
# The former trailing `plt.show((16,16))` was removed: show() accepts only
# the keyword `block`, so passing a size tuple fails on current matplotlib
# and never resized anything.
plt.show()


# Q2: How does the attention pattern differ over time?

In [None]:
# Yearly mean of the presidential flag for five selected spheres, plotted
# as black lines that are told apart by line style.
columns = [
    'President-Prime-Minister',
    'President-Foreign-Affairs',
    'President-Defense',
    'President-Internal-Affairs',
    'President-Healthcare',
]

agg_dict = dict.fromkeys(columns, "mean")

df2 = df.groupby(["year"]).agg(agg_dict)

years = list(df2.index)
data = [list(df2[name]) for name in columns]

fig, ax = plt.subplots(figsize=(18, 10))

# One matplotlib format string per sphere, in the same order as `columns`.
line_formats = ["k-", "k--", "k-.", "k:", "k-x"]

for name, yearly_means, fmt in zip(columns, data, line_formats):
    ax.plot(years, yearly_means, fmt, label=name)

plt.legend()

## Is Attention Higher For FP/DP than for others?

In [None]:
# Two-sample t-test: the foreign-affairs presidential flag against the
# pooled flags of the remaining spheres in `columns`, leaving out every
# sphere listed in `nincl`.
# NOTE(review): `columns` is whatever an earlier cell assigned last; if that
# is a short list, "others" only covers those spheres — confirm this is the
# intended comparison group.
target = 'President-Foreign-Affairs'
nincl = ['President-Foreign-Affairs', 'President-Prime-Minister', 'President-Defense']
target_dist = list(df[target])

other_dist = [
    value
    for sphere in columns
    if sphere != target and sphere not in nincl
    for value in df[sphere]
]

# Sample sizes, then sample means, of the two groups.
print(len(target_dist))
print(len(other_dist))

print(sum(target_dist) / len(target_dist))
print(sum(other_dist) / len(other_dist))

ttest_ind(target_dist, other_dist)

In [None]:
# Two-sample t-test: the defense presidential flag against the pooled flags
# of the remaining spheres in `columns`, leaving out every sphere listed in
# `nincl`.
# NOTE(review): `columns` is whatever an earlier cell assigned last; if that
# is a short list, "others" only covers those spheres — confirm this is the
# intended comparison group.
target = 'President-Defense'

nincl = ['President-Foreign-Affairs', 'President-Prime-Minister', 'President-Defense']
target_dist = list(df[target])

other_dist = [
    value
    for sphere in columns
    if sphere != target and sphere not in nincl
    for value in df[sphere]
]

# Sample sizes, then sample means, of the two groups.
print(len(target_dist))
print(len(other_dist))

print(sum(target_dist) / len(target_dist))
print(sum(other_dist) / len(other_dist))

ttest_ind(target_dist, other_dist)

## Does Presidential Attention Mirror media attention?

In [None]:
# Names of all fourteen presidential sphere flag columns, in alphabetical
# order of the sphere.
columns = [
    "President-" + sphere
    for sphere in (
        "Agriculture",
        "Culture",
        "Defense",
        "Economy",
        "Environment",
        "Finance",
        "Foreign-Affairs",
        "Healthcare",
        "Internal-Affairs",
        "Justice",
        "Other",
        "Prime-Minister",
        "Research-Education",
        "Social-Security",
    )
]

In [None]:
# Monthly means of every presidential sphere flag and of its un-prefixed
# counterpart, plus a 0/1 marker that is 1 when the last `president` value
# of the month equals "Dalia Grybauskaite".
columns2 = [name.replace("President-", "") for name in columns]
ministries_full = columns + columns2

agg_dict = dict.fromkeys(ministries_full, "mean")
agg_dict["president"] = "last"

dfg = df.groupby(["year", "month"]).agg(agg_dict)
dfg["grybauskaite"] = (dfg["president"] == "Dalia Grybauskaite").astype(int)
print(dfg.shape)
dfg.head()

In [None]:
# Pearson correlation between each sphere's monthly mean and the monthly
# mean of its "President-" counterpart.
for sphere in columns2:
    pres_col = "President-" + sphere
    print(sphere)
    print(pearsonr(dfg[sphere], dfg[pres_col]))
    print()

In [None]:
# One OLS regression per sphere: the monthly "President-" mean is regressed
# on the sphere's own monthly mean and the grybauskaite marker, with an
# intercept added by add_constant.
for sphere in columns2:
    print(sphere)
    response = dfg[["President-" + sphere]]
    predictors = sm.add_constant(dfg[[sphere, "grybauskaite"]])

    results = sm.OLS(response, predictors).fit()
    print(results.summary())

    print()

In [None]:
def get_pres_att(ents):
    """Return 1 if "Prezidentas" is in ``ents`` and ``ents`` has more than one element, else 0."""
    return int("Prezidentas" in ents and len(ents) > 1)

# PresAtt: result of get_pres_att applied to each row's cabinet_ents value.
df["PresAtt"] = df["cabinet_ents"].apply(get_pres_att)

In [None]:


# Flag rows with PresAtt == 1 whose foreign-affairs (non-fp-att) or defense
# (non-dp-att) presidential flag equals 0.
# Vectorised boolean masks replace the former row-by-row iterrows loop; the
# resulting 0/1 integer columns hold the same values.
has_pres_att = df["PresAtt"] == 1

df["non-fp-att"] = (has_pres_att & (df["President-Foreign-Affairs"] == 0)).astype("int64")
df["non-dp-att"] = (has_pres_att & (df["President-Defense"] == 0)).astype("int64")

In [None]:
# Foreign affairs (this cell was previously mislabelled "Defense"):
# compare the President-Foreign-Affairs column with the non-fp-att column.
print(df["President-Foreign-Affairs"].mean())
print(df["non-fp-att"].mean())

# ttest_ind accepts the Series directly, so no list copies are needed.
ttest_ind(df["President-Foreign-Affairs"], df["non-fp-att"])

# Q 2.1: Did Grybauskaite pay more attention to FP/DP than others?

In [None]:
# 0/1 marker for rows whose president is "Dalia Grybauskaite", its overall
# share, and the two sub-frames: dfa = other rows, dfb = Grybauskaite rows.
df["grybauskaite"] = (df["president"] == "Dalia Grybauskaite").astype(int)
print(df["grybauskaite"].mean())

dfa = df[df["grybauskaite"] == 0]
dfb = df[df["grybauskaite"] == 1]

In [None]:
# Foreign policy: group means in dfa and dfb, then a two-sample t-test.
fp_col = "president_Užsienio reikalų"

print(dfa[fp_col].mean())
print(dfb[fp_col].mean())

ttest_ind(list(dfa[fp_col]), list(dfb[fp_col]))

In [None]:
# Defense: group means in dfa and dfb, then a two-sample t-test.
defense_col = "president_Krašto apsaugos"

print(dfa[defense_col].mean())
print(dfb[defense_col].mean())

ttest_ind(list(dfa[defense_col]), list(dfb[defense_col]))

# Q 2.2: Does Nauseda pay more attention to healthcare? 

In [None]:
# 0/1 marker for rows whose president is "Gitanas Nausėda", its overall
# share, and the two sub-frames: dfa = other rows, dfb = Nausėda rows.
df["nauseda"] = (df["president"] == "Gitanas Nausėda").astype(int)
print(df["nauseda"].mean())

dfa = df[df["nauseda"] == 0]
dfb = df[df["nauseda"] == 1]

In [None]:
# Healthcare: group means in dfa and dfb, then a two-sample t-test.
health_col = "president_Sveikatos apsaugos"

print(dfa[health_col].mean())
print(dfb[health_col].mean())

ttest_ind(list(dfa[health_col]), list(dfb[health_col]))

# Does presidential attention mirror media attention?

In [None]:
# Raw ministry columns and their "president_"-prefixed counterparts,
# averaged per (year, month); the month's last `president` value drives the
# 0/1 grybauskaite marker.
ministries = [
    'Krašto apsaugos',
    'Premjeras',
    'Teisingumo',
    'Žemės Ūkio',
    'Socialinės apsaugos ir darbo',
    'Ekonomikos ir inovacijų',
    'Ūkio',
    'Vidaus reikalų',
    'Susisiekimo',
    'Švietimo ir mokslo',
    'Finansų',
    'Švietimo, mokslo ir sporto',
    'Kultūros',
    'Energetikos',
    'Aplinkos ',  # the trailing space is part of the column name
    'Užsienio reikalų',
    'Sveikatos apsaugos',
]

ministries2 = [f"president_{name}" for name in ministries]

ministries_full = ministries + ministries2

agg_dict = dict.fromkeys(ministries_full, "mean")
agg_dict["president"] = "last"

dfg = df.groupby(["year", "month"]).agg(agg_dict)
dfg["grybauskaite"] = (dfg["president"] == "Dalia Grybauskaite").astype(int)
print(dfg.shape)
dfg.head()

In [None]:
# Pearson correlation between each ministry's monthly mean and the monthly
# mean of its "president_" counterpart.
for ministry in ministries:
    pres_col = "president_" + ministry
    print(ministry)
    print(pearsonr(dfg[ministry], dfg[pres_col]))
    print()
    

In [None]:
# One OLS regression per ministry: the monthly "president_" mean is
# regressed on the ministry's own monthly mean and the grybauskaite marker,
# with an intercept added by add_constant.
for ministry in ministries:
    print(ministry)
    response = dfg[["president_" + ministry]]
    predictors = sm.add_constant(dfg[[ministry, "grybauskaite"]])

    results = sm.OLS(response, predictors).fit()
    print(results.summary())

    print()

# Is presidential attention larger during the first year of presidential term?

In [None]:
# Flag rows whose date falls in one of three twelve-month windows that each
# start on 12 July (2009, 2014, 2019). Windows are half-open: the start day
# is included, the end day is not.
# NOTE(review): presumably these are the starts of presidential terms; no
# window is defined for a term starting before 2009 — confirm that is
# intended.
first_year_windows = [
    (datetime(2009, 7, 12), datetime(2010, 7, 12)),
    (datetime(2014, 7, 12), datetime(2015, 7, 12)),
    (datetime(2019, 7, 12), datetime(2020, 7, 12)),
]

first_year = [
    int(any(start <= day < end for start, end in first_year_windows))
    for day in df.date
]

df["first_year"] = first_year
print(df.first_year.mean())

In [None]:
# dfa = rows outside the first-year windows, dfb = rows inside them.
dfa = df[df["first_year"] == 0]
dfb = df[df["first_year"] == 1]


In [None]:
# For each column in `columns`: mean in dfa, mean in dfb, and a two-sample
# t-test between the two groups.
for sphere_col in columns:
    normal_vals = dfa[sphere_col]
    first_year_vals = dfb[sphere_col]
    print(sphere_col)
    print("Normal: ", normal_vals.mean())
    print("First year: ", first_year_vals.mean())
    print(ttest_ind(normal_vals, first_year_vals))
    print()

In [None]:
# Same dfa-vs-dfb comparison for the overall PresAtt column.
m = "PresAtt"
normal_vals = dfa[m]
first_year_vals = dfb[m]

print(m)
print("Normal: ", normal_vals.mean())
print("First year: ", first_year_vals.mean())
print(ttest_ind(normal_vals, first_year_vals))
print()

# Top 5 Spheres for each president

In [None]:
# The seventeen "president_"-prefixed raw ministry columns.
prez_cols = [
    "president_" + ministry
    for ministry in (
        'Užsienio reikalų',
        'Vidaus reikalų',
        'Švietimo ir mokslo',
        'Teisingumo',
        'Sveikatos apsaugos',
        'Kultūros',
        'Premjeras',
        'Švietimo, mokslo ir sporto',
        'Energetikos',
        'Ekonomikos ir inovacijų',
        'Finansų',
        'Žemės Ūkio',
        'Krašto apsaugos',
        'Socialinės apsaugos ir darbo',
        'Aplinkos ',  # the trailing space is part of the column name
        'Susisiekimo',
        'Ūkio',
    )
]

In [None]:
# Mean of every prez_cols column over the rows of president
# "Valdas Adamkus", sorted from highest to lowest; the five highest are shown.
df_adamkus = df[df["president"] == "Valdas Adamkus"]

adamkus_atention_dict = {col: df_adamkus[col].mean() for col in prez_cols}

s_adamkus_atention_dict = sorted(
    adamkus_atention_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
s_adamkus_atention_dict[:5]

In [None]:
# Mean of every prez_cols column over the rows of president
# "Dalia Grybauskaite", sorted from highest to lowest; the five highest are shown.
df_grybauskaite = df[df["president"] == "Dalia Grybauskaite"]

grybauskaite_atention_dict = {col: df_grybauskaite[col].mean() for col in prez_cols}

s_grybauskaite_atention_dict = sorted(
    grybauskaite_atention_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
s_grybauskaite_atention_dict[:5]

In [None]:
# Mean of every prez_cols column over the rows of president
# "Gitanas Nausėda", sorted from highest to lowest; the five highest are shown.
df_nauseda = df[df["president"] == "Gitanas Nausėda"]

nauseda_atention_dict = {col: df_nauseda[col].mean() for col in prez_cols}

s_nauseda_atention_dict = sorted(
    nauseda_atention_dict.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
s_nauseda_atention_dict[:5]

In [None]:
# List the columns df holds at this point of the notebook.
df.columns

# Weak Cabinets 

In [None]:
# Number of rows per distinct value of the cabinet column.
df.cabinet.value_counts()


In [None]:
# Combined key "<cabinet_no>_<cabinet>" for every row, followed by the row
# count per key.
df["cab_cab_no"] = [
    str(number) + "_" + name
    for number, name in zip(df["cabinet_no"], df["cabinet"])
]
df.cab_cab_no.value_counts()

In [None]:
# Strength score per "<cabinet_no>_<cabinet>" key; None marks keys without
# a score. Every key occurring in df must be listed here, otherwise the
# lookup below raises KeyError.
cabinet_strength = {
    "17_Saulius Skvernelis": None,
    "15_Andrius Kubilius": 2.58,
    "16_Algirdas Butkevičius": 2.16,
    "18_Ingrida Šimonytė": None,
    "12_Algirdas Brazauskas": 2.33,
    "14_Gediminas Kirkilas": 1.8,
    "13_Algirdas Brazauskas": 2.33,
    "11_Rolandas Paksas": 1.33,
    None: None,
    "_": None,
}

df["cab_strength"] = [cabinet_strength[cab_key] for cab_key in df["cab_cab_no"]]
print(df.shape)
df.head()

In [None]:
# Keep only the columns needed for the cabinet-strength analysis and drop
# rows that have no strength score.
col_list = ["year", "month", "cab_cab_no", "cab_strength", "PresAtt", *columns]
df2 = df[col_list].dropna(subset=["cab_strength"])
print(df2.shape)
df2.head()

In [None]:
# Per (year, month): modal cabinet key, mean strength score, mean PresAtt
# and the mean of every column in `columns`.
agg_dict = {
    "cab_cab_no": pd.Series.mode,
    "cab_strength": "mean",
    "PresAtt": "mean",
    **dict.fromkeys(columns, "mean"),
}

dfg2 = df2.groupby(["year", "month"]).agg(agg_dict)
print(dfg2.shape)
dfg2.head()

In [None]:
# One OLS regression per column in `columns`: its monthly mean is regressed
# on the monthly mean cabinet strength, with an intercept added by
# add_constant.
for sphere_col in columns:
    print(sphere_col)
    response = dfg2[[sphere_col]]
    predictors = sm.add_constant(dfg2[["cab_strength"]])

    results = sm.OLS(response, predictors).fit()
    print(results.summary())

    print()

In [None]:
# Mean strength score and mean PresAtt for every cabinet key.
per_cabinet_spec = {"cab_strength": "mean", "PresAtt": "mean"}
dfg3 = df2.groupby(["cab_cab_no"]).agg(per_cabinet_spec)
print(dfg3.shape)
dfg3.head(12)

# Trust in President

In [None]:
# Trust in president data from Mazvydas: one value per year, 1998-2018.
year = [1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
        2012, 2013, 2014, 2015, 2016, 2017, 2018]

president_trust = [72.3, 79.6, 54.8, 49.9, 54.5, 54.4, 36.7, 58.0, 56.9, 58.1, 55.8, 57.6, 71.8,
                   69.9, 61.4, 53.9, 52.3, 53.0, 48.7, 57.7, 53.1]

# NOTE(review): presumably the president-minus-PM trust gap — confirm with
# the data provider.
pres_pm_diff = [45.85, 56.1, 37.4, 30.87, 34.22, 27.88, 8.68, 38.33, 38.84, 38.15, 42.72, 45.01,
                60.84, 58.59, 47.39, 31.58, 29.88, 27.03, 28.52, 34.21, 32.45]

# NOTE(review): presumably the same comparison expressed as a ratio —
# confirm with the data provider.
pres_pm_diff_r = [2.7321496033, 3.3862186304, 3.1531375936, 2.6255924171, 2.6857142857, 2.0528700906,
                  1.3098893252, 2.9506361323, 3.1506090808, 2.9151606426, 4.2735632184, 4.5864541833,
                  6.5409836066, 6.1941489362, 4.3777619387, 2.4136078782, 2.3351206434, 2.0408163265,
                  2.4153846154, 2.4576054538, 2.5691489362]

lt = [True]*len(year)

# Each list gets its own column. Previously both pres_pm_diff and
# pres_pm_diff_r were written to the "pres_pm_diff" column, so the first
# was silently overwritten by the second.
# NOTE: code that reads "pres_pm_diff" now receives the pres_pm_diff values;
# use "pres_pm_diff_r" to reproduce numbers obtained before this fix.
df_p_trust = pd.DataFrame({
    "year": year,
    "pres_trust": president_trust,
    "pres_pm_diff": pres_pm_diff,
    "pres_pm_diff_r": pres_pm_diff_r,
})
print(df_p_trust.shape)
df_p_trust.head(10)


In [None]:
# Yearly mean of PresAtt and of every presidential sphere flag, with `year`
# turned from the group index into an ordinary column.
cols = [
    'PresAtt',
    'President-Prime-Minister',
    'President-Foreign-Affairs',
    'President-Defense',
    'President-Economy',
    'President-Finance',
    'President-Healthcare',
    'President-Research-Education',
    'President-Internal-Affairs',
    'President-Justice',
    'President-Environment',
    'President-Social-Security',
    'President-Agriculture',
    'President-Culture',
    'President-Other',
]
agg_dict = dict.fromkeys(cols, "mean")

dfg2 = df.groupby(["year"]).agg(agg_dict)
dfg3 = dfg2.reset_index()

print(dfg3.shape)
dfg3.head()

In [None]:
# Inner join of the trust table and the yearly attention means on `year`;
# only years present in both tables are kept.
dfgm = df_p_trust.merge(dfg3, how="inner", on=["year"])
print(dfgm.shape)
dfgm.head(20)


In [None]:
# Scatter plot of yearly PresAtt against the pres_trust column.
ax1 = dfgm.plot.scatter(
    x='pres_trust',
    y='PresAtt',
    c='Black',
    xlabel="President Popularity",
    ylabel="Presidential Attention",
)

In [None]:
# Scatter plot of yearly PresAtt against the pres_pm_diff column.
ax1 = dfgm.plot.scatter(
    x='pres_pm_diff',
    y='PresAtt',
    c='Black',
    xlabel="President popularity relative to PM",
    ylabel="Presidential Attention",
)

In [None]:
# Pearson correlation between the yearly PresAtt and pres_pm_diff columns.
print(pearsonr(dfgm["PresAtt"], dfgm["pres_pm_diff"]))

In [None]:
# One OLS regression per attention measure: its yearly mean is regressed on
# pres_trust, with an intercept added by add_constant.
cols = [
    'PresAtt',
    'President-Prime-Minister',
    'President-Foreign-Affairs',
    'President-Defense',
    'President-Economy',
    'President-Finance',
    'President-Healthcare',
    'President-Research-Education',
    'President-Internal-Affairs',
    'President-Justice',
    'President-Environment',
    'President-Social-Security',
    'President-Agriculture',
    'President-Culture',
    'President-Other',
]

for measure in cols:
    print(measure)
    response = dfgm[[measure]]
    predictors = sm.add_constant(dfgm[["pres_trust"]])

    results = sm.OLS(response, predictors).fit()
    print(results.summary())

    print()

In [None]:
# Line plot of pres_trust by year for the merged table.
dfgm.plot.line("year", "pres_trust")