In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, pearsonr
import statsmodels.api as sm

%matplotlib inline

# TO DOs:

* Check Vaira-Vike
* Check LT-PMs in 2000 - 2005
* Do more scraping and re-run


In [None]:
# Load the main dataset from the local parquet file into `df`
df = pd.read_parquet("MainData.parquet")
# Print (rows, columns) and preview the first rows as a sanity check
print(df.shape)
df.head()

In [None]:
# Trust in president data from Mazvydas
# One value per year, 1998-2018 (21 entries in every list below).
year = list(range(1998, 2019))

president_trust = [72.3, 79.6, 54.8, 49.9, 54.5, 54.4, 36.7, 58.0, 56.9, 58.1, 55.8, 57.6, 71.8,
                   69.9, 61.4, 53.9, 52.3, 53.0, 48.7, 57.7, 53.1]

# presumably the president-minus-PM trust gap — TODO confirm with the data source
pres_pm_diff = [45.85, 56.1, 37.4, 30.87, 34.22, 27.88, 8.68, 38.33, 38.84, 38.15, 42.72, 45.01,
                60.84, 58.59, 47.39, 31.58, 29.88, 27.03, 28.52, 34.21, 32.45]

# presumably the president-to-PM trust ratio (suffix "_r") — TODO confirm with the data source
pres_pm_diff_r = [2.7321496033, 3.3862186304, 3.1531375936, 2.6255924171, 2.6857142857, 2.0528700906,
                  1.3098893252, 2.9506361323, 3.1506090808, 2.9151606426, 4.2735632184, 4.5864541833,
                  6.5409836066, 6.1941489362, 4.3777619387, 2.4136078782, 2.3351206434, 2.0408163265,
                  2.4153846154, 2.4576054538, 2.5691489362]

# Every row of this table carries LT=True; "LT" is used as a merge key together with "year".
lt = [True]*len(year)

# Build the frame in one call: pandas raises ValueError if the lists differ in length.
# NOTE: the second series used to be written into "pres_pm_diff" as well, silently
# overwriting the first one. It now gets its own column, "pres_pm_diff_r"; any model
# that should keep using the second series must reference "pres_pm_diff_r".
df_p_trust = pd.DataFrame({
    "year": year,
    "pres_trust": president_trust,
    "pres_pm_diff": pres_pm_diff,
    "pres_pm_diff_r": pres_pm_diff_r,
    "LT": lt,
})
print(df_p_trust.shape)
df_p_trust.head(10)


In [None]:
# Attach the yearly trust series to the main data with a left join on (year, LT):
# rows of df without a matching (year, LT) pair keep NaN in the trust columns.
merge_keys = ["year", "LT"]
trust_cols = [c for c in df_p_trust.columns if c not in merge_keys]

# Drop trust columns left over from a previous run of this cell, so re-running it
# does not pile up suffixed duplicates (e.g. pres_trust_x / pres_trust_y).
df = pd.merge(
    df.drop(columns=trust_cols, errors="ignore"),
    df_p_trust,
    on=merge_keys,
    how="left",
    validate="many_to_one",  # fail loudly if a (year, LT) pair is duplicated in df_p_trust
)
print(df.shape)
df.head()

In [None]:
# Mean of PresAtt within each LT group
df.groupby("LT")[["PresAtt"]].mean()

In [None]:
# Two-sample t-test on PresAtt: rows with LT == True against rows with LT == False
lt = df.loc[df["LT"] == True, "PresAtt"].tolist()
lv = df.loc[df["LT"] == False, "PresAtt"].tolist()
ttest_ind(lt, lv)

In [None]:
# Cast the LT and pandemic_period columns to int so they enter the models as numbers
for indicator in ("LT", "pandemic_period"):
    df[indicator] = df[indicator].astype(int)

In [None]:
# Show the column names of df
df.columns

In [None]:
# Optional sample restriction (currently disabled): keep only rows with year > 2006
#df = df[df["year"]>2006]
# Report the size of the frame the models below are fitted on
print(df.shape)

# General Model

In [None]:
# General model: OLS of PresAtt on LT plus the control variables.
# Left out of this specification: "DaliaGryb", "pandemic_period"
predictors = ["LT", "first_year", "pm_strength", "fp_def"]

y = df["PresAtt"]
X = sm.add_constant(df[predictors])  # prepend the intercept column
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

In [None]:
# Keep only the rows that have a pres_trust value
df2 = df[df["pres_trust"].notna()]
print(df2.shape)
df2.head()

In [None]:

# OLS on the rows with trust data: PresAtt on the controls plus pres_trust.
# Left out of this specification: "DaliaGryb", "pandemic_period"
predictors = ["first_year", "pm_strength", "fp_def", "pres_trust"]

y = df2["PresAtt"]
X = sm.add_constant(df2[predictors])  # prepend the intercept column
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

In [None]:

# OLS on the rows with trust data: PresAtt on the controls plus pres_pm_diff.
# Left out of this specification: "DaliaGryb", "pandemic_period"
predictors = ["first_year", "pm_strength", "fp_def", "pres_pm_diff"]

y = df2["PresAtt"]
X = sm.add_constant(df2[predictors])  # prepend the intercept column
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

In [None]:
# Define function to output plot of the model coefficients

def coefplot(results, figure_title, ylim=(-0.025, 0.175)):
    '''
    Plot the coefficients of a fitted OLS model with 95% confidence intervals.

    Each coefficient is drawn as a dot with a vertical error bar, sorted by
    coefficient value in ascending order. The intercept ('const') is left out
    of the plot when the model has one.

    Parameters
    ----------
    results : fitted statsmodels regression results
        Must provide `params`, `conf_int()` and `model.exog_names`.
    figure_title : str
        Title shown above the plot.
    ylim : tuple of (low, high) or None, optional
        y-axis limits. Defaults to (-0.025, 0.175); pass None to let
        matplotlib choose limits that fit the data.

    Returns
    -------
    None
        The figure is displayed with plt.show().

    Notes
    -----
    Calls sns.set_context("poster"), which changes the seaborn plotting
    context for every plot drawn afterwards.
    '''
    # Read estimates and interval bounds from the results object directly
    # rather than parsing them back out of the rounded summary text.
    names = list(results.model.exog_names)
    coef = np.asarray(results.params, dtype=float)
    lower = np.asarray(results.conf_int(alpha=0.05), dtype=float)[:, 0]

    # Error bar half-width: (coef - lower bound of conf interval)
    coef_df = pd.DataFrame({'coef': coef, 'errors': coef - lower}, index=names)

    # Drop the constant for plotting (no error if the model has no intercept)
    coef_df = coef_df.drop(index='const', errors='ignore')

    # Sort values by coef ascending
    coef_df = coef_df.sort_values(by='coef')

    # x-labels: the bar plot below reads them from a regular column
    coef_df['variables'] = coef_df.index.values

    # 'poster' context makes bars wide on the plot
    sns.set_context("poster")

    fig, ax = plt.subplots(figsize=(15, 10))

    # Invisible bars: only the error bars (95% confidence interval) are drawn.
    # Can increase capsize to add whiskers.
    coef_df.plot(x='variables', y='coef', kind='bar',
                 ax=ax, color='none', fontsize=22,
                 ecolor='steelblue', capsize=0,
                 yerr='errors', legend=False)

    # Title & labels
    ax.set_title(figure_title, fontsize=30)
    ax.set_ylabel('Coefficients', fontsize=22)
    ax.set_xlabel('', fontsize=22)
    ax.tick_params(axis='x', labelrotation=45)

    # Point estimates; bar k sits at x = k, so the dots line up with the bars.
    # (np.arange replaces pd.np.arange: the pd.np alias was removed from pandas.)
    ax.scatter(x=np.arange(coef_df.shape[0]),
               marker='o', s=80,
               y=coef_df['coef'], color='steelblue')

    # Line to define zero on the y-axis
    ax.axhline(y=0, linestyle='--', color='red', linewidth=1)

    if ylim is not None:
        ax.set_ylim(ylim)

    plt.show()


In [None]:
# Plot coefficients of the model currently held in `results`.
# NOTE(review): `results` is whichever model was fitted most recently; when the cells
# run top to bottom that is the pres_pm_diff specification fitted on df2, not the
# "General Model" — confirm the title matches the model being plotted.
coefplot(results, "Main Model")

In [None]:
# General model extended with the DaliaGryb regressor
predictors = ["LT", "first_year", "pm_strength", "fp_def", "DaliaGryb"]

y = df["PresAtt"]
X = sm.add_constant(df[predictors])  # prepend the intercept column
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

In [None]:
# General model extended with the pandemic_period regressor
predictors = ["LT", "first_year", "pm_strength", "fp_def", "pandemic_period"]

y = df["PresAtt"]
X = sm.add_constant(df[predictors])  # prepend the intercept column
model = sm.OLS(y, X)
results = model.fit()
print(results.summary())

# By Sphere

In [None]:
# Policy spheres: each name is a column of df, and "President-<name>" is the
# column used as the outcome for that sphere.
policy_spheres = [
    'Prime-Minister',
    'Foreign-Affairs',
    'Defense',
    'Healthcare',
    'Internal-Affairs',
    'Economy',
    'Finance',
    'Social-Security',
    'Research-Education',
    'Environment',
    'Justice',
    'Culture',
    'Agriculture',
    'Other',
]

# One OLS per sphere: "President-<sphere>" on the sphere column plus controls
# ("DaliaGryb" is left out), followed by a coefficient plot titled with the sphere.
# Printing of the full summaries is switched off.
for sphere in policy_spheres:
    print(sphere)

    regressors = [sphere, "LT", "first_year", "pm_strength", "pandemic_period"]

    y = df[f"President-{sphere}"]
    X = sm.add_constant(df[regressors])
    model = sm.OLS(y, X)
    results = model.fit()

    coefplot(results, sphere)

In [None]:
# NOTE(review): `columns2` and `dfg` are not defined in any cell visible above, and the
# "grybauskaite" column is not used anywhere else here. Unless they are defined elsewhere,
# this cell raises NameError on a fresh Restart & Run All — TODO confirm the intended
# inputs or remove the cell.
# For each name m: regress the "President-<m>" column on column m and "grybauskaite"
# (plus an intercept) and print the full OLS summary.
for m in columns2:
    print(m)
    y = dfg[["President-"+m]]
    X = dfg[[m, "grybauskaite"]]
    X = sm.add_constant(X)
    
    model = sm.OLS(y, X)
    results = model.fit()
    print(results.summary())
    
    

In [None]:

# Mean of every "President-<sphere>" column over all rows of df, drawn as a
# horizontal bar chart sorted from the smallest mean to the largest.
columns = [
    'President-Agriculture',
    'President-Culture',
    'President-Defense',
    'President-Economy',
    'President-Environment',
    'President-Finance',
    'President-Foreign-Affairs',
    'President-Healthcare',
    'President-Internal-Affairs',
    'President-Justice',
    'President-Other',
    'President-Prime-Minister',
    'President-Research-Education',
    'President-Social-Security',
]

# Sphere name (prefix stripped) -> column mean, then sorted ascending by the mean
data = {item.replace("President-", ""): df[item].mean() for item in columns}
data_s = sorted(data.items(), key=lambda x: x[1])

label_vals = [i[0] for i in data_s]
values = [i[1] for i in data_s]
y_pos = np.arange(len(values))

# Size this figure explicitly instead of changing the global rcParams default,
# which would also resize every later figure that does not set its own size.
fig, ax = plt.subplots(figsize=(8, 8))
ax.barh(y_pos, values)
ax.set_yticks(y_pos)
ax.set_yticklabels(label_vals)
ax.set_xlabel("Mean")
ax.set_title("Mean of each President-<sphere> column")

# plt.show() takes no size argument; the figure size is set in plt.subplots above.
plt.show()


In [None]:
# Yearly mean of selected "President-<sphere>" columns, one line per column.
columns = [
    'President-Prime-Minister',
    'President-Foreign-Affairs',
    'President-Defense',
    'President-Internal-Affairs',
    'President-Healthcare',
]

# Stored under its own name: `df2` already holds the regression subset used by
# earlier cells, and rebinding it here would break those cells when re-run.
yearly_means = df.groupby("year")[columns].mean()

fig, ax = plt.subplots(figsize=(14, 8))

for col in columns:
    ax.plot(yearly_means.index, yearly_means[col], label=col)

ax.set_xlabel("Year")
ax.set_ylabel("Yearly mean")
ax.legend()
plt.show()