In [7]:
import numpy as np
import pandas as pd
from inv_dict import wb_cow_dict



In [13]:
def get_cc(val):
    """Return the ``wb_cow_dict`` entry for ``val``, or 0 when ``val`` is not a key.

    Judging by the dictionary's name and by how ``add_wd_rows`` calls this
    helper, ``val`` is presumably a World Bank country name and the result a
    COW country code -- TODO confirm against ``inv_dict``.
    """
    return wb_cow_dict.get(val, 0)
    
def get_year(val):
    """Coerce a year label (for example the string ``'1960'``) to an ``int``."""
    year = int(val)
    return year

def add_wd_rows(reign_df, wdi_df, variable_list, first_year=1960, last_year=2019):
    """Attach one column per requested indicator to a copy of ``reign_df``.

    For every indicator name in ``variable_list`` the matching rows of
    ``wdi_df`` are reshaped from wide form (one column per year) to long form,
    keyed by ``yearcode = year + 10000 * ccode`` and inner-joined onto the
    running result through its ``yearcode`` column.

    Parameters
    ----------
    reign_df : pandas.DataFrame
        Left-hand frame; must contain a ``yearcode`` column. It is not modified.
    wdi_df : pandas.DataFrame
        Wide indicator table with ``'Indicator Name'`` and ``'Country Name'``
        columns plus one column per year, labelled with the year as a string.
    variable_list : iterable of str
        Indicator names to add, each becoming a column of the same name.
    first_year, last_year : int, optional
        Inclusive range of year columns to read from ``wdi_df``. The defaults
        (1960 and 2019) reproduce the previously hard-coded range.

    Returns
    -------
    pandas.DataFrame
        Copy of ``reign_df`` with the indicator columns appended. Because each
        join is an inner join, rows whose ``yearcode`` is absent from any
        indicator table are dropped.

    Raises
    ------
    ValueError
        If an indicator name has no rows in ``wdi_df``; inner-joining an empty
        indicator table would otherwise leave no usable result.
    KeyError
        Raised by ``pd.melt`` if a requested year column is missing.
    """
    year_columns = [str(year) for year in range(first_year, last_year + 1)]
    joint_df = reign_df.copy()
    for indicator in variable_list:
        indicator_rows = wdi_df[wdi_df['Indicator Name'] == indicator]
        if indicator_rows.empty:
            raise ValueError(
                "Indicator {!r} has no rows in wdi_df".format(indicator)
            )
        long_df = pd.melt(indicator_rows, id_vars=['Country Name'], value_vars=year_columns)
        # Countries that get_cc cannot map receive code 0, so their yearcode is
        # just the bare year.
        long_df['yearcode'] = (
            long_df['variable'].map(get_year)
            + 10000 * long_df['Country Name'].map(get_cc)
        )
        indicator_values = long_df.set_index('yearcode')['value'].rename(indicator)
        joint_df = joint_df.join(indicator_values, on='yearcode', how='inner')
    return joint_df

In [20]:
# Load the two pickled input frames that are later passed to add_wd_rows.
# NOTE: unpickling can execute arbitrary code -- only load pickle files from a
# trusted source.
reign_df = pd.read_pickle('../data/pickles/df_late.pkl')
wdi_df = pd.read_pickle('../data/wdi_complete.pkl')

In [31]:
# Inspect the `coupyear` column of reign_df.
reign_df['coupyear']

28       False
29       False
30       False
31       False
32       False
         ...  
11364    False
11365    False
11366    False
11367    False
11368    False
Name: coupyear, Length: 8493, dtype: bool

In [9]:
# Candidate indicator names, grouped into two lists. Judging by the variable
# names, the grouping reflects how well populated each series is -- TODO confirm.
populated_vars = [
    'Adolescent fertility rate (births per 1,000 women ages 15-19)',
    'Age dependency ratio (% of working-age population)',
    'Birth rate, crude (per 1,000 people)',
    'Death rate, crude (per 1,000 people)',
    'Fertility rate, total (births per woman)',
    'Life expectancy at birth, female (years)',
    'Life expectancy at birth, male (years)',
    'Mortality rate, adult, male (per 1,000 male adults)',
    'Population ages 0-14 (% of total population)',
    'Population growth (annual %)',
    'Rural population (% of total population)',
    'Urban population growth (annual %)',
]

hp_vars = [
    'GDP (constant 2010 US$)',
    'Gross national expenditure (% of GDP)',
    'GINI index (World Bank estimate)',
]

In [10]:
# Two further groups of indicator names. The `late_` prefix and the
# `populated`/`med` suffixes presumably describe period and data coverage --
# TODO confirm.
late_populated = [
    'Arable land (hectares per person)',
    'Merchandise exports (current US$)',
    'Merchandise imports (current US$)',
    'Population growth (annual %)',
    'Primary education, duration (years)',
]
late_med = [
    'Air transport, registered carrier departures worldwide',
    'Foreign direct investment, net inflows (% of GDP)',
    'GDP growth (annual %)',
    'GDP per capita (constant 2010 US$)',
    'Military expenditure (% of GDP)',
    'Mineral rents (% of GDP)',
    'Natural gas rents (% of GDP)',
    'Net official development assistance and official aid received (constant 2015 US$)',
    'Oil rents (% of GDP)',
    'Trade (% of GDP)',
]

In [15]:
# Echo hp_vars to check its contents.
hp_vars

['GDP (constant 2010 US$)',
 'Gross national expenditure (% of GDP)',
 'GINI index (World Bank estimate)']

In [11]:
# Echo late_populated to check its contents.
late_populated

['Arable land (hectares per person)',
 'Merchandise exports (current US$)',
 'Merchandise imports (current US$)',
 'Population growth (annual %)',
 'Primary education, duration (years)']

In [12]:
# Echo late_med to check its contents.
late_med

['Air transport, registered carrier departures worldwide',
 'Foreign direct investment, net inflows (% of GDP)',
 'GDP growth (annual %)',
 'GDP per capita (constant 2010 US$)',
 'Military expenditure (% of GDP)',
 'Mineral rents (% of GDP)',
 'Natural gas rents (% of GDP)',
 'Net official development assistance and official aid received (constant 2015 US$)',
 'Oil rents (% of GDP)',
 'Trade (% of GDP)']

In [16]:
# Echo populated_vars to check its contents.
populated_vars

['Adolescent fertility rate (births per 1,000 women ages 15-19)',
 'Age dependency ratio (% of working-age population)',
 'Birth rate, crude (per 1,000 people)',
 'Death rate, crude (per 1,000 people)',
 'Fertility rate, total (births per woman)',
 'Life expectancy at birth, female (years)',
 'Life expectancy at birth, male (years)',
 'Mortality rate, adult, male (per 1,000 male adults)',
 'Population ages 0-14 (% of total population)',
 'Population growth (annual %)',
 'Rural population (% of total population)',
 'Urban population growth (annual %)']

In [48]:
# Define the list in this cell before echoing it, so the cell also runs on a
# fresh kernel (Restart & Run All) instead of raising NameError.
variable_list = [
    'Life expectancy at birth, female (years)',
    'GDP growth (annual %)',
    'Mineral rents (% of GDP)',
    'Oil rents (% of GDP)',
    'Trade (% of GDP)',
    'Rural population (% of total population)',
]
variable_list

['Life expectancy at birth, female (years)',
 'GDP growth (annual %)',
 'Mineral rents (% of GDP)',
 'Oil rents (% of GDP)',
 'Trade (% of GDP)',
 'Rural population (% of total population)']

In [40]:
# Indicators joined onto reign_df for the base data set.
variable_list = [
    'Life expectancy at birth, female (years)',
    'GDP growth (annual %)',
    'Mineral rents (% of GDP)',
    'Oil rents (% of GDP)',
    'Trade (% of GDP)',
    'Rural population (% of total population)',
]

In [51]:
# Larger indicator selection, including the GINI index.
variable_list_expanded = [
    'Life expectancy at birth, female (years)',
    'GDP growth (annual %)',
    'Mineral rents (% of GDP)',
    'Oil rents (% of GDP)',
    'Trade (% of GDP)',
    'Foreign direct investment, net inflows (% of GDP)',
    'Natural gas rents (% of GDP)',
    'Population ages 0-14 (% of total population)',
    'Rural population (% of total population)',
    'Population growth (annual %)',
    'GINI index (World Bank estimate)',
]

In [41]:
# Inner-join the indicators named in variable_list onto reign_df by yearcode.
joint_df = add_wd_rows(reign_df, wdi_df, variable_list)

In [56]:
# Same entries as variable_list_expanded, minus 'GINI index (World Bank estimate)'.
variable_list_expanded_test = [
    'Life expectancy at birth, female (years)',
    'GDP growth (annual %)',
    'Mineral rents (% of GDP)',
    'Oil rents (% of GDP)',
    'Trade (% of GDP)',
    'Foreign direct investment, net inflows (% of GDP)',
    'Natural gas rents (% of GDP)',
    'Population ages 0-14 (% of total population)',
    'Rural population (% of total population)',
    'Population growth (annual %)',
]

In [57]:
# Same join as for joint_df, using the ten indicators in variable_list_expanded_test.
joint_df2 = add_wd_rows(reign_df, wdi_df, variable_list_expanded_test)

In [58]:
# (rows, columns) of the expanded join.
joint_df2.shape

(8041, 48)

In [60]:
# Drop every row containing a missing value, then persist the result.
joint_df2.dropna().to_pickle('../data/pickles/late_joined_expanded_no_na.pkl')

In [42]:
# Complete cases only: drop every row that has at least one missing value.
no_na_df = joint_df.dropna()

In [43]:
# Alternative treatment: keep all rows and replace every missing value with 0.
# This applies to all columns of joint_df, not only the joined indicators.
# NOTE(review): 0 is not a neutral value for indicators such as life
# expectancy -- confirm this fill is intended.
zero_df = joint_df.fillna(value = 0)

In [44]:
# Persist the complete-case frame.
no_na_df.to_pickle('../data/pickles/late_joined_no_na.pkl')

In [45]:
# Persist the zero-filled frame.
zero_df.to_pickle('../data/pickles/late_joined_zero_fill.pkl')

In [47]:
# List the columns of the complete-case frame, including the joined indicators.
no_na_df.columns

Index(['ccode', 'country', 'leader', 'year', 'month', 'elected', 'age', 'male',
       'militarycareer', 'tenure_months', 'government', 'anticipation',
       'ref_ant', 'leg_ant', 'exec_ant', 'irreg_lead_ant', 'election_now',
       'election_recent', 'leg_recent', 'exec_recent', 'lead_recent',
       'ref_recent', 'direct_recent', 'indirect_recent', 'victory_recent',
       'defeat_recent', 'change_recent', 'nochange_recent', 'delayed',
       'lastelection', 'loss', 'irregular', 'prev_conflict', 'precip',
       'yearcode', 'coupyear', 'coupsuc', 'risk',
       'Life expectancy at birth, female (years)', 'GDP growth (annual %)',
       'Mineral rents (% of GDP)', 'Oil rents (% of GDP)', 'Trade (% of GDP)',
       'Rural population (% of total population)'],
      dtype='object')