In [1]:
import pandas as pd
import numpy as np

# State Capacity Dataset 
- <b>Goal</b>: Compile the state capacity dataset (Hanson) for further regression analysis
- The dataset covers 190 countries from 1960 to 2015. Because of the dataset's complexity, we do not extrapolate missing values. In addition to the State Capacity values, we add the following exploratory variables:
    - Taxes on income (% of tax revenue)
    - Taxes on trade (% of tax revenue)
    - Total tax revenue (% of GDP)
- <b>Data citation</b>: Jonathan K. Hanson and Rachel Sigman, “Leviathan’s Latent Dimensions: Measuring State Capacity for Comparative Political Research.” Journal of Politics, forthcoming. https://doi.org/10.1086/715066

In [None]:
# Folder holding the Hanson State Capacity dataset files. The path is relative,
# so it resolves against the kernel's current working directory. Keep the
# trailing slash: file names are appended directly to this string.
SC_dir = "../../data/global_Hanson_StateCapacityDataset/"

In [2]:
# Load the Stata (.dta) release of the dataset into a DataFrame, then show it
# as the cell output.
all_results = pd.read_stata(
    f"{SC_dir}StateCapacityDataset_v1.dta"
)
all_results

Unnamed: 0,cntrynum,country,year,iso3,iso2,ccode,scode,VDem,WBregion,sample_polity,...,milexpercap,milpercap,policecap,StateHist50s,bti_mo,weberian,wbstat,tax_inc_tax,tax_trade_tax,taxrev_gdp
0,1.0,Afghanistan,1960.0,AFG,AF,700.0,AFG,36.0,South Asia,1.0,...,0.948857,2.037175,,0.726306,,,,,,
1,1.0,Afghanistan,1961.0,AFG,AF,700.0,AFG,36.0,South Asia,1.0,...,0.934870,2.564674,,0.728006,,,,,,
2,1.0,Afghanistan,1962.0,AFG,AF,700.0,AFG,36.0,South Asia,1.0,...,0.876937,2.494962,,0.729685,,,,,,
3,1.0,Afghanistan,1963.0,AFG,AF,700.0,AFG,36.0,South Asia,1.0,...,0.936218,2.527849,,0.731343,,,,,,
4,1.0,Afghanistan,1964.0,AFG,AF,700.0,AFG,36.0,South Asia,1.0,...,0.913283,2.525196,,0.732981,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8832,199.0,South Sudan,2011.0,SSD,SS,525.0,SSU,32.0,Sub-Saharan Africa (all income levels),1.0,...,4.682920,3.107348,,0.599653,,,,,,0.009247
8833,199.0,South Sudan,2012.0,SSD,SS,525.0,SSU,32.0,Sub-Saharan Africa (all income levels),1.0,...,4.592208,2.697507,,0.601541,,,,,,0.025273
8834,199.0,South Sudan,2013.0,SSD,SS,525.0,SSU,32.0,Sub-Saharan Africa (all income levels),1.0,...,4.562591,2.937346,,0.603412,,,0.261111,,,0.018151
8835,199.0,South Sudan,2014.0,SSD,SS,525.0,SSU,32.0,Sub-Saharan Africa (all income levels),1.0,...,4.822781,2.919253,,0.605265,8.0,,0.294444,,,0.026940


In [3]:
# Columns carried into the exported subset: country/year identifiers, the
# Capacity estimate with its companion Capacity_sd column, and the three tax
# variables.
var_list = [
    "cntrynum", "country", "year", "iso3",
    "Capacity", "Capacity_sd",
    "tax_inc_tax", "tax_trade_tax", "taxrev_gdp",
]

# Subset -> give the capacity columns self-describing names -> index by the
# country number. The index is written out as the first CSV column.
all_results_select = (
    all_results.loc[:, var_list]
    .rename(columns={"Capacity": "StateCapacity", "Capacity_sd": "StateCapacity_sd"})
    .set_index("cntrynum")
)

# Persist the subset (written to the current working directory), then show it.
all_results_select.to_csv("_all_temporal_state_capacity.csv")
all_results_select

Unnamed: 0_level_0,country,year,iso3,StateCapacity,StateCapacity_sd,tax_inc_tax,tax_trade_tax,taxrev_gdp
cntrynum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,Afghanistan,1960.0,AFG,-1.3830,0.419495,,,
1.0,Afghanistan,1961.0,AFG,-1.3620,0.411242,,,
1.0,Afghanistan,1962.0,AFG,-1.3690,0.411928,,,
1.0,Afghanistan,1963.0,AFG,-1.3600,0.422916,,,
1.0,Afghanistan,1964.0,AFG,-1.3790,0.428595,,,
...,...,...,...,...,...,...,...,...
199.0,South Sudan,2011.0,SSD,-0.8025,0.440147,,,0.009247
199.0,South Sudan,2012.0,SSD,-0.9320,0.403032,,,0.025273
199.0,South Sudan,2013.0,SSD,-1.4000,0.345209,,,0.018151
199.0,South Sudan,2014.0,SSD,-1.4010,0.335133,,,0.026940
