In [1]:
import pandas as pd
import numpy as np

# Compile temporal independent variables
- <b>Goal</b>: Compile all temporal independent variables for further regression analysis. 
- This involves matching the country names (using a crosswalk) and years. Assumptions on how missing data is treated can be found in either the respective processing files (usually to fill the missing years), or below sections (to fill the missing countries)
- The groups of variables include:
    1. Climate opinion survey -> no data extrapolation 
    2. World development index: GDP, share of coal or gas in electricity, or share in GDP -> no data extrapolation 
    3. Energy statistics review: coal and gas reserves and production
    4. Fuel trade diversity and gravity 
    5. POLCON dataset: existance of judiciary, legislative etc. chambers and consistency
    6. Power sector reform: e.g. liberalization, consumer choice
    7. State capacity 
    8. Worldwide governance 
- To do list:
    9. Available LNG terminal 
    10. World value survey

In [2]:
# crosswalk - standardize with ISO3 at the end
crosswalk = pd.read_excel("../_data_process/country_names.xlsx")
crosswalk

Unnamed: 0,Country Code,Region,IncomeGroup,TableName,GEM_Name,Hanson_Name,BP_Name,Meta_Name,WEF_Name,Henisz_Name,Reform_Name,MESSAGE_Name
0,ABW,Latin America & Caribbean,High income,Aruba,,,,,,,Aruba,
1,AFG,South Asia,Low income,Afghanistan,,Afghanistan,Afghanistan,,,Afghanistan,Afghanistan,Afghanistan
2,AGO,Sub-Saharan Africa,Lower middle income,Angola,,Angola,,Angola,Angola,Angola,Angola,Angola
3,ALB,Europe & Central Asia,Upper middle income,Albania,,Albania,Albania,Albania,Albania,Albania,Albania,Albania
4,AND,Europe & Central Asia,High income,Andorra,,,,,,,Andorra,Andorra
...,...,...,...,...,...,...,...,...,...,...,...,...
211,XKX,Europe & Central Asia,Upper middle income,Kosovo,Kosovo,Kosovo,,Kosovo,,Kosovo,Kosovo,
212,YEM,Middle East & North Africa,Low income,"Yemen, Rep.",Yemen,Yemen,,Yemen,Yemen,,"Yemen, Rep.",Yemen
213,ZAF,Sub-Saharan Africa,Upper middle income,South Africa,South Africa,South Africa,South Africa,South Africa,South Africa,South Africa,South Africa,South Africa
214,ZMB,Sub-Saharan Africa,Low income,Zambia,Zambia,Zambia,Zambia,Zambia,Zambia,Zambia,Zambia,Zambia


### Climate opinion survey -  $\color{orange}{\text{social}}$

In [3]:
climate_survey = pd.read_csv("../_data_process/_all_temporal_climate_opinion.csv")
climate_survey

Unnamed: 0,country,year,Survey_Worry_%,Survey_Belief_%,Survey_FutureGen_%,Survey_Personal_%,Survey_FossilMore_%,Survey_FossilLess_%
0,Argentina,2021,88.000000,56.000000,68.000000,38.000000,21.000000,58.000000
1,Australia,2021,72.000000,50.000000,61.000000,19.000000,11.000000,71.000000
2,Brazil,2021,86.000000,53.000000,74.000000,56.000000,27.000000,54.000000
3,Canada,2021,78.000000,50.000000,64.000000,18.000000,12.000000,69.000000
4,Colombia,2021,90.000000,54.000000,77.000000,58.000000,26.000000,53.000000
...,...,...,...,...,...,...,...,...
136,Uruguay,2022,85.160842,56.147711,67.296866,32.039047,19.608326,61.067272
137,Uzbekistan,2022,78.558349,36.898455,36.332476,13.533452,24.500367,41.988850
138,Vietnam,2022,85.194041,27.349717,71.302560,51.805902,40.738441,35.108095
139,Yemen,2022,31.703961,20.528497,23.389677,11.408358,38.808648,18.752107


In [4]:
print(len(climate_survey["country"].unique()))
print(climate_survey["year"].min())
print(climate_survey["year"].max())
print(round(climate_survey["Survey_Belief_%"].mean(),2))
print(round(climate_survey["Survey_Belief_%"].std(),2))
print(round(climate_survey["Survey_Belief_%"].min(),2))
print(round(climate_survey["Survey_Belief_%"].max(),2))
print(len(climate_survey["Survey_Belief_%"]))

111
2021
2022
42.85
10.52
16.0
65.25
141


In [5]:
climate_survey_rename = pd.merge(climate_survey,crosswalk[["Country Code","Meta_Name"]],left_on="country",right_on="Meta_Name",how="outer")
climate_survey_rename = climate_survey_rename.drop(columns=["country","Meta_Name"])
climate_survey_rename = climate_survey_rename.set_index(["Country Code","year"])
climate_survey_rename = climate_survey_rename.dropna(how="all")
climate_survey_rename

Unnamed: 0_level_0,Unnamed: 1_level_0,Survey_Worry_%,Survey_Belief_%,Survey_FutureGen_%,Survey_Personal_%,Survey_FossilMore_%,Survey_FossilLess_%
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ARG,2021.0,88.000000,56.000000,68.000000,38.000000,21.000000,58.000000
ARG,2022.0,84.594690,55.385937,70.722520,39.090304,19.858865,60.063782
AUS,2021.0,72.000000,50.000000,61.000000,19.000000,11.000000,71.000000
AUS,2022.0,72.991429,49.768334,60.411572,21.170702,12.702930,66.152537
BRA,2021.0,86.000000,53.000000,74.000000,56.000000,27.000000,54.000000
...,...,...,...,...,...,...,...
ARE,2022.0,69.869396,39.027279,49.142363,32.829407,41.677586,29.960110
URY,2022.0,85.160842,56.147711,67.296866,32.039047,19.608326,61.067272
UZB,2022.0,78.558349,36.898455,36.332476,13.533452,24.500367,41.988850
YEM,2022.0,31.703961,20.528497,23.389677,11.408358,38.808648,18.752107


### World development index -  $\color{royalblue}{\text{political economy}}$

In [6]:
world_develpment = pd.read_csv("../_data_process/_all_temporal_world_development.csv")
world_develpment = world_develpment.set_index(["Country Code","year"])
world_develpment = world_develpment.drop(columns="WDI_EnergyGDP_kgOilEq/$1k")
world_develpment

Unnamed: 0_level_0,Unnamed: 1_level_0,GDPpc_2015$,GDPpc_2017$,Population,WDI_Manu_GDP_%,WDI_CoalRents_%,WDI_OilRents_%,WDI_NGRents_%,WDI_NG_El_%,WDI_Coal_El_%,WDI_Fossil_El_%,logGDPpc
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
AFG,1960,,,8622466.0,,,,,,,,
SYC,1960,3218.664448,,41700.0,,,,,,,,
SLE,1960,497.418491,,2301310.0,,,,,,,,
SGP,1960,3611.953651,,1646400.0,10.577547,,,,,,,
SXM,1960,,,2646.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
DEU,2022,43032.142085,53560.091056,84079811.0,18.450473,,,,16.490566,31.046312,,10.888559
GRC,2022,20167.558903,31516.636854,10566531.0,8.806199,,,,37.299770,10.411900,,10.358271
HUN,2022,16288.987145,35254.503304,9683505.0,17.181391,,,,24.943691,8.164414,,10.470349
IRL,2022,98561.624049,113870.785550,5086988.0,37.555767,,,,48.569740,7.254497,,11.642820


In [7]:
world_develpment_stats = world_develpment.reset_index()
print(len(world_develpment_stats["Country Code"].unique()))
print(world_develpment_stats["year"].min())
print(world_develpment_stats["year"].max())
print(round(world_develpment_stats["logGDPpc"].mean(),2))
print(round(world_develpment_stats["logGDPpc"].std(),2))
print(round(world_develpment_stats["logGDPpc"].min(),2))
print(round(world_develpment_stats["logGDPpc"].max(),2))
print(len(world_develpment_stats["logGDPpc"]))

266
1960
2022
9.13
1.17
6.08
11.97
16121


In [8]:
print(round(world_develpment_stats["WDI_Coal_El_%"].mean(),2))
print(round(world_develpment_stats["WDI_Coal_El_%"].std(),2))
print(round(world_develpment_stats["WDI_Coal_El_%"].min(),2))
print(round(world_develpment_stats["WDI_Coal_El_%"].max(),2))
print(len(world_develpment_stats["WDI_Coal_El_%"]))

18.65
25.74
0.0
100.0
16121


In [9]:
print(round(world_develpment_stats["WDI_NG_El_%"].mean(),2))
print(round(world_develpment_stats["WDI_NG_El_%"].std(),2))
print(round(world_develpment_stats["WDI_NG_El_%"].min(),2))
print(round(world_develpment_stats["WDI_NG_El_%"].max(),2))
print(len(world_develpment_stats["WDI_NG_El_%"]))

18.1
26.0
0.0
100.0
16121


### Energy statistical review -  $\color{royalblue}{\text{political economy}}$

In [10]:
energy_stats = pd.read_csv("../_data_process/_all_temporal_BPstats.csv")
energy_stats

Unnamed: 0,country,year,BP_GasReserve_tM3,BP_GasProduction_bM3,BP_CoalProduction_mTon,BP_CoalReserve_mTon,BP_GasR2P_yr,BP_CoalR2P_yr
0,Canada,1980,2.429700,71.227950,,,34.111609,
1,Mexico,1980,1.781325,25.115242,,,70.926055,
2,US,1980,5.396133,525.090747,,,10.276572,
3,Total North America,1980,9.607158,621.433939,,,15.459661,
4,Argentina,1980,0.624975,8.164624,,,76.546699,
...,...,...,...,...,...,...,...,...
4946,Middle East,2016,,,,,,500.0
4947,Middle East,2017,,,,,,500.0
4948,Middle East,2018,,,,,,500.0
4949,Middle East,2019,,,,,,500.0


In [11]:
energy_stats_rename = pd.merge(energy_stats,crosswalk[["Country Code","BP_Name"]],left_on="country",right_on="BP_Name",how="outer")
energy_stats_rename = energy_stats_rename.drop(columns=["country","BP_Name"])
energy_stats_rename = energy_stats_rename[energy_stats_rename["year"].isna()==False]
energy_stats_rename = energy_stats_rename[energy_stats_rename["Country Code"].isna()==False]
energy_stats_rename = energy_stats_rename.drop_duplicates()
energy_stats_rename = energy_stats_rename.set_index(["Country Code","year"])
energy_stats_rename["LogBP_GasR2P_yr"] = np.log(energy_stats_rename["BP_GasR2P_yr"])
energy_stats_rename["LogBP_GasR2P_yr"] = energy_stats_rename["LogBP_GasR2P_yr"].fillna(0)
energy_stats_rename

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0_level_0,Unnamed: 1_level_0,BP_GasReserve_tM3,BP_GasProduction_bM3,BP_CoalProduction_mTon,BP_CoalReserve_mTon,BP_GasR2P_yr,BP_CoalR2P_yr,LogBP_GasR2P_yr
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CAN,1980.0,2.429700,71.227950,,,34.111609,,3.529638
CAN,1981.0,2.498925,68.827650,40.088,,36.306993,,3.592010
CAN,1982.0,2.526225,72.247125,42.907,,34.966443,,3.554389
CAN,1983.0,2.547675,67.951350,44.807,,37.492633,,3.624144
CAN,1984.0,2.738775,74.475975,57.402,,36.773940,,3.604789
...,...,...,...,...,...,...,...,...
ZMB,2017.0,,,,10.0,,,0.000000
ZMB,2018.0,,,,10.0,,,0.000000
ZMB,2019.0,,,,10.0,,,0.000000
ZMB,2020.0,,,,10.0,,,0.000000


### Fuel trade diversity and gravity -  $\color{royalblue}{\text{political economy}}$
- so far only includes LNG data

In [12]:
gas_trade = pd.read_csv("../_data_process/_temporal_BACI_DGD_Gas_both.csv")
gas_trade = gas_trade.rename(columns={"country":"Country Code"})
gas_trade = gas_trade[gas_trade["year"]!=0]
gas_trade = gas_trade.set_index(["Country Code","year"])
#gas_trade["LogNetValue_Gas_Import"] = np.log(gas_trade["NetValue_Gas_Import"])
gas_trade

Unnamed: 0_level_0,Unnamed: 1_level_0,HHI_Gas_Import,contiguity_Gas_Import,agree_Gas_Import,sanction_Gas_Import,common_language_Gas_Import,colony_Gas_Import,HHI_Gas_Export,contiguity_Gas_Export,agree_Gas_Export,sanction_Gas_Export,common_language_Gas_Export,colony_Gas_Export,NetValue_Gas_Import,NetQuantity_Gas_Import
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
BEL,1995.0,0.296619,0.496219,1.000000,0.000000,0.760876,0.416791,0.490196,0.906559,0.998680,0.000000,0.952853,0.649268,1092030.077,9.679034e+06
BEL,1996.0,0.316463,0.460321,1.000000,0.000000,0.694677,0.408114,0.659054,0.908745,1.000000,0.000000,1.000000,0.026049,1320689.101,1.185681e+07
BEL,1997.0,0.305224,0.402619,1.000000,0.000000,0.707491,0.352952,0.981658,0.008484,1.000000,0.990757,0.009243,0.000893,1413984.196,1.154693e+07
BEL,1998.0,0.340924,0.195566,1.000000,0.000000,0.568168,0.136150,0.897484,0.946805,1.000000,0.000000,1.000000,0.000000,1451715.165,9.659974e+06
BEL,1999.0,0.387019,0.093431,0.999987,0.000013,0.445870,0.047905,0.997479,0.000082,0.998835,0.998738,0.000097,0.000000,1234508.643,9.094264e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VCT,2018.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,-79489.364,-1.790450e+05
VCT,2019.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,-69958.203,-1.828610e+05
VCT,2020.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,-54089.686,-2.253127e+05
CYM,2018.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,-0.131,-3.650000e-01


In [13]:
coal_trade = pd.read_csv("../_data_process/_temporal_BACI_DGD_Coal_both.csv")
coal_trade = coal_trade.rename(columns={"country":"Country Code"})
coal_trade = coal_trade[coal_trade["year"]!=0]
coal_trade = coal_trade.set_index(["Country Code","year"])
#coal_trade["LogNetValue_Coal_Import"] = np.log(coal_trade["NetValue_Coal_Import"])
#coal_trade["LogNetValue_Coal_Import"] = coal_trade["LogNetValue_Coal_Import"].fillna(0)
coal_trade

Unnamed: 0_level_0,Unnamed: 1_level_0,HHI_Coal_Import,contiguity_Coal_Import,agree_Coal_Import,sanction_Coal_Import,common_language_Coal_Import,colony_Coal_Import,HHI_Coal_Export,contiguity_Coal_Export,agree_Coal_Export,sanction_Coal_Export,common_language_Coal_Export,colony_Coal_Export,NetValue_Coal_Import,NetQuantity_Coal_Import
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
USA,1995.0,0.292268,0.188110,0.759553,0.188122,0.999630,0.000013,0.079788,0.131941,0.131942,0.700034,0.422958,0.253708,-3777471.082,-6.730652e+07
USA,1996.0,0.350755,0.188541,0.885144,0.188541,0.999242,0.000058,0.073304,0.099616,0.099616,0.676095,0.423930,0.158201,-3591048.809,-2.511773e+07
USA,1997.0,0.342787,0.160638,0.909158,0.160922,0.994025,0.001955,0.080748,0.129748,0.157845,0.706367,0.532890,0.181658,-3512023.245,-2.576268e+07
USA,1998.0,0.298241,0.147175,0.802874,0.458828,0.996489,0.004816,0.096629,0.200652,0.231414,0.675392,0.654798,0.142046,-2904608.088,-2.164091e+07
USA,1999.0,0.394687,0.113625,0.867115,0.009602,0.997151,0.007328,0.137138,0.270839,0.291520,0.874754,0.675110,0.155539,-1908749.519,-1.153252e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GIN,2020.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,-0.101,-7.000000e+00
SLB,2013.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.000000,-413.307,-5.000000e+03
SPM,2015.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,0.000000,-1.420,-2.656200e+01
TCD,2015.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,0.000000,-803.899,-2.000000e+04


In [14]:
gas_trade_stats = gas_trade.reset_index()
print(len(gas_trade_stats["Country Code"].unique()))
print(gas_trade_stats["year"].min())
print(gas_trade_stats["year"].max())

223
1995.0
2021.0


In [15]:
"""
print(round(gas_trade_stats["HHI_Gas_Export"].mean(),2))
print(round(LNG_trade_stats["HHI_LNG_Export"].std(),2))
print(round(LNG_trade_stats["HHI_LNG_Export"].min(),2))
print(round(LNG_trade_stats["HHI_LNG_Export"].max(),2))
print(len(LNG_trade_stats["HHI_LNG_Export"]))
"""

'\nprint(round(gas_trade_stats["HHI_Gas_Export"].mean(),2))\nprint(round(LNG_trade_stats["HHI_LNG_Export"].std(),2))\nprint(round(LNG_trade_stats["HHI_LNG_Export"].min(),2))\nprint(round(LNG_trade_stats["HHI_LNG_Export"].max(),2))\nprint(len(LNG_trade_stats["HHI_LNG_Export"]))\n'

In [16]:
"""
print(round(LNG_trade_stats["HHI_LNG_Import"].mean(),2))
print(round(LNG_trade_stats["HHI_LNG_Import"].std(),2))
print(round(LNG_trade_stats["HHI_LNG_Import"].min(),2))
print(round(LNG_trade_stats["HHI_LNG_Import"].max(),2))
print(len(LNG_trade_stats["HHI_LNG_Import"]))
"""

'\nprint(round(LNG_trade_stats["HHI_LNG_Import"].mean(),2))\nprint(round(LNG_trade_stats["HHI_LNG_Import"].std(),2))\nprint(round(LNG_trade_stats["HHI_LNG_Import"].min(),2))\nprint(round(LNG_trade_stats["HHI_LNG_Import"].max(),2))\nprint(len(LNG_trade_stats["HHI_LNG_Import"]))\n'

### POLCON dataset -  $\color{darkseagreen}{\text{institutional}}$

In [17]:
POLCON = pd.read_csv("../_data_process/_all_temporal_POLCON.csv")
POLCON = POLCON.rename(columns={"ctrynm":"Country Code"}).drop(columns="country_name")
POLCON = POLCON.set_index(["Country Code","year"])
POLCON

Unnamed: 0_level_0,Unnamed: 1_level_0,l1,l2,j,f,aligne_l1,aligne_l2,alignl1_l2,aligne_j
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
USA,1920,1.0,1.0,,,0.0,0.0,1.0,
USA,1921,1.0,1.0,,,1.0,1.0,1.0,
USA,1922,1.0,1.0,,,1.0,1.0,1.0,
USA,1923,1.0,1.0,,,1.0,1.0,1.0,
USA,1924,1.0,1.0,,,1.0,1.0,1.0,
...,...,...,...,...,...,...,...,...,...
WSM,2017,,,,,1.0,0.0,0.0,
WSM,2018,,,,,1.0,0.0,0.0,
WSM,2019,,,,,1.0,0.0,0.0,
WSM,2020,,,,,1.0,0.0,0.0,


### Power sector reform tracker -  $\color{darkseagreen}{\text{institutional}}$
- dropping repeat data for Venezuela, Macedonia, and Kyrgystan (using the most recent); no matching set for Central African Republic

In [18]:
PSRT = pd.read_csv("../_data_process/_all_temporal_power_reform.csv")
PSRT

Unnamed: 0,country,year,source,R_IndepProducer,R_Private,R_Unbundle,R_WholeSale,R_IndepReg,R_Choice,R_Liberalization,R_Corp
0,Greece,1999.0,Erdogdu 2011,0,0,1,0,1,0,1,0
1,Greece,2000.0,Erdogdu 2011,0,0,1,0,1,0,1,1
2,Greece,2001.0,Erdogdu 2011,0,0,1,0,1,1,1,1
3,Greece,2002.0,Erdogdu 2011,0,1,1,0,1,1,1,1
4,Greece,2003.0,Erdogdu 2011,0,1,1,0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
7293,Mozambique,2018.0,assumption,1,0,0,0,1,0,1,1
7294,Mozambique,2019.0,assumption,1,0,0,0,1,0,1,1
7295,Mozambique,2020.0,assumption,1,0,0,0,1,0,1,1
7296,Mozambique,2021.0,assumption,1,0,0,0,1,0,1,1


In [19]:
print(len(PSRT["country"].unique()))
print(PSRT["year"].min())
print(PSRT["year"].max())

178
1982.0
2022.0


In [20]:
print(round(PSRT["R_Choice"].mean(),2))
print(round(PSRT["R_Choice"].std(),2))
print(round(PSRT["R_Choice"].min(),2))
print(round(PSRT["R_Choice"].max(),2))
print(len(PSRT["R_Choice"]))

0.13
0.34
0
1
7298


In [21]:
PSRT_rename = pd.merge(PSRT,crosswalk[["Country Code","Reform_Name"]],left_on="country",right_on="Reform_Name",how="outer")
PSRT_rename = PSRT_rename.drop(columns=["country","Reform_Name","source"])
PSRT_rename = PSRT_rename.set_index(["Country Code","year"])
PSRT_rename = PSRT_rename.dropna(how="all")
PSRT_rename

Unnamed: 0_level_0,Unnamed: 1_level_0,R_IndepProducer,R_Private,R_Unbundle,R_WholeSale,R_IndepReg,R_Choice,R_Liberalization,R_Corp
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
GRC,1999.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
GRC,2000.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0
GRC,2001.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0
GRC,2002.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
GRC,2003.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...
MOZ,2018.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
MOZ,2019.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
MOZ,2020.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
MOZ,2021.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0


### State capacity dataset -  $\color{darkseagreen}{\text{institutional}}$

In [22]:
state_capacity = pd.read_csv("../_data_process/_all_temporal_state_capacity.csv")
state_capacity

Unnamed: 0,cntrynum,country,year,iso3,StateCapacity,StateCapacity_sd,tax_inc_tax,tax_trade_tax,taxrev_gdp
0,1.0,Afghanistan,1960.0,AFG,-1.3830,0.419495,,,
1,1.0,Afghanistan,1961.0,AFG,-1.3620,0.411242,,,
2,1.0,Afghanistan,1962.0,AFG,-1.3690,0.411928,,,
3,1.0,Afghanistan,1963.0,AFG,-1.3600,0.422916,,,
4,1.0,Afghanistan,1964.0,AFG,-1.3790,0.428595,,,
...,...,...,...,...,...,...,...,...,...
8832,199.0,South Sudan,2011.0,SSD,-0.8025,0.440147,,,0.009247
8833,199.0,South Sudan,2012.0,SSD,-0.9320,0.403032,,,0.025273
8834,199.0,South Sudan,2013.0,SSD,-1.4000,0.345209,,,0.018151
8835,199.0,South Sudan,2014.0,SSD,-1.4010,0.335133,,,0.026940


In [23]:
state_capacity_rename = pd.merge(state_capacity,crosswalk[["Country Code","Hanson_Name"]],left_on="country",right_on="Hanson_Name",how="outer")
state_capacity_rename = state_capacity_rename.drop(columns=["country","Hanson_Name","cntrynum","iso3"])
state_capacity_rename = state_capacity_rename.set_index(["Country Code","year"])
state_capacity_rename = state_capacity_rename.dropna(how="all")
state_capacity_rename

Unnamed: 0_level_0,Unnamed: 1_level_0,StateCapacity,StateCapacity_sd,tax_inc_tax,tax_trade_tax,taxrev_gdp
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AFG,1960.0,-1.3830,0.419495,,,
AFG,1961.0,-1.3620,0.411242,,,
AFG,1962.0,-1.3690,0.411928,,,
AFG,1963.0,-1.3600,0.422916,,,
AFG,1964.0,-1.3790,0.428595,,,
...,...,...,...,...,...,...
SSD,2011.0,-0.8025,0.440147,,,0.009247
SSD,2012.0,-0.9320,0.403032,,,0.025273
SSD,2013.0,-1.4000,0.345209,,,0.018151
SSD,2014.0,-1.4010,0.335133,,,0.026940


### Worldwide governance -  $\color{darkseagreen}{\text{institutional}}$

In [24]:
world_governance = pd.read_csv("../_data_process/_all_temporal_worldwide_governance.csv")
world_governance = world_governance.set_index(["Country Code","year"])
world_governance

Unnamed: 0_level_0,Unnamed: 1_level_0,WG_GovEff,WG_Reg,WG_Voice
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABW,1996.0,,,
ABW,1997.0,,,
ABW,1998.0,,,
ABW,1999.0,,,
ABW,2000.0,,,
...,...,...,...,...
ZWE,2017.0,-1.238663,-1.564137,-1.195904
ZWE,2018.0,-1.261774,-1.509489,-1.140975
ZWE,2019.0,-1.267288,-1.468613,-1.164705
ZWE,2020.0,-1.299779,-1.416232,-1.113716


### Policy (Fankhauser)

In [25]:
policy_fankhauser = pd.read_csv("../_data_process/_all_temporal_policy.csv")
policy_fankhauser = policy_fankhauser.rename(columns={"ISO":"Country Code","Year":"year"})
policy_fankhauser = policy_fankhauser.set_index(["Country Code","year"])
policy_fankhauser

Unnamed: 0_level_0,Unnamed: 1_level_0,num_supply_policy,binary_airqual,ghg_ambition_n,cw_2030,annex_one,annex_two
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
DZA,1980.0,0.0,0.0,0.0,0.0,0.0,0.0
DZA,1981.0,0.0,0.0,0.0,0.0,0.0,0.0
DZA,1982.0,0.0,0.0,0.0,0.0,0.0,0.0
DZA,1983.0,0.0,0.0,0.0,0.0,0.0,0.0
DZA,1984.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
ALB,2019.0,3.0,1.0,1.0,1.0,0.0,0.0
ALB,2020.0,3.0,1.0,1.0,1.0,0.0,0.0
ALB,2021.0,3.0,1.0,1.0,1.0,0.0,0.0
ALB,2022.0,3.0,1.0,1.0,1.0,0.0,0.0


## Compile datasets together

In [26]:
data_list = [climate_survey_rename, world_develpment, energy_stats_rename,
             gas_trade, coal_trade,POLCON, PSRT_rename, state_capacity_rename, world_governance,policy_fankhauser]
for count in np.arange(len(data_list)):
    data = data_list[count]
    if count==0:
        data_compile = data
    else:
        data_compile = pd.merge(data_compile,data,on=["Country Code","year"],how="outer")
data_compile

Unnamed: 0_level_0,Unnamed: 1_level_0,Survey_Worry_%,Survey_Belief_%,Survey_FutureGen_%,Survey_Personal_%,Survey_FossilMore_%,Survey_FossilLess_%,GDPpc_2015$,GDPpc_2017$,Population,WDI_Manu_GDP_%,...,taxrev_gdp,WG_GovEff,WG_Reg,WG_Voice,num_supply_policy,binary_airqual,ghg_ambition_n,cw_2030,annex_one,annex_two
Country Code,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ARG,2021.0,88.000000,56.000000,68.000000,38.000000,21.000000,58.000000,12402.490791,21527.196137,45808747.0,15.479166,...,,-0.356662,-0.618145,0.619620,12.0,1.0,1.0,1.0,0.0,0.0
ARG,2022.0,84.594690,55.385937,70.722520,39.090304,19.858865,60.063782,,,,,...,,,,,12.0,1.0,1.0,1.0,0.0,0.0
AUS,2021.0,72.000000,50.000000,61.000000,19.000000,11.000000,71.000000,59339.684345,49774.340702,25688079.0,5.548834,...,,1.512928,1.838728,1.379442,8.0,1.0,0.0,0.0,1.0,1.0
AUS,2022.0,72.991429,49.768334,60.411572,21.170702,12.702930,66.152537,,,,,...,,,,,8.0,1.0,0.0,0.0,1.0,1.0
BRA,2021.0,86.000000,53.000000,74.000000,56.000000,27.000000,54.000000,8621.730590,14735.581783,214326223.0,10.223030,...,,-0.460292,-0.110795,0.278197,11.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
COK,1994.0,,,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
COK,2022.0,,,,,,,,,,,...,,,,,5.0,0.0,0.0,0.0,0.0,0.0
COK,2023.0,,,,,,,,,,,...,,,,,5.0,0.0,0.0,0.0,0.0,0.0
LBY,2023.0,,,,,,,,,,,...,,,,,1.0,0.0,0.0,0.0,0.0,0.0


In [27]:
# choose only the countries in the crosswalk file
data_compile_format = data_compile.reset_index()
cw_countries = crosswalk["Country Code"].unique()
data_compile_format_crop = data_compile_format[data_compile_format["Country Code"].isin(cw_countries)]
data_compile_format_crop = data_compile_format_crop.sort_values(by="year")
data_compile_format_crop

Unnamed: 0,Country Code,year,Survey_Worry_%,Survey_Belief_%,Survey_FutureGen_%,Survey_Personal_%,Survey_FossilMore_%,Survey_FossilLess_%,GDPpc_2015$,GDPpc_2017$,...,taxrev_gdp,WG_GovEff,WG_Reg,WG_Voice,num_supply_policy,binary_airqual,ghg_ambition_n,cw_2030,annex_one,annex_two
17716,ARG,1920.0,,,,,,,,,...,,,,,,,,,,
17676,CHL,1920.0,,,,,,,,,...,,,,,,,,,,
17534,BOL,1920.0,,,,,,,,,...,,,,,,,,,,
17494,BRA,1920.0,,,,,,,,,...,,,,,,,,,,
17454,PER,1920.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26000,SEN,2023.0,,,,,,,,,...,,,,,5.0,1.0,0.0,0.0,0.0,0.0
26001,SGP,2023.0,,,,,,,,,...,,,,,6.0,1.0,0.0,0.0,0.0,0.0
26002,SVK,2023.0,,,,,,,,,...,,,,,10.0,0.0,0.0,0.0,1.0,0.0
25992,PAK,2023.0,,,,,,,,,...,,,,,5.0,1.0,1.0,1.0,0.0,0.0


In [28]:
# fill the production and reserve data as 0 if they do not appear in the original list of countries
unique_ff_producers = energy_stats_rename.reset_index()["Country Code"].unique()
not_producers = data_compile_format_crop[data_compile_format_crop["Country Code"].isin(unique_ff_producers)==False].index
data_compile_format_crop.loc[not_producers,energy_stats_rename.columns] = data_compile_format_crop.loc[not_producers,energy_stats_rename.columns].fillna(0)
data_compile_format_crop = data_compile_format_crop.sort_values(by="year")
data_compile_format_crop

Unnamed: 0,Country Code,year,Survey_Worry_%,Survey_Belief_%,Survey_FutureGen_%,Survey_Personal_%,Survey_FossilMore_%,Survey_FossilLess_%,GDPpc_2015$,GDPpc_2017$,...,taxrev_gdp,WG_GovEff,WG_Reg,WG_Voice,num_supply_policy,binary_airqual,ghg_ambition_n,cw_2030,annex_one,annex_two
17716,ARG,1920.0,,,,,,,,,...,,,,,,,,,,
21247,ETH,1920.0,,,,,,,,,...,,,,,,,,,,
20570,LBR,1920.0,,,,,,,,,...,,,,,,,,,,
19904,NOR,1920.0,,,,,,,,,...,,,,,,,,,,
19762,FIN,1920.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26082,TGO,2023.0,,,,,,,,,...,,,,,3.0,0.0,1.0,1.0,0.0,0.0
26083,BRN,2023.0,,,,,,,,,...,,,,,1.0,0.0,0.0,0.0,0.0,0.0
26084,BDI,2023.0,,,,,,,,,...,,,,,2.0,0.0,0.0,1.0,0.0,0.0
26105,XKX,2023.0,,,,,,,,,...,,,,,1.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# select the countries and unique year in the LNG trade dataset
# within those years, the countries not in the dataset are set to 0
unique_trade = gas_trade.reset_index()["Country Code"].unique()
unique_trade_years = gas_trade.reset_index()["year"].unique()
not_trade_table = data_compile_format_crop[data_compile_format_crop["Country Code"].isin(unique_trade)==False]
not_trade_table_years = not_trade_table[not_trade_table["year"].isin(unique_trade_years)]
not_trade = not_trade_table_years.index
data_compile_format_crop.loc[not_trade,gas_trade.columns] = data_compile_format_crop.loc[not_trade,gas_trade.columns].fillna(0)
data_compile_format_crop = data_compile_format_crop.sort_values(by="year")
data_compile_format_crop[["num_supply_policy","binary_airqual","ghg_ambition_n","cw_2030","annex_one","annex_two"]] = data_compile_format_crop[["num_supply_policy","binary_airqual","ghg_ambition_n","cw_2030","annex_one","annex_two"]].fillna(0)
data_compile_format_crop

Unnamed: 0,Country Code,year,Survey_Worry_%,Survey_Belief_%,Survey_FutureGen_%,Survey_Personal_%,Survey_FossilMore_%,Survey_FossilLess_%,GDPpc_2015$,GDPpc_2017$,...,taxrev_gdp,WG_GovEff,WG_Reg,WG_Voice,num_supply_policy,binary_airqual,ghg_ambition_n,cw_2030,annex_one,annex_two
17716,ARG,1920.0,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
17534,BOL,1920.0,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
17494,BRA,1920.0,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
17454,PER,1920.0,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
17414,ECU,1920.0,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26002,SVK,2023.0,,,,,,,,,...,,,,,10.0,0.0,0.0,0.0,1.0,0.0
25962,GUY,2023.0,,,,,,,,,...,,,,,2.0,1.0,0.0,0.0,0.0,0.0
25983,MNG,2023.0,,,,,,,,,...,,,,,8.0,1.0,1.0,0.0,0.0,0.0
25995,POL,2023.0,,,,,,,,,...,,,,,10.0,1.0,0.0,0.0,1.0,0.0


In [30]:
data_compile_export = data_compile_format_crop.set_index(["Country Code","year"])
data_compile_export.to_csv("_all_temporal_indep_var.csv")