In [1]:
import pandas as pd
import numpy as np

# Temporal natural gas dependent variables
- <b>Goal</b>: Compile the temporal natural gas dependent variables for linear regression analysis 
- The variables of interest are:
    1. (intermediary) Peak gas capacity in a year 
    2. (independent var) Natural gas capacity in a year (and its natural log) and associated emissions, plus the year of the first-ever plant and how many years it has been since
    3. Share of retired capacity in peak capacity [%]
    4. Level of phase out = 1 - share of current capacity in peak capacity [%]
- <b>Assumptions</b>:
    - plants whose retired year is recorded as "not found" are excluded from the retirement totals

In [2]:
# Relative path of the directory holding the plant-level gas CSVs that the
# cells below read (annual capacity summary and retirements).
plant_dir = "../../data/global_GEM/analysis_plant/"

In [3]:
# Country-name crosswalk; its "GEM_Name" and "Country Code" columns are used at
# the end of the notebook to attach a country code to every row before export.
crosswalk = pd.read_excel("../../data/country_names.xlsx",sheet_name="crosswalk")

### Gas capacity in a year and associated emissions

In [4]:
# Load the annual gas summary, keep only the country / year / capacity columns,
# and give the capacity column the short name used throughout the notebook.
gas_annual_path = plant_dir + "gas_plant_annual_summary.csv"
data_summary = (
    pd.read_csv(gas_annual_path)
    .loc[:, ["Country", "Year", "Capacity elec. (MW)"]]
    .rename(columns={"Capacity elec. (MW)": "Current_MW"})
)
data_summary.head()

Unnamed: 0,Country,Year,Current_MW
0,United Kingdom,1924,50
1,Russia,1925,80
2,United Kingdom,1925,50
3,Russia,1926,80
4,United Kingdom,1926,50


In [5]:
# Find the first year in which each country reports gas capacity, then attach
# it to every country-year row together with the number of years elapsed since.
# Selecting "Year" before aggregating avoids computing a minimum over the
# other columns only to throw them away.
gas_start_year = (
    data_summary.groupby("Country")[["Year"]]
    .min()
    .rename(columns={"Year": "start_year"})
)
current_summary = data_summary.merge(
    gas_start_year, left_on="Country", right_index=True, how="left"
)
current_summary["duration_year"] = current_summary["Year"] - current_summary["start_year"]
current_summary

Unnamed: 0,Country,Year,Current_MW,start_year,duration_year
0,United Kingdom,1924,50,1924,0
1,Russia,1925,80,1925,0
2,United Kingdom,1925,50,1924,1
3,Russia,1926,80,1925,1
4,United Kingdom,1926,50,1924,2
...,...,...,...,...,...
4417,Uruguay,2023,840,2006,17
4418,Uzbekistan,2023,13122,1961,62
4419,Venezuela,2023,12025,1956,67
4420,Vietnam,2023,8146,1998,25


### Peak gas capacity in a year

In [7]:
# Running peak: for every year present in the data, the largest capacity each
# country has reported in that year or any earlier one.
year_list = data_summary["Year"].unique()
peak_frames = []
for yr in year_list:
    peak_to_date = (
        data_summary[data_summary["Year"] <= yr]
        .groupby("Country")[["Current_MW"]]
        .max()
    )
    peak_to_date["Year"] = yr
    peak_frames.append(peak_to_date)
# concatenate once instead of growing the frame inside the loop
summary_peak = (
    pd.concat(peak_frames)
    .reset_index()
    .set_index(["Country", "Year"])
    .rename(columns={"Current_MW": "Peak_MW"})
)
summary_plusPeak = pd.concat(
    [current_summary.set_index(["Country", "Year"]), summary_peak], axis=1
)
# country-years that exist only in the peak table have no current capacity:
# the country has phased out, so its capacity is 0 MW
summary_plusPeak["Current_MW"] = summary_plusPeak["Current_MW"].fillna(0)
# start_year is constant within a country; restore it on the rows added by the
# outer concat (a blanket fillna(0) would record the first plant in year 0),
# then recompute the elapsed years from it
summary_plusPeak["start_year"] = (
    summary_plusPeak.groupby(level="Country")["start_year"].transform("min")
)
summary_plusPeak["duration_year"] = (
    summary_plusPeak.index.get_level_values("Year").to_numpy()
    - summary_plusPeak["start_year"].to_numpy()
)
# any value still missing is filled with 0, as before
summary_plusPeak = summary_plusPeak.fillna(0)
summary_plusPeak

Unnamed: 0_level_0,Unnamed: 1_level_0,Current_MW,start_year,duration_year,Peak_MW
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
United Kingdom,1924,50,1924,0,50
Russia,1925,80,1925,0,80
United Kingdom,1925,50,1924,1,50
Russia,1926,80,1925,1,80
United Kingdom,1926,50,1924,2,50
...,...,...,...,...,...
Uruguay,2023,840,2006,17,840
Uzbekistan,2023,13122,1961,62,13122
Venezuela,2023,12025,1956,67,12025
Vietnam,2023,8146,1998,25,8146


### Retired gas capacity 
- 107 retired plants
- 34 early retirements 

In [8]:
# Load the retired gas plants. A retired year recorded as "not found" becomes
# NaN so that the column can be handled as numeric.
retired_data = pd.read_csv(plant_dir + "gas_plant_retirement.csv")
retired_data["Retired year"] = (
    retired_data["Retired year"]
    .replace("not found", np.nan)
    .astype(float)
)
# plants with a Lifetime below this threshold form the early-retirement subset
early_lifetime_limit = 30
retired_data_early = retired_data.loc[retired_data["Lifetime"] < early_lifetime_limit]

In [9]:
# Cumulative retired capacity per country and year, once for all retirements
# ("Ret") and once for the early-retirement subset ("EarlyRet").
retirement_sets = {"Ret": retired_data, "EarlyRet": retired_data_early}
mode_frames = []
for ret_mode, dataset in retirement_sets.items():
    yearly_frames = []
    for yr in year_list:
        # rows with a NaN retired year never satisfy the comparison and so are left out;
        # select "MW" before summing so the other columns are not aggregated
        retired_to_date = (
            dataset[dataset["Retired year"] <= yr]
            .groupby("Country")[["MW"]]
            .sum()
        )
        retired_to_date["Year"] = yr
        yearly_frames.append(retired_to_date)
    # concatenate once per mode instead of growing the frame inside the loop
    summary_retired = (
        pd.concat(yearly_frames)
        .reset_index()
        .set_index(["Country", "Year"])
        .rename(columns={"MW": ret_mode + "_MW"})
    )
    mode_frames.append(summary_retired)
summary_retired_modes = pd.concat(mode_frames, axis=1)
# Left join: keep exactly the country-years of the capacity panel. An outer
# concat would add rows for country-years found only in the retirement file,
# and fillna(0) would then give them a zero peak and a start year of 0.
summary_plusRet = summary_plusPeak.join(summary_retired_modes, how="left")
# fill zero for those that don't have retirement yet
summary_plusRet = summary_plusRet.fillna(0)
summary_plusRet

Unnamed: 0_level_0,Unnamed: 1_level_0,Current_MW,start_year,duration_year,Peak_MW,Ret_MW,EarlyRet_MW
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
United Kingdom,1924,50,1924,0,50,0.0,0.0
Russia,1925,80,1925,0,80,0.0,0.0
United Kingdom,1925,50,1924,1,50,0.0,0.0
Russia,1926,80,1925,1,80,0.0,0.0
United Kingdom,1926,50,1924,2,50,0.0,0.0
...,...,...,...,...,...,...,...
Uruguay,2023,840,2006,17,840,0.0,0.0
Uzbekistan,2023,13122,1961,62,13122,0.0,0.0
Venezuela,2023,12025,1956,67,12025,0.0,0.0
Vietnam,2023,8146,1998,25,8146,0.0,0.0


### Calculate the shares 

In [10]:
# Derive the percentage shares relative to peak capacity and the natural log
# of current capacity, then prefix every column with "Gas_".
summary_all = summary_plusRet.copy()
current_mw = summary_all["Current_MW"]
# A zero peak has no defined share and a zero capacity has no defined log;
# mask them to NaN so the results are missing values rather than inf / -inf
# (and np.log does not emit a divide-by-zero warning).
peak_mw = summary_all["Peak_MW"].where(summary_all["Peak_MW"] > 0)
summary_all["Share_Ret"] = summary_all["Ret_MW"] / peak_mw * 100
summary_all["Share_EarlyRet"] = summary_all["EarlyRet_MW"] / peak_mw * 100
summary_all["Share_PhasedOut"] = 100 - current_mw / peak_mw * 100
summary_all["logCurrent_MW"] = np.log(current_mw.where(current_mw > 0))
summary_all = summary_all.reset_index()
# un-prefixed column names, kept because later cells may still refer to them
gas_summary_cols = summary_all.columns
summary_all = summary_all.add_prefix("Gas_")
summary_all

Unnamed: 0,Gas_Country,Gas_Year,Gas_Current_MW,Gas_start_year,Gas_duration_year,Gas_Peak_MW,Gas_Ret_MW,Gas_EarlyRet_MW,Gas_Share_Ret,Gas_Share_EarlyRet,Gas_Share_PhasedOut,Gas_logCurrent_MW
0,United Kingdom,1924,50,1924,0,50,0.0,0.0,0.0,0.0,0.0,3.912023
1,Russia,1925,80,1925,0,80,0.0,0.0,0.0,0.0,0.0,4.382027
2,United Kingdom,1925,50,1924,1,50,0.0,0.0,0.0,0.0,0.0,3.912023
3,Russia,1926,80,1925,1,80,0.0,0.0,0.0,0.0,0.0,4.382027
4,United Kingdom,1926,50,1924,2,50,0.0,0.0,0.0,0.0,0.0,3.912023
...,...,...,...,...,...,...,...,...,...,...,...,...
4417,Uruguay,2023,840,2006,17,840,0.0,0.0,0.0,0.0,0.0,6.733402
4418,Uzbekistan,2023,13122,1961,62,13122,0.0,0.0,0.0,0.0,0.0,9.482045
4419,Venezuela,2023,12025,1956,67,12025,0.0,0.0,0.0,0.0,0.0,9.394743
4420,Vietnam,2023,8146,1998,25,8146,0.0,0.0,0.0,0.0,0.0,9.005282


In [11]:
# Attach the country code from the crosswalk (matching on the GEM country
# name), drop the name columns, index by year, and write the result to CSV.
# The merge uses the default inner join, so rows whose country has no
# GEM_Name match in the crosswalk are not exported.
summary_all_print = (
    summary_all
    .merge(
        crosswalk[["Country Code", "GEM_Name"]],
        left_on="Gas_Country",
        right_on="GEM_Name",
    )
    .drop(columns=["GEM_Name", "Gas_Country"])
    .rename(columns={"Gas_Year": "year"})
    .set_index("year")
)
summary_all_print.to_csv("_all_temporal_gas_indicators.csv")
summary_all_print

Unnamed: 0_level_0,Gas_Current_MW,Gas_start_year,Gas_duration_year,Gas_Peak_MW,Gas_Ret_MW,Gas_EarlyRet_MW,Gas_Share_Ret,Gas_Share_EarlyRet,Gas_Share_PhasedOut,Gas_logCurrent_MW,Country Code
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1924,50,1924,0,50,0.0,0.0,0.0,0.0,0.0,3.912023,GBR
1925,50,1924,1,50,0.0,0.0,0.0,0.0,0.0,3.912023,GBR
1926,50,1924,2,50,0.0,0.0,0.0,0.0,0.0,3.912023,GBR
1927,50,1924,3,50,0.0,0.0,0.0,0.0,0.0,3.912023,GBR
1928,50,1924,4,50,0.0,0.0,0.0,0.0,0.0,3.912023,GBR
...,...,...,...,...,...,...,...,...,...,...,...
2021,65,2021,0,65,0.0,0.0,0.0,0.0,0.0,4.174387,TGO
2022,65,2021,1,65,0.0,0.0,0.0,0.0,0.0,4.174387,TGO
2023,65,2021,2,65,0.0,0.0,0.0,0.0,0.0,4.174387,TGO
2022,378,2022,0,378,0.0,0.0,0.0,0.0,0.0,5.934894,SLV
