In [1]:
import pandas as pd
import numpy as np

# Temporal coal dependent variables
- <b>Goal</b>: Compile the temporal coal dependent variables for linear regression analysis 
- The variables of interest are:
    1. (intermediary) Peak coal capacity in a year (or Peak coal capacity overall??)
    2. (independent var) Coal capacity in a year and associated emissions, plus the year of the first-ever plant and how many years it has been since
    3. Share of retired coal capacity in peak capacity [%]
    4. Share of transitioned coal capacity in peak capacity [%]
    5. Level of phase out = 1 - share of current capacity in peak capacity [%]
- <b>Assumptions</b>:
    - Plants whose retirement year is labeled "not found" are excluded from the analysis

In [2]:
# Directory holding the plant-level CSV inputs that the cells below read with
# pd.read_csv. It is a relative path, so it is resolved against the kernel's
# working directory.
plant_dir = "../../data/global_GEM/analysis_plant/"

### Coal capacity in a year and associated emissions

In [3]:
# Load the annual per-country coal summary and keep only the columns used below.
summary_columns = ["Country", "Year", "MW", "AnnualMtCO2"]
coal_data_summary = pd.read_csv(plant_dir + "coal_plant_annual_summary.csv").loc[:, summary_columns]
coal_data_summary.head()

Unnamed: 0,Country,Year,MW,AnnualMtCO2
0,Germany,1927,36.0,0.2
1,Germany,1928,36.0,0.2
2,Germany,1929,36.0,0.2
3,Germany,1930,36.0,0.2
4,Germany,1931,36.0,0.2


In [4]:
# First year with a coal-capacity record for each country (minimum of "Year").
coal_start_year = (
    coal_data_summary.groupby("Country")[["Year"]]
    .min()
    .rename(columns={"Year": "start_year"})
)
# Attach that start year to every yearly row of the same country; the
# "years since start" column is derived from it in a later cell.
coal_summary = coal_data_summary.join(coal_start_year, on="Country", how="left")
coal_summary

Unnamed: 0,Country,Year,MW,AnnualMtCO2,start_year
0,Germany,1927,36.00,0.2,1927
1,Germany,1928,36.00,0.2,1927
2,Germany,1929,36.00,0.2,1927
3,Germany,1930,36.00,0.2,1927
4,Germany,1931,36.00,0.2,1927
...,...,...,...,...,...
3949,United States,2022,212041.66,1077.7,1935
3950,Uzbekistan,2022,2493.00,14.0,1957
3951,Vietnam,2022,24637.00,108.0,1975
3952,Zambia,2022,330.00,1.5,2015


In [5]:
# Spot check: list the rows recorded for Egypt (the saved output below is empty).
coal_summary[coal_summary["Country"] == "Egypt"]

Unnamed: 0,Country,Year,MW,AnnualMtCO2,start_year


### Peak coal capacity in a year

In [5]:
# Running peak capacity: for every year present in the data, the largest MW each
# country has reported in that year or any earlier one. A country therefore gets
# a Peak_MW row for every listed year from its first record onwards, including
# years in which it no longer has a row in coal_summary.
year_list = coal_summary["Year"].unique()
peak_frames = []
for yr in year_list:
    summary_select = coal_summary[coal_summary["Year"] <= yr]
    summary_select_group = summary_select.groupby("Country")[["MW"]].max()
    summary_select_group["Year"] = yr
    peak_frames.append(summary_select_group)
# Concatenate once: growing the frame with pd.concat inside the loop re-copies
# all previously collected rows on every iteration.
summary_peak = (
    pd.concat(peak_frames)
    .reset_index()
    .set_index(["Country", "Year"])
    .rename(columns={"MW": "Peak_MW"})
)
# Outer-align on (Country, Year): pairs that exist only in summary_peak come in
# with NaN for MW, AnnualMtCO2 and start_year.
coal_summary_plusPeak = pd.concat(
    [coal_summary.set_index(["Country", "Year"]), summary_peak], axis=1
)
# start_year is constant within a country, so the per-country maximum (NaN is
# skipped) restores it on the rows that were missing it.
coal_summary_plusPeak["start_year"] = (
    coal_summary_plusPeak.groupby(level="Country")["start_year"].transform("max")
)
# Remaining gaps are country-years without a capacity record; they are treated
# as phased out, i.e. zero MW and zero emissions.
coal_summary_plusPeak = coal_summary_plusPeak.fillna(0)
coal_summary_plusPeak

Unnamed: 0_level_0,Unnamed: 1_level_0,MW,AnnualMtCO2,start_year,Peak_MW
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Germany,1927,36.0,0.2,1927.0,36.0
Germany,1928,36.0,0.2,1927.0,36.0
Germany,1929,36.0,0.2,1927.0,36.0
Germany,1930,36.0,0.2,1927.0,36.0
Germany,1931,36.0,0.2,1927.0,36.0
...,...,...,...,...,...
Belgium,2022,0.0,0.0,1949.0,2865.0
Peru,2022,0.0,0.0,1999.0,135.0
Portugal,2022,0.0,0.0,1964.0,2028.0
Sweden,2022,0.0,0.0,1963.0,291.0


### Retired coal capacity 

In [22]:
# Load the retired coal plants and turn both year columns into floats, with the
# "not found" placeholder mapped to NaN.
retired_data = pd.read_csv(plant_dir + "coal2gas_retired_only.csv")
for year_col in ("Coal_EndYr", "Coal_StartYr"):
    retired_data[year_col] = retired_data[year_col].replace("not found", np.nan).astype(float)
# Operating lifetime in years; plants retired after fewer than 30 years count as
# early retirements.
retired_data["lifetime"] = retired_data["Coal_EndYr"] - retired_data["Coal_StartYr"]
is_early_retirement = retired_data["lifetime"] < 30
retired_data_early = retired_data[is_early_retirement]
retired_data_early.head()

Unnamed: 0,Coal_TrackerLOC,Coal_Latitude,Coal_Longitude,Country,Coal_Plant,Coal_Status,Coal_EndYr,Coal_StartYr,Coal_MW,geometry,lifetime
7,L100009,-32.580534,151.07147,Australia,Redbank power station,retired,2014.0,2001.0,151.0,POINT (151.0714695 -32.5805339),13.0
14,L100012,-24.347222,150.617115,Australia,Callide Oxyfuel Project,retired,2015.0,2012.0,30.0,POINT (150.6171146 -24.347222),3.0
15,L100180,-20.80532,-70.193195,Chile,Patache power station,retired,2019.0,1998.0,154.0,POINT (-70.1931953 -20.8053203),21.0
17,L103158,-17.77597,-71.18928,Peru,Ilo power station,retired,2022.0,1999.0,135.0,POINT (-71.1892805 -17.7759701),23.0
18,L104549,9.379612,-79.821886,Panama,Bahía las Minas power station,retired,2021.0,2011.0,120.0,POINT (-79.82188600000001 9.379612),10.0


In [23]:
# Cumulative retired capacity per (Country, Year), computed once for all
# retirements ("Ret") and once for early retirements only ("EarlyRet").
retired_datasets = {"Ret": retired_data, "EarlyRet": retired_data_early}
retired_mode_frames = []
for ret_mode, dataset in retired_datasets.items():
    yearly_frames = []
    for yr in year_list:
        # Rows with a NaN Coal_EndYr never satisfy the comparison, so plants
        # without a known retirement year are left out.
        retired_select = dataset[dataset["Coal_EndYr"] <= yr]
        # Select Coal_MW before aggregating so the sum never touches the
        # non-numeric plant columns.
        retired_select_group = retired_select.groupby("Country")[["Coal_MW"]].sum()
        retired_select_group["Year"] = yr
        yearly_frames.append(retired_select_group)
    # Concatenate once per mode instead of growing the frame inside the loop.
    summary_retired = (
        pd.concat(yearly_frames)
        .reset_index()
        .set_index(["Country", "Year"])
        .rename(columns={"Coal_MW": ret_mode + "_MW"})
    )
    retired_mode_frames.append(summary_retired)
summary_retired_modes = pd.concat(retired_mode_frames, axis=1)
# Outer-align with the capacity table; (Country, Year) pairs without any
# retirement so far get zero retired MW.
coal_summary_plusRet = pd.concat([coal_summary_plusPeak, summary_retired_modes], axis=1)
coal_summary_plusRet = coal_summary_plusRet.fillna(0)
coal_summary_plusRet

Unnamed: 0_level_0,Unnamed: 1_level_0,MW,AnnualMtCO2,start_year,Peak_MW,Ret_MW,EarlyRet_MW
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Germany,1927,36.0,0.2,1927.0,36.0,0.0,0.0
Germany,1928,36.0,0.2,1927.0,36.0,0.0,0.0
Germany,1929,36.0,0.2,1927.0,36.0,0.0,0.0
Germany,1930,36.0,0.2,1927.0,36.0,0.0,0.0
Germany,1931,36.0,0.2,1927.0,36.0,0.0,0.0
...,...,...,...,...,...,...,...
Belgium,2022,0.0,0.0,1949.0,2865.0,2188.0,0.0
Peru,2022,0.0,0.0,1999.0,135.0,135.0,135.0
Portugal,2022,0.0,0.0,1964.0,2028.0,1296.0,0.0
Sweden,2022,0.0,0.0,1963.0,291.0,291.0,0.0


### Transitioned coal capacity

In [24]:
# Load the coal-to-gas matches. The file repeats each coal plant (once per
# matched gas record), so only the coal-side columns are kept and duplicate
# rows dropped. "Hong Kong" entries are relabelled as "China".
coal_side_columns = ["Coal_Plant", "Coal_Status", "Coal_EndYr", "Coal_MW", "ParentID", "Country"]
transition_data = (
    pd.read_csv(plant_dir + "coal2gas_geopandas_match.csv")
    .assign(Coal_EndYr=lambda df: df["Coal_EndYr"].replace("not found", np.nan).astype(float))
    .replace("Hong Kong", "China")
    .loc[:, coal_side_columns]
    .drop_duplicates()
)
transition_data

Unnamed: 0,Coal_Plant,Coal_Status,Coal_EndYr,Coal_MW,ParentID,Country
0,Huntly power station,retired,2013.0,500.0,P100542P100542,New Zealand
4,Munmorah power station,retired,2012.0,600.0,P100343P100343,Australia
8,Kwinana power station,retired,2015.0,640.0,P101560P101560P101560P101560,Australia
12,Swanbank-A power station,retired,2012.0,396.0,P101418P101418P101418P101418P101418P101418,Australia
13,Kelvin power station,retired,2022.0,180.0,P100038P100038P100038P100038P100038P100038,South Africa
...,...,...,...,...,...,...
626,Ivanovskaya CHP-2 power station,retired,2015.0,200.0,P102172P102172P102172P102172,Russia
628,Cherepovetskaya power station,retired,2021.0,630.0,P100528P100528P100528,Russia
629,Serovskaya power station,retired,2018.0,388.0,P100528P100528P100528P100528,Russia
630,Suomenoja power station,retired,2020.0,90.0,P100505,Finland


In [26]:
# Cumulative transitioned (coal-to-gas) capacity per (Country, Year).
transition_frames = []
for yr in year_list:
    # Rows with a NaN Coal_EndYr never satisfy the comparison and are left out.
    transition_select = transition_data[transition_data["Coal_EndYr"] <= yr]
    # Select Coal_MW before aggregating so the text columns (plant name, status,
    # ParentID) are not summed along with it.
    transition_select_group = transition_select.groupby("Country")[["Coal_MW"]].sum()
    transition_select_group["Year"] = yr
    transition_frames.append(transition_select_group)
# Concatenate once instead of growing the frame inside the loop.
summary_transition = (
    pd.concat(transition_frames)
    .reset_index()
    .set_index(["Country", "Year"])
    .rename(columns={"Coal_MW": "Tran_MW"})
)
coal_summary_plusTran = pd.concat([coal_summary_plusRet, summary_transition], axis=1)
coal_summary_plusTran = coal_summary_plusTran.reset_index()
# Years elapsed since the country's first recorded coal capacity.
coal_summary_plusTran["duration_year"] = coal_summary_plusTran["Year"] - coal_summary_plusTran["start_year"]
# (Country, Year) pairs without transition data get zero transitioned MW.
coal_summary_plusTran = coal_summary_plusTran.fillna(0)
coal_summary_plusTran

Unnamed: 0,Country,Year,MW,AnnualMtCO2,start_year,Peak_MW,Ret_MW,EarlyRet_MW,Tran_MW,duration_year
0,Germany,1927,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,0.0
1,Germany,1928,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,1.0
2,Germany,1929,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,2.0
3,Germany,1930,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,3.0
4,Germany,1931,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,4.0
...,...,...,...,...,...,...,...,...,...,...
3966,Belgium,2022,0.0,0.0,1949.0,2865.0,2188.0,0.0,677.0,73.0
3967,Peru,2022,0.0,0.0,1999.0,135.0,135.0,135.0,0.0,23.0
3968,Portugal,2022,0.0,0.0,1964.0,2028.0,1296.0,0.0,732.0,58.0
3969,Sweden,2022,0.0,0.0,1963.0,291.0,291.0,0.0,0.0,59.0


### Calculate the shares 

In [30]:
# Work on a copy so re-running this cell always starts from the same input.
coal_summary_all = coal_summary_plusTran.copy()
coal_summary_all = coal_summary_all.rename(columns={"MW": "Current_MW"})
# All shares are percentages of peak capacity. A zero peak is turned into NaN so
# the affected shares come out as NaN rather than +/-inf.
peak_mw = coal_summary_all["Peak_MW"].replace(0, np.nan)
share_sources = {
    "Share_Ret": "Ret_MW",
    "Share_EarlyRet": "EarlyRet_MW",
    "Share_Tran": "Tran_MW",
}
for share_col, mw_col in share_sources.items():
    coal_summary_all[share_col] = coal_summary_all[mw_col] / peak_mw * 100
# Level of phase-out = 100 % minus the share of peak capacity still present.
coal_summary_all["Share_PhasedOut"] = 100 - coal_summary_all["Current_MW"] / peak_mw * 100
# log(0) is -inf and is mapped to 0 below; the divide-by-zero warning numpy
# raises for it is expected, so it is silenced for this one call only.
# Note that 0 MW and 1 MW both end up with logCurrent_MW == 0.
with np.errstate(divide="ignore"):
    log_current_mw = np.log(coal_summary_all["Current_MW"])
coal_summary_all["logCurrent_MW"] = log_current_mw.replace(-np.inf, 0)
coal_summary_all = coal_summary_all.set_index(["Country", "Year"])
coal_summary_all

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0_level_0,Unnamed: 1_level_0,Current_MW,AnnualMtCO2,start_year,Peak_MW,Ret_MW,EarlyRet_MW,Tran_MW,duration_year,Share_Ret,Share_EarlyRet,Share_Tran,Share_PhasedOut,logCurrent_MW
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Germany,1927,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,3.583519
Germany,1928,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,1.0,0.000000,0.0,0.000000,0.0,3.583519
Germany,1929,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,2.0,0.000000,0.0,0.000000,0.0,3.583519
Germany,1930,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,3.0,0.000000,0.0,0.000000,0.0,3.583519
Germany,1931,36.0,0.2,1927.0,36.0,0.0,0.0,0.0,4.0,0.000000,0.0,0.000000,0.0,3.583519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Belgium,2022,0.0,0.0,1949.0,2865.0,2188.0,0.0,677.0,73.0,76.369983,0.0,23.630017,100.0,0.000000
Peru,2022,0.0,0.0,1999.0,135.0,135.0,135.0,0.0,23.0,100.000000,100.0,0.000000,100.0,0.000000
Portugal,2022,0.0,0.0,1964.0,2028.0,1296.0,0.0,732.0,58.0,63.905325,0.0,36.094675,100.0,0.000000
Sweden,2022,0.0,0.0,1963.0,291.0,291.0,0.0,0.0,59.0,100.000000,0.0,0.000000,100.0,0.000000


In [31]:
# Prefix every indicator column with "Coal_" and write the table to CSV in the
# current working directory. Columns that already carry the prefix are left
# alone, so re-running this cell does not produce "Coal_Coal_..." names.
coal_summary_cols = coal_summary_all.columns
coal_summary_all = coal_summary_all.rename(
    columns=lambda col_name: col_name if col_name.startswith("Coal_") else "Coal_" + col_name
)
coal_summary_all.to_csv("_all_temporal_coal_indicators.csv")