In [1]:
# packages
import pandas as pd
import numpy as np

# Power Sector Reform Tracker (PSRT) dataset
- <b>Goal</b>: compile the power reform dataset from Erdogdu 2011 and Urpelainen and Yang 2018 
- The Erdogdu dataset has 92 countries and Urpelainen and Yang has 142 countries. Both dataset cover the period bewteen 1982-2013. Below are the 8 variable descriptions from the codebook
    1. Liberalization law: Enacting laws to mandate the restructuring of power sector
    2. Corporatization: Corporatizing and commercializing state-owned utilities by removing the direct control of government or by creating an independent corporation
    3. Independent regulatory agency: creating and authorizing independent regulatory agencies to set electricity tariffs, issue licenses, and control access to the transmission network, and enforce regulatory requirements.
    4. Independent power producers: Legalizing Independent Power Producers (IPPs) to contemplate private sector participation and allowing private investors to set up their own generation facilities even without comprehensive reform 
    5. Unbundling: Using vertical and horizontal restructuring to separate generation and retail activities from the natural monopolistic segments, such as transmission and distribution, and to introduce competition.
    6. Privatization: Privatizing operating entities to restore financial discipline, create incentives for performance improvements and cost efficiency, and limit political interference
    7. Wholesale electricity markets: Creating voluntary public wholesale energy market institutions to allow competitive suppliers and marketers to generate electricity, access to the grid and facilitate trade among suppliers and between buyers and sellers 
    8. Choice of supplier: Allowing competing generators and intermediaries to offer services to consumers; enabling consumers to choose their own retail power suppliers
- <b>Assumptions</b>:
    - if both datasets have repeat coverage on country and year, use the 2018 version 
    - most recent data (2013 or 2013) to carry forward 2021
- <b>Data citation</b>:
    - Urpelainen and Yang dataset link: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/M7SY6X ; publication link: https://www.sciencedirect.com/science/article/pii/S2211467X18301147#sec4
    - Erdogdu publication:  https://www.sciencedirect.com/science/article/pii/S0301421511005945#f0005 (dataset downloaded from Appendix B)

In [2]:
reform_dir = "../../data/global_UrpelainenYang_reform/"

In [3]:
all_results = pd.read_stata(reform_dir+"PSRT_V1_Database.dta")
all_results

Unnamed: 0,cntry,year,r_ipp,r_prv,r_und,r_wem,r_reg,r_cos,r_law,r_cor,r_ipp_notes,r_prv_notes,r_und_notes,r_wem_notes,r_reg_notes,r_cos_notes,r_law_notes,r_cor_notes
0,Rwanda,1982.0,0,0,0,0,0,0,0,0,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
1,Tonga,1982.0,0,0,0,0,0,0,0,0,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
2,Angola,1982.0,0,0,0,0,0,0,0,0,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
3,Burkina Faso,1982.0,0,0,0,0,0,0,0,0,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
4,Lesotho,1982.0,0,0,0,0,0,0,0,0,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4537,Kosovo,2012.0,0,1,0,0,0,0,0,1,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
4538,Aruba,2012.0,0,0,0,0,0,0,0,1,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
4539,Kosovo,2013.0,0,1,1,0,1,0,1,1,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding
4540,Greenland,2013.0,0,0,0,0,0,0,0,0,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding,New Coding


In [4]:
erdogdu_2011 = pd.read_excel(reform_dir+"Erdogdu_2011.xls",skiprows=8)
erdogdu_2011

Unnamed: 0,ctrno,cntry,ctr_code,year,plf,res_mar,pr_loses,g_pe_el,g_pe_egw,r_scr,...,emp_el,emp_egw,r_ipp,r_prv,r_und,r_wem,r_reg,r_cos,r_law,r_cor
0,1,Albania,ALB,1982,0.671554,,0.052718,,,0,...,,,0,0,0,0,0,0,0,0
1,1,Albania,ALB,1983,0.625372,,0.052545,,,0,...,,,0,0,0,0,0,0,0,0
2,1,Albania,ALB,1984,0.597627,,0.054109,,,0,...,,,0,0,0,0,0,0,0,0
3,1,Albania,ALB,1985,0.488771,,0.068191,,,0,...,,,0,0,0,0,0,0,0,0
4,1,Albania,ALB,1986,0.766943,,0.037727,,,0,...,,,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2479,92,Zimbabwe,ZWE,2004,0.528519,,0.127017,,,4,...,,,0,0,1,0,1,0,1,1
2480,92,Zimbabwe,ZWE,2005,0.558485,,0.057586,,,4,...,,,0,0,1,0,1,0,1,1
2481,92,Zimbabwe,ZWE,2006,0.531673,,0.057562,,,4,...,,,0,0,1,0,1,0,1,1
2482,92,Zimbabwe,ZWE,2007,0.499259,,0.057572,,,4,...,,,0,0,1,0,1,0,1,1


In [5]:
panel_data1 = all_results[["cntry","year","r_ipp","r_prv","r_und","r_wem","r_reg","r_cos","r_law","r_cor"]]
panel_data1.loc[:,"source"] = "Urpelainen and Yang 2018"
panel_data2 = erdogdu_2011[["cntry","year","r_ipp","r_prv","r_und","r_wem","r_reg","r_cos","r_law","r_cor"]]
panel_data2.loc[:,"source"] = "Erdogdu 2011"
panel_data = pd.concat([panel_data1,panel_data2])
panel_data = panel_data.set_index("source")
panel_data = panel_data.drop_duplicates()
panel_data = panel_data.reset_index()
panel_data = panel_data.sort_values(by="source").drop_duplicates(['cntry','year'], keep='last')
panel_data = panel_data.rename(columns={'r_ipp':"R_IndepProducer", 'r_prv':"R_Private", 'r_und':"R_Unbundle", 'r_wem':"R_WholeSale", 
                                          'r_reg':"R_IndepReg", 'r_cos':"R_Choice",'r_law':"R_Liberalization", 'r_cor':"R_Corp","cntry":"country"})
panel_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  panel_data1.loc[:,"source"] = "Urpelainen and Yang 2018"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  panel_data2.loc[:,"source"] = "Erdogdu 2011"


Unnamed: 0,source,country,year,R_IndepProducer,R_Private,R_Unbundle,R_WholeSale,R_IndepReg,R_Choice,R_Liberalization,R_Corp
5039,Erdogdu 2011,Greece,1999.0,0,0,1,0,1,0,1,0
5040,Erdogdu 2011,Greece,2000.0,0,0,1,0,1,0,1,1
5041,Erdogdu 2011,Greece,2001.0,0,0,1,0,1,1,1,1
5042,Erdogdu 2011,Greece,2002.0,0,1,1,0,1,1,1,1
5043,Erdogdu 2011,Greece,2003.0,0,1,1,0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
2013,Urpelainen and Yang 2018,Uganda,2006.0,0,1,1,0,1,0,1,1
2012,Urpelainen and Yang 2018,Swaziland,2006.0,0,0,0,0,0,0,0,0
2011,Urpelainen and Yang 2018,Mozambique,2006.0,1,0,0,0,1,0,1,1
2020,Urpelainen and Yang 2018,Russian Federation,2006.0,1,0,0,1,1,0,1,1


In [6]:
panel_data_assumptions = panel_data.copy()
country_list = panel_data["country"].unique()
for country_select in country_list:
    country_data = panel_data.query("country==@country_select")
    country_maxyear = country_data["year"].max()
    year_gap = 2022-country_maxyear
    maxyear_data = country_data.query("year==@country_maxyear")
    for new_year in np.arange(year_gap)+country_maxyear+1:
        maxyear_data.loc[:,"year"] = new_year
        maxyear_data.loc[:,"source"] = "assumption"
        panel_data_assumptions = pd.concat([panel_data_assumptions,maxyear_data])
panel_data_assumptions

Unnamed: 0,source,country,year,R_IndepProducer,R_Private,R_Unbundle,R_WholeSale,R_IndepReg,R_Choice,R_Liberalization,R_Corp
5039,Erdogdu 2011,Greece,1999.0,0,0,1,0,1,0,1,0
5040,Erdogdu 2011,Greece,2000.0,0,0,1,0,1,0,1,1
5041,Erdogdu 2011,Greece,2001.0,0,0,1,0,1,1,1,1
5042,Erdogdu 2011,Greece,2002.0,0,1,1,0,1,1,1,1
5043,Erdogdu 2011,Greece,2003.0,0,1,1,0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
2501,assumption,Mozambique,2018.0,1,0,0,0,1,0,1,1
2501,assumption,Mozambique,2019.0,1,0,0,0,1,0,1,1
2501,assumption,Mozambique,2020.0,1,0,0,0,1,0,1,1
2501,assumption,Mozambique,2021.0,1,0,0,0,1,0,1,1


In [7]:
panel_data_assumptions.set_index(["country","year"]).to_csv("_all_temporal_power_reform.csv")