In [1]:
import os

import numpy as np
import pandas as pd
import yaml

In [2]:
os.listdir("./data/toktarova_et_al_2019/")

['Population.csv',
 'ElectricityperCapita.csv',
 'GDPperCap 2015.5.2050.10.2100.csv',
 'Real load hourly data.csv',
 'AnnualDemand.csv',
 'Data.csv',
 'GDPperCapita.csv',
 'Firstdayofyear.csv',
 'Peak.csv',
 'References.csv']

In [3]:
with open("./data/gegis__all_countries.yaml", "r") as file:
    data = yaml.safe_load(file)

In [4]:
items = data["items"]
gegis_countries = [[item["country_name"], item["country_code"]] for item in items]

In [5]:
gegis_country_codes = np.array(gegis_countries).T[1]

In [6]:
gegis_countries

[['Austria', 'AT'],
 ['Belgium', 'BE'],
 ['Bosnia and Herzegovina', 'BA'],
 ['Bulgaria', 'BG'],
 ['Croatia', 'HR'],
 ['Cyprus', 'CY'],
 ['Czech Republic', 'CZ'],
 ['Denmark', 'DK'],
 ['Estonia', 'EE'],
 ['Finland', 'FI'],
 ['France', 'FR'],
 ['Germany', 'DE'],
 ['Greece', 'GR'],
 ['Hungary', 'HU'],
 ['Iceland', 'IS'],
 ['Ireland', 'IE'],
 ['Italy', 'IT'],
 ['Latvia', 'LV'],
 ['Lithuania', 'LT'],
 ['Netherlands', 'NL'],
 ['North Macedonia', 'MK'],
 ['Norway', 'NO'],
 ['Poland', 'PL'],
 ['Portugal', 'PT'],
 ['Romania', 'RO'],
 ['Serbia', 'RS'],
 ['Slovakia', 'SK'],
 ['Slovenia', 'SI'],
 ['Spain', 'ES'],
 ['Sweden', 'SE'],
 ['Switzerland', 'CH'],
 ['United Kingdom', 'GB'],
 ['Canada', 'CA'],
 ['Mexico', 'MX'],
 ['Argentina', 'AR'],
 ['Brazil', 'BR'],
 ['Chile', 'CL'],
 ['Australia', 'AU'],
 ['New Zealand', 'NZ'],
 ['Kenya', 'KE'],
 ['Saudi Arabia', 'SA'],
 ['South Korea', 'KR'],
 ['Sri Lanka', 'LK'],
 ['Türkiye', 'TR']]

## Features

"""

We take time series of hourly electricity demand for 44 countries from Toktarova et al. [15] and fit a gradient boosting regression model [32] to demand time series for each country normalized to their annual mean

Estimates of annual country-level annual electricity generation in 2050 were produced by extrapolating annual demand in 2016 [33] using regional demand growth in the SSP2-26 scenario

Therefore we chose to train our model on ten independent var­
iables: 

(i + ii) annual per-capita electricity demand and purchase-power adjusted GDP (for prediction, we extrapolated this to 2050 using the SSP2 scenario in a similar way to demand as above), 

(iii) average hourly temperature profiles over the year in the 3 most densely populated areas of each country [35,36], 

(iv) the mean annual temperature level,

(v) the 1st temperature percentile across the year (to represent how low the temperature dips go),

(vi) the 99th percentile (to represent how high temperature spikes go),

(vii) hour of the day,

(viii) a weekday/weekend indicator,

(ix) mean monthly temperature levels, and

(x) a temperature-based ranking of months of the year
(where the first month is the coldest month, and the month ranked last is the warmest across the year).
The temperature ranking of months was chosen in order to reflect that different countries have summer in different calendar months.

"""



### (i + ii) annual per-capita electricity demand and purchase-power adjusted GDP

In [25]:
ElectricityPerCapita = pd.read_csv(
    "./data/toktarova_et_al_2019/ElectricityperCapita.csv", index_col=0, header=1
)

In [31]:
ElectricityPerCapita.columns = [
    int(float(col_name)) if col_name.split(".")[0].isdigit() else col_name
    for col_name in ElectricityPerCapita.columns
]

In [33]:
ElectricityPerCapita.head()

Unnamed: 0_level_0,Countries,Continents,SRegion,2009,2010,2011,2012,2013,2014,2015,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Afghanistan,Asia-Pacific,India_SAARC,,,,,137.101712,133.966935,144.395483,...,17624.62496,17687.097102,17744.647413,17797.642291,17846.424921,17891.316151,17932.615413,17970.601709,18005.534605,18042.450631
2,Albania,Europe,Europe,,,,2643.873655,2627.239093,2659.396326,2784.365974,...,17707.115074,17754.295409,17798.312225,17839.347976,17877.5789,17913.174581,17946.297631,17977.103475,18005.74024,18037.176698
3,Algeria,Africa,MENA,,,,,,1417.816396,1589.823258,...,17803.171328,17837.323936,17869.000999,17898.357409,17925.541849,17950.696535,17973.957043,17995.452226,18015.304196,18038.456462
4,Andorra,Europe,Europe,,,,,,,,...,,,,,,,,,,
5,Angola,Africa,Africa,,,,,,,377.14811,...,17269.345036,17370.709694,17467.169776,17558.924431,17646.171148,17729.104683,17807.916168,17882.792373,17953.915119,18026.28791


In [36]:
import pycountry


def search_pycountry(country_name: str) -> str | None:
    try:
        # Try to find the country
        country = pycountry.countries.search_fuzzy(country_name)[0]
        return country.alpha_2
    except LookupError:
        return None


def get_country_codes(country_names):
    country_codes = []
    for country_name in country_names:
        found_country_code = search_pycountry(country_name)
        if not (found_country_code):
            print("Not Found:",country_name)

        country_codes.append(found_country_code)
    return country_codes

In [37]:
ElectricityPerCapita.insert(1, "country_code", get_country_codes(temp["Countries"]))

Not Found: Area under dispute
Not Found: Bahamas. The
Not Found: Bosnia-Herzegovina
Not Found: Bouvet Island (uninhabited)
Not Found: Myanmar former Burma
Not Found: Cambodia (Formerly Kampuchea)
Not Found: Cape Verde
Not Found: Congo. Democratic Republic of the (Formerly Zaire)
Not Found: Congo. Republic of the
Not Found: Crozet Islands (France)
Not Found: Falkland Islands (Islas Malvinas)
Not Found: French Guiana (France)
Not Found: Gambia. The
Not Found: Guadeloupe (France)
Not Found: Guinea - Bissau
Not Found: Ile Amsterdam (France)
Not Found: Ivory Coast
Not Found: Kerguelen Islands (France)
Not Found: Korea. Democratic Peoples Republic
Not Found: Korea. Republic of
Not Found: Martinique (France)
Not Found: Vanuatu former New Hebrides
Not Found: Norfolk Island (Australia)
Not Found: Prince Edward Islands (South Africa)
Not Found: Reunion (France)
Not Found: St. Helena
Not Found: St. Lucia
Not Found: St. Vincent and the Grenadines
Not Found: Swaziland
Not Found: Tanzania. United Re

ValueError: cannot insert country_code, already exists

In [39]:
# Print gegis countries not found by search
for code in gegis_country_codes:
    if code not in ElectricityPerCapita["country_code"].values:
        print(code)

BA
RS
KR
TR


In [40]:
# Adjust the missing countries
index_cc_BA = ElectricityPerCapita[ElectricityPerCapita["Countries"] == "Bosnia-Herzegovina"].index
ElectricityPerCapita.loc[index_cc_BA, "country_code"] = "BA"

index_cc_RS = ElectricityPerCapita[ElectricityPerCapita["Countries"] == "Serbia (former Yugoslavia)"].index
ElectricityPerCapita.loc[index_cc_RS, "country_code"] = "RS"

index_cc_KR = ElectricityPerCapita[ElectricityPerCapita["Countries"] == "Korea. Republic of"].index
ElectricityPerCapita.loc[index_cc_KR, "country_code"] = "KR"

index_cc_TR = ElectricityPerCapita[ElectricityPerCapita["Countries"] == "Turkey"].index
ElectricityPerCapita.loc[index_cc_TR, "country_code"] = "TR"

In [41]:
list_ids_gegis = []
for i in range(len(ElectricityPerCapita)):
    current_row = ElectricityPerCapita.iloc[i]

    if current_row["country_code"] in gegis_country_codes:
        list_ids_gegis += [i]

In [42]:
ElectricityPerCapita = ElectricityPerCapita.iloc[list_ids_gegis]

In [None]:
# Drop manually found incorrect countries (usually territories)
ElectricityPerCapita = ElectricityPerCapita.drop([207, 211, 220])

In [63]:
ElectricityPerCapita[['country_code', 2015]]

Unnamed: 0_level_0,country_code,2015
Code,Unnamed: 1_level_1,Unnamed: 2_level_1
9,AR,3042.742899
11,AU,8985.049043
12,AT,8147.214043
19,BE,7542.514249
25,BA,3245.548163
28,BR,2795.079254
30,BG,5401.97
35,CA,9527.951473
39,CL,3861.453167
46,HR,4054.511439


In [66]:
toktarova_indicies = ElectricityPerCapita.index

In [67]:
GDP_PPP = pd.read_csv(
    "./data/toktarova_et_al_2019/GDPperCapita.csv", index_col=0, header=1
)

In [68]:
GDP_PPP.columns = [
    int(float(col_name)) if col_name.split(".")[0].isdigit() else col_name
    for col_name in GDP_PPP.columns
]

In [69]:
GDP_PPP[2015].count()

np.int64(209)

In [97]:
GDP_PPP.loc[toktarova_indicies]

Unnamed: 0_level_0,Countries,Continents,SRegion,1990,1991,1992,1993,1994,1995,1996,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9,Argentina,Americas,S America,,,,,,,,...,86413.457614,86619.323619,86811.093372,86989.533276,87155.396873,87309.420359,87452.318903,87584.783702,87707.479717,87855.731355
11,Australia,Asia-Pacific,SE Asia,21482.763579,21130.110814,20958.563029,21597.178853,22232.676622,22817.776245,23409.297232,...,87578.177439,87639.100855,87695.84549,87748.692355,87797.90432,87843.727181,87886.39068,87926.109479,87963.084078,87997.501699
12,Austria,Europe,Europe,23393.185071,23957.8933,24191.744461,24119.470637,24603.973307,25221.75763,25791.807074,...,87174.900482,87301.069941,87420.316407,87532.998481,87639.458551,87740.023254,87835.003961,87924.697275,88009.385545,88089.337374
19,Belgium,Europe,Europe,22865.408104,23198.184791,23457.903718,23141.691297,23814.95685,24331.872447,24671.264297,...,86171.83597,86425.540285,86665.069264,86891.112496,87104.338146,87305.392073,87494.897206,87673.453135,87841.635922,87999.99806
25,Bosnia-Herzegovina,Europe,Europe,,,,,1084.513725,1348.247015,2593.498114,...,85534.130159,85892.069162,86226.353699,86538.264126,86829.051377,87099.931519,87352.081358,87586.635015,87804.681374,88042.023208
28,Brazil,Americas,S America,7729.221784,7716.592593,7558.855556,7789.506487,8079.693918,8307.245293,8358.32063,...,85554.096326,85902.568383,86228.213726,86532.2471,86815.858313,87080.206573,87326.415876,87555.571365,87768.716559,88001.596307
30,Bulgaria,Europe,Europe,6980.525383,6454.607793,6049.822314,6008.005126,6138.039901,6341.782867,6476.567,...,86161.249459,86419.868805,86661.025015,86885.669504,87094.734335,87289.127486,87469.728998,87637.387933,87792.92007,87971.839437
35,Canada,Americas,N America,23430.881379,22624.182843,22539.124513,22875.098806,23688.034757,24136.015554,24278.472451,...,87918.079902,87930.876979,87942.451103,87952.918804,87962.385575,87970.946899,87978.68919,87985.690652,87992.022048,87997.747404
39,Chile,Americas,S America,6950.495901,7384.83522,8161.432039,8597.436228,8952.188203,9759.865331,10336.287728,...,83278.659754,83886.208532,84470.606062,85032.460889,85572.409638,86091.109504,86589.231537,87067.454722,87526.4608,88001.674762
46,Croatia,Europe,Europe,,,,,,9431.114225,10373.896108,...,85064.230755,85409.523893,85733.982013,86038.597969,86324.355858,86592.224816,86843.153792,87078.067236,87297.861622,87537.964615


### (iii) average hourly temperature profiles over the year in the 3 most densely populated areas of each country [35,36]

In [None]:
# ETL/temperature

### (iv) the mean annual temperature level

In [None]:
# ETL/temperature

### (v) the 1st temperature percentile across the year (to represent how low the temperature dips go)

In [None]:
# ETL/temperature

### (vi) the 99th percentile (to represent how high temperature spikes go)

In [None]:
# ETL/temperature

### (vii) hour of the day

### (viii) a weekday/weekend indicator

### (ix) mean monthly temperature levels

In [None]:
# ETL/temperature

### (x) a temperature-based ranking of months of the year (where the first month is the coldest month, and the month ranked last is the warmest across the year)

In [None]:
# ETL/temperature

In [9]:
pd.read_csv("./data/toktarova_et_al_2019/GDPperCapita.csv", index_col=0, header=1)

Unnamed: 0_level_0,Countries,Continents,SRegion,1990.0,1991.0,1992.0,1993.0,1994.0,1995.0,1996.0,...,2091.0,2092.0,2093.0,2094.0,2095.0,2096.0,2097.0,2098.0,2099.0,2100.0
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Afghanistan,Asia-Pacific,India_SAARC,,,,,,,,...,84513.648970,84945.368034,85344.057644,85711.947230,86051.160167,86363.711951,86651.509728,86916.352953,87159.935007,87418.360257
2,Albania,Europe,Europe,3235.619467,2292.006978,2139.920427,2359.707076,2571.299117,2931.333042,3218.022703,...,85632.313190,85965.214251,86276.694494,86567.867757,86839.827593,87093.641755,87330.347663,87550.948770,87756.411756,87982.401824
3,Algeria,Africa,MENA,7736.104268,7458.505444,7417.711051,7103.806066,6896.890897,7024.660080,7186.481519,...,86311.132947,86553.488069,86778.749769,86987.917711,87181.962366,87361.821423,87528.396981,87682.553467,87825.116207,87991.611560
4,Andorra,Europe,Europe,,,,,,,,...,84182.763346,84684.375116,85167.544889,85632.832949,86080.816042,86512.080994,86927.219137,87326.821476,87711.474575,88116.547416
5,Angola,Africa,Africa,,,,,,,,...,82589.887519,83287.028727,83954.491295,84593.112591,85203.764989,85787.346420,86344.771879,86876.965871,87384.855756,87904.072386
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,Samoa,Asia-Pacific,SE Asia,2743.203225,2660.376848,2631.082460,2712.124879,2618.437331,2770.660768,2950.181791,...,81547.532398,82412.875569,83237.250429,84021.811109,84767.780972,85476.436565,86149.093035,86787.091008,87391.784915,87999.276720
220,Sint Maarten,Americas,N America,,,,,,,,...,86134.938849,86398.054441,86643.944262,86873.503845,87087.615654,87287.143847,87472.929933,87645.789273,87806.508338,87990.583241
221,Timor-Leste,Asia-Pacific,SE Asia,,,,,,,,...,84603.396536,85038.608438,85440.475560,85811.251593,86153.083034,86468.007270,86757.952051,87024.736134,87270.070901,87530.121566
222,Tokelau,Asia-Pacific,SE Asia,,,,751.879699,863.402877,980.106338,1098.091199,...,85002.436203,85347.923816,85675.948347,85987.595994,86283.910997,86565.893557,86834.498405,87090.633968,87335.162047,87599.048150


In [74]:
Hourly_demand = pd.read_csv(
    "./data/toktarova_et_al_2019/Real load hourly data.csv", index_col=0, header=0
)

In [77]:
Hourly_demand = Hourly_demand.T

In [85]:
Hourly_demand.index = Hourly_demand.index.astype(int)

In [89]:
sel_hourly_demand = Hourly_demand.loc[toktarova_indicies]

In [96]:
sel_hourly_demand[["Countryname","Electricity consumption"]].to_csv("list_countries_with_electricity_consumption_year.csv")

In [86]:
Hourly_demand

Unnamed: 0,Electricity consumption,Countryname,annual electricity consumption in TWh,average,R,Psyn,Preal,Hour_1 in MW,Hour_2 in MW,Hour_3 in MW,...,Hour_8751 in MW,Hour_8752 in MW,Hour_8753 in MW,Hour_8754 in MW,Hour_8755 in MW,Hour_8756 in MW,Hour_8757 in MW,Hour_8758 in MW,Hour_8759 in MW,Hour_8760 in MW
9,2015,Argentina,132.10676843799982,7194.2214905662095,0.9827962860288826,18026.11012055636,22051.341,13765.631,13466.951,13249.337,...,16999.631,17247.07,17148.829,16822.383,16371.513,16074.913,16454.143,16705.517,16611.579,15959.58
11,2015,Australia,215.36264050533973,24584.77631339495,0.9893989009754218,30344.20260126831,34544.7113212168,21947.63239,20283.9105,19365.89441,...,29117.62388,29997.5354,30326.7462,29212.2839,27799.04454,26874.34483,25701.1257,24830.47366,24470.61353,24246.23002
12,2015,Austria,69.617944,7947.253881278539,0.9889000629120096,11022.837679880084,11386.0,7247.0,6908.0,6601.0,...,7887.0,8044.0,8733.0,8847.0,8554.0,8039.0,7521.0,7223.0,7424.0,7094.0
17,2010,Barbados,1.0546712445065285,120.39626078841648,0.985744195956641,155.9419880787268,163.0,103.6,98.6,95.6,...,119.0,115.9,111.4,124.1,134.0,131.5,127.4,121.8,114.9,107.0
19,2015,Belgium,85.2228685,9728.637956621003,0.991906250654052,13013.468483514236,13129.000000000002,9555.0,9094.0,8691.0,...,9035.0,9097.0,9262.0,10011.0,9917.0,9377.0,8908.0,8682.0,9045.0,9176.0
25,2015,Bosnia-Herzegovina,12.3655385,1411.5911529680366,0.9894344602054602,2100.0651604943746,2105.0,1602.0,1519.0,1423.0,...,1788.0,1844.0,1974.0,2105.0,2060.0,1959.0,1810.0,1680.0,1606.0,1537.0
28,2015,Brazil,580.9516327978005,66318.6795431279,0.9901910906649112,87115.4480575607,91293.97,58525.56674,57847.67414,56891.77427,...,63352.09936,63226.69896,62905.86149,62606.72004,62084.45965,64825.87288,70634.19365,69079.20053,65205.12105,61603.77611
30,2015,Bulgaria,38.6240855,4409.142180365297,0.9871239841938952,6381.464415208395,7100.0,5259.0,5024.0,4773.0,...,5719.0,5810.0,6161.0,6754.0,6784.0,6353.0,5740.0,5273.0,5161.0,5105.0
35,2015,Canada,342.43457594007134,39090.70501598988,0.9884755677123198,52114.62435137231,53900.0,37872.02837,36743.60425,35680.41681,...,39612.31291,39790.96763,41153.68952,43031.90915,42859.01062,41622.92464,40045.51862,38353.41542,37020.54038,35718.79133
39,2015,Chile,69.30536143611195,3208.6764809388264,0.9909914911204276,9109.035120115446,9093.61193710125,7663.56744,7806.132327,7837.997731,...,7981.17992,7917.465739,7968.251103,8074.944783,8060.790234,7940.909423,8022.197817,8302.669904,8167.573888,7695.982555
