In [116]:
import os

import numpy as np
import pandas as pd
import yaml

In [11]:
os.listdir("./data/toktarova_et_al_2019/")

['Population.csv',
 'ElectricityperCapita.csv',
 'GDPperCap 2015.5.2050.10.2100.csv',
 'Real load hourly data.csv',
 'AnnualDemand.csv',
 'Data.csv',
 'GDPperCapita.csv',
 'Firstdayofyear.csv',
 'Peak.csv',
 'References.csv']

In [49]:
with open("./data/gegis__all_countries.yaml", "r") as file:
    data = yaml.safe_load(file)

In [70]:
items = data["items"]
gegis_countries = [[item["country_name"], item["country_code"]] for item in items]

In [125]:
gegis_country_codes = np.array(gegis_countries).T[1]

In [136]:
gegis_countries

[['Austria', 'AT'],
 ['Belgium', 'BE'],
 ['Bosnia and Herzegovina', 'BA'],
 ['Bulgaria', 'BG'],
 ['Croatia', 'HR'],
 ['Cyprus', 'CY'],
 ['Czech Republic', 'CZ'],
 ['Denmark', 'DK'],
 ['Estonia', 'EE'],
 ['Finland', 'FI'],
 ['France', 'FR'],
 ['Germany', 'DE'],
 ['Greece', 'GR'],
 ['Hungary', 'HU'],
 ['Iceland', 'IS'],
 ['Ireland', 'IE'],
 ['Italy', 'IT'],
 ['Latvia', 'LV'],
 ['Lithuania', 'LT'],
 ['Netherlands', 'NL'],
 ['North Macedonia', 'MK'],
 ['Norway', 'NO'],
 ['Poland', 'PL'],
 ['Portugal', 'PT'],
 ['Romania', 'RO'],
 ['Serbia', 'RS'],
 ['Slovakia', 'SK'],
 ['Slovenia', 'SI'],
 ['Spain', 'ES'],
 ['Sweden', 'SE'],
 ['Switzerland', 'CH'],
 ['United Kingdom', 'GB'],
 ['Canada', 'CA'],
 ['Mexico', 'MX'],
 ['Argentina', 'AR'],
 ['Brazil', 'BR'],
 ['Chile', 'CL'],
 ['Australia', 'AU'],
 ['New Zealand', 'NZ'],
 ['Kenya', 'KE'],
 ['Saudi Arabia', 'SA'],
 ['South Korea', 'KR'],
 ['Sri Lanka', 'LK'],
 ['Türkiye', 'TR']]

## Features

"""

We take time series of hourly electricity demand for 44 countries from Toktarova et al. [15] and fit a gradient boosting regression model [32] to demand time series for each country normalized to their annual mean

Estimates of annual country-level annual electricity generation in 2050 were produced by extrapolating annual demand in 2016 [33] using regional demand growth in the SSP2-26 scenario

Therefore we chose to train our model on ten independent var­
iables: 

(i + ii) annual per-capita electricity demand and purchase-power adjusted GDP (for prediction, we extrapolated this to 2050 using the SSP2 scenario in a similar way to demand as above), 

(iii) average hourly temperature profiles over the year in the 3 most densely populated areas of each country [35,36], 

(iv) the mean annual temperature level,

(v) the 1st temperature percentile across the year (to represent how low the temperature dips go),

(vi) the 99th percentile (to represent how high temperature spikes go),

(vii) hour of the day,

(viii) a weekday/weekend indicator,

(ix) mean monthly temperature levels, and

(x) a temperature-based ranking of months of the year
(where the first month is the coldest month, and the month ranked last is the warmest across the year).
The temperature ranking of months was chosen in order to reflect that different countries have summer in different calendar months.

"""



### (i + ii) annual per-capita electricity demand and purchase-power adjusted GDP

In [105]:
temp = pd.read_csv(
    "./data/toktarova_et_al_2019/ElectricityperCapita.csv", index_col=0, header=1
)

In [106]:
temp.columns = [
    int(float(col_name)) if col_name.split(".")[0].isdigit() else col_name
    for col_name in temp.columns
]

In [107]:
temp[2015].count()

np.int64(183)

In [161]:
import pycountry


def search_pycountry(country_name: str) -> str | None:
    try:
        # Try to find the country
        country = pycountry.countries.search_fuzzy(country_name)[0]
        return country.alpha_2
    except LookupError:
        return None


def get_country_codes(country_names):
    country_codes = []
    for country_name in country_names:
        found_country_code = search_pycountry(country_name)
        if not (found_country_code):
            print(country_name)

        country_codes.append(found_country_code)
    return country_codes

In [162]:
temp.insert(1, "country_code", get_country_codes(temp["Countries"]))

Area under dispute
Bahamas. The
Bosnia-Herzegovina
Bouvet Island (uninhabited)
Myanmar former Burma
Cambodia (Formerly Kampuchea)
Cape Verde
Congo. Democratic Republic of the (Formerly Zaire)
Congo. Republic of the
Crozet Islands (France)
Falkland Islands (Islas Malvinas)
French Guiana (France)
Gambia. The
Guadeloupe (France)
Guinea - Bissau
Ile Amsterdam (France)
Ivory Coast
Kerguelen Islands (France)
Korea. Democratic Peoples Republic
Korea. Republic of
Martinique (France)
Vanuatu former New Hebrides
Norfolk Island (Australia)
Prince Edward Islands (South Africa)
Reunion (France)
St. Helena
St. Lucia
St. Vincent and the Grenadines
Swaziland
Tanzania. United Republic of
Turkey
Upper Volta - Burkina Faso
West Bank
Serbia (former Yugoslavia)
Macau Region of China


ValueError: cannot insert country_code, already exists

In [178]:
# Print gegis countries not found by search
for code in gegis_country_codes:
    if code not in temp["country_code"].values:
        print(code)

In [180]:
# Adjust the missing countries
index_cc_BA = temp[temp["Countries"] == "Bosnia-Herzegovina"].index
temp.loc[index_cc_BA, "country_code"] = "BA"

index_cc_RS = temp[temp["Countries"] == "Serbia (former Yugoslavia)"].index
temp.loc[index_cc_RS, "country_code"] = "RS"

index_cc_KR = temp[temp["Countries"] == "Korea. Republic of"].index
temp.loc[index_cc_KR, "country_code"] = "KR"

index_cc_TR = temp[temp["Countries"] == "Turkey"].index
temp.loc[index_cc_TR, "country_code"] = "TR"

In [211]:
list_ids_gegis = []
for i in range(len(temp)):
    current_row = temp.iloc[i]

    if current_row["country_code"] in gegis_country_codes:
        list_ids_gegis += [i]

In [215]:
temp3 = temp.iloc[list_ids_gegis]

In [223]:
temp3

Unnamed: 0_level_0,Countries,country_code,Continents,SRegion,2009,2010,2011,2012,2013,2014,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9,Argentina,AR,Americas,S America,,,,,,,...,17817.599974,17846.5886,17873.543992,17898.583936,17921.822999,17943.372052,17963.337885,17981.822932,17998.925069,18019.564046
11,Australia,AU,Asia-Pacific,SE Asia,,,,,,,...,17980.901572,17989.396333,17997.304231,18004.665295,18011.516893,18017.893902,18023.828862,18029.352125,18034.492001,18039.274886
12,Austria,AT,Europe,Europe,,,,,,,...,17924.553357,17942.204532,17958.868722,17974.599098,17989.446194,18003.458012,18016.680125,18029.155779,18040.925994,18052.029661
19,Belgium,BE,Europe,Europe,,,,,,,...,17783.507912,17819.302856,17853.022893,17884.777818,17914.672889,17942.808845,17969.281964,17994.18413,18017.602927,18039.621738
25,Bosnia-Herzegovina,BA,Europe,Europe,,,,,,,...,17693.173142,17743.94103,17791.206669,17835.180774,17876.065808,17914.055644,17949.335356,17982.08112,18012.460199,18045.459664
28,Brazil,BR,Americas,S America,,,,,,,...,17696.009321,17745.427713,17791.469267,17834.333642,17874.213166,17911.29243,17945.74802,17977.748341,18007.453558,18039.8438
30,Bulgaria,BG,Europe,Europe,,,,,,,...,17782.012493,17818.503564,17852.454161,17884.013937,17913.327632,17940.534637,17965.768671,17989.157559,18010.823093,18035.708841
35,Canada,CA,Americas,N America,,,,,,,...,18028.235699,18030.014957,18031.624002,18033.079088,18034.394918,18035.584799,18036.660768,18037.633718,18038.5135,18039.309026
39,Chile,CL,Americas,S America,,,,,,,...,17369.496576,17457.328113,17541.363795,17621.744323,17698.610267,17772.101136,17842.354593,17909.505797,17973.686877,18039.8547
46,Croatia,HR,Europe,Europe,,,,,,,...,17626.277329,17675.46142,17721.539037,17764.676729,17805.036618,17842.775804,17878.045903,17910.992707,17941.755935,17975.292


In [228]:
temp3 = temp3.drop([207, 211, 220])

In [229]:
temp3

Unnamed: 0_level_0,Countries,country_code,Continents,SRegion,2009,2010,2011,2012,2013,2014,...,2091,2092,2093,2094,2095,2096,2097,2098,2099,2100
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
9,Argentina,AR,Americas,S America,,,,,,,...,17817.599974,17846.5886,17873.543992,17898.583936,17921.822999,17943.372052,17963.337885,17981.822932,17998.925069,18019.564046
11,Australia,AU,Asia-Pacific,SE Asia,,,,,,,...,17980.901572,17989.396333,17997.304231,18004.665295,18011.516893,18017.893902,18023.828862,18029.352125,18034.492001,18039.274886
12,Austria,AT,Europe,Europe,,,,,,,...,17924.553357,17942.204532,17958.868722,17974.599098,17989.446194,18003.458012,18016.680125,18029.155779,18040.925994,18052.029661
19,Belgium,BE,Europe,Europe,,,,,,,...,17783.507912,17819.302856,17853.022893,17884.777818,17914.672889,17942.808845,17969.281964,17994.18413,18017.602927,18039.621738
25,Bosnia-Herzegovina,BA,Europe,Europe,,,,,,,...,17693.173142,17743.94103,17791.206669,17835.180774,17876.065808,17914.055644,17949.335356,17982.08112,18012.460199,18045.459664
28,Brazil,BR,Americas,S America,,,,,,,...,17696.009321,17745.427713,17791.469267,17834.333642,17874.213166,17911.29243,17945.74802,17977.748341,18007.453558,18039.8438
30,Bulgaria,BG,Europe,Europe,,,,,,,...,17782.012493,17818.503564,17852.454161,17884.013937,17913.327632,17940.534637,17965.768671,17989.157559,18010.823093,18035.708841
35,Canada,CA,Americas,N America,,,,,,,...,18028.235699,18030.014957,18031.624002,18033.079088,18034.394918,18035.584799,18036.660768,18037.633718,18038.5135,18039.309026
39,Chile,CL,Americas,S America,,,,,,,...,17369.496576,17457.328113,17541.363795,17621.744323,17698.610267,17772.101136,17842.354593,17909.505797,17973.686877,18039.8547
46,Croatia,HR,Europe,Europe,,,,,,,...,17626.277329,17675.46142,17721.539037,17764.676729,17805.036618,17842.775804,17878.045903,17910.992707,17941.755935,17975.292


In [37]:
temp1 = pd.read_csv(
    "./data/toktarova_et_al_2019/GDPperCapita.csv", index_col=0, header=1
)

In [38]:
temp1.columns = [
    int(float(col_name)) if col_name.split(".")[0].isdigit() else col_name
    for col_name in temp1.columns
]

In [45]:
temp1[2015].count()

np.int64(209)

### (iii) average hourly temperature profiles over the year in the 3 most densely populated areas of each country [35,36]

In [None]:
# ETL/temperature

### (iv) the mean annual temperature level

In [None]:
# ETL/temperature

### (v) the 1st temperature percentile across the year (to represent how low the temperature dips go)

In [None]:
# ETL/temperature

### (vi) the 99th percentile (to represent how high temperature spikes go)

In [None]:
# ETL/temperature

### (vii) hour of the day

### (viii) a weekday/weekend indicator

### (ix) mean monthly temperature levels

In [None]:
# ETL/temperature

### (x) a temperature-based ranking of months of the year (where the first month is the coldest month, and the month ranked last is the warmest across the year)

In [None]:
# ETL/temperature

In [9]:
pd.read_csv("./data/toktarova_et_al_2019/GDPperCapita.csv", index_col=0, header=1)

Unnamed: 0_level_0,Countries,Continents,SRegion,1990.0,1991.0,1992.0,1993.0,1994.0,1995.0,1996.0,...,2091.0,2092.0,2093.0,2094.0,2095.0,2096.0,2097.0,2098.0,2099.0,2100.0
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Afghanistan,Asia-Pacific,India_SAARC,,,,,,,,...,84513.648970,84945.368034,85344.057644,85711.947230,86051.160167,86363.711951,86651.509728,86916.352953,87159.935007,87418.360257
2,Albania,Europe,Europe,3235.619467,2292.006978,2139.920427,2359.707076,2571.299117,2931.333042,3218.022703,...,85632.313190,85965.214251,86276.694494,86567.867757,86839.827593,87093.641755,87330.347663,87550.948770,87756.411756,87982.401824
3,Algeria,Africa,MENA,7736.104268,7458.505444,7417.711051,7103.806066,6896.890897,7024.660080,7186.481519,...,86311.132947,86553.488069,86778.749769,86987.917711,87181.962366,87361.821423,87528.396981,87682.553467,87825.116207,87991.611560
4,Andorra,Europe,Europe,,,,,,,,...,84182.763346,84684.375116,85167.544889,85632.832949,86080.816042,86512.080994,86927.219137,87326.821476,87711.474575,88116.547416
5,Angola,Africa,Africa,,,,,,,,...,82589.887519,83287.028727,83954.491295,84593.112591,85203.764989,85787.346420,86344.771879,86876.965871,87384.855756,87904.072386
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,Samoa,Asia-Pacific,SE Asia,2743.203225,2660.376848,2631.082460,2712.124879,2618.437331,2770.660768,2950.181791,...,81547.532398,82412.875569,83237.250429,84021.811109,84767.780972,85476.436565,86149.093035,86787.091008,87391.784915,87999.276720
220,Sint Maarten,Americas,N America,,,,,,,,...,86134.938849,86398.054441,86643.944262,86873.503845,87087.615654,87287.143847,87472.929933,87645.789273,87806.508338,87990.583241
221,Timor-Leste,Asia-Pacific,SE Asia,,,,,,,,...,84603.396536,85038.608438,85440.475560,85811.251593,86153.083034,86468.007270,86757.952051,87024.736134,87270.070901,87530.121566
222,Tokelau,Asia-Pacific,SE Asia,,,,751.879699,863.402877,980.106338,1098.091199,...,85002.436203,85347.923816,85675.948347,85987.595994,86283.910997,86565.893557,86834.498405,87090.633968,87335.162047,87599.048150
