In [603]:
import json
import os.path
import pandas as pd
import requests
import wbgapi as wb
import apikeys
import functions as func

pd.set_option('future.no_silent_downcasting', True)

# CSV sources for the renewable energy tables, keyed by the name under which
# each frame is stored in `df_dict`.
data_files = dict(
    df_energy_prod='data/ren_ren_65-22/03_modern-renewable-prod.csv',
    df_cons='data/con-energy-fossil-renewables-nuclear.csv',
    df_prod='data/prod-electricity-fossil-renewables-nuclear-line.csv',
)

# Local copy of the OWID energy dataset (a file path, despite the name).
url_owid = 'data/owid-energy-data.csv'

def _read_comma_list(path):
    """Return the comma-separated entries stored in the text file at `path`.

    Surrounding whitespace (including the newline that usually ends the file)
    is stripped from every entry and empty entries are dropped, so a trailing
    newline or trailing comma cannot produce values such as 'USA\n' or ''
    that would silently fail later membership checks and API lookups.
    """
    with open(path, 'r') as f:
        entries = (entry.strip() for entry in f.read().split(','))
        return [entry for entry in entries if entry]


# country iso codes
countries = tuple(_read_comma_list('data/countries.txt'))

# country names
country_list = _read_comma_list('data/country_list.txt')

# owid columns
owid_columns = _read_comma_list('data/columns.txt')
    
# Load every CSV listed in `data_files` into a dict keyed by frame name.
df_dict = {name: pd.read_csv(path) for name, path in data_files.items()}

df_owid = pd.read_csv(url_owid)

# Stand-alone copies of the consumption / production tables. They are taken
# from the frames just loaded instead of re-parsing the same CSVs through
# duplicated path literals; `.copy()` keeps them independent of whatever is
# later done to the entries in `df_dict`.
df_cons = df_dict['df_cons'].copy()
df_prod = df_dict['df_prod'].copy()

# make sure all dfs exist
# NOTE(review): `ensure_dataframes_exist` lives in functions.py and is not
# visible here; presumably it guarantees an entry in `df_dict` for each listed
# name - confirm against its implementation.
expected_frames = list(data_files) + [
    'df_energy_prod_world',
    'df_energy_prod_regions',
    'df_countries',
    'df_energy',
    'df_ember',
    'df_wb',
]
func.ensure_dataframes_exist(df_dict, expected_frames)

In [713]:
# Population columns of the OWID data, one row per country and year.
population_columns = ['country', 'year', 'iso_code', 'population']
df_population = df_owid[population_columns]

In [581]:
# The ten 2023 rows with the largest `renewables_share_elec` value.
is_2023 = df_owid['year'] == 2023
top10 = df_owid.loc[is_2023].nlargest(n=10, columns='renewables_share_elec')
top10

Unnamed: 0,country,year,iso_code,population,gdp,biofuel_cons_change_pct,biofuel_cons_change_twh,biofuel_cons_per_capita,biofuel_consumption,biofuel_elec_per_capita,...,solar_share_elec,solar_share_energy,wind_cons_change_pct,wind_cons_change_twh,wind_consumption,wind_elec_per_capita,wind_electricity,wind_energy_per_capita,wind_share_elec,wind_share_energy
15689,Paraguay,2023,PRY,6861527.0,,,,,,0.0,...,0.0,,,,,0.0,0.0,,0.0,
9204,Iceland,2023,ISL,375322.0,,2.747,0.005,538.428,0.202,,...,0.0,,26.254,0.004,0.019,19.257,0.007,50.023,0.036,
14676,Norway,2023,NOR,5474363.0,,64.176,2.562,1197.159,6.554,43.841,...,0.111,0.242,-5.704,-2.335,36.276,2732.738,14.96,6626.515,9.74,6.559
4883,Costa Rica,2023,CRI,5212178.0,,,,,,32.616,...,0.584,,,,,282.032,1.47,,12.26,
10365,Kenya,2023,KEN,55100588.0,,,,,,4.356,...,4.016,,,,,36.297,2.0,,16.393,
11292,Luxembourg,2023,LUX,654771.0,,3.288,0.049,2339.919,1.532,320.723,...,24.561,2.089,48.891,0.394,1.208,748.353,0.49,1844.264,42.982,3.216
20777,Uruguay,2023,URY,3423106.0,,,,,,885.161,...,3.485,,,,,1384.707,4.74,,35.909,
3060,Brazil,2023,BRA,216422448.0,,9.969,24.119,1229.402,266.07,248.218,...,7.252,3.47,17.0,35.275,248.1,442.376,95.74,1146.369,13.424,6.438
5635,Denmark,2023,DNK,5910911.0,,-0.681,-0.019,464.427,2.745,1170.716,...,9.277,4.62,1.929,0.773,50.381,3283.758,19.41,8523.436,57.716,25.767
13494,New Zealand,2023,NZL,5228101.0,,24.58,0.012,11.415,0.06,296.475,...,0.472,0.408,12.667,0.916,8.387,663.721,3.47,1604.127,7.794,3.507


## API definitions

In [584]:
# Ember FastAPI
# Docs: https://api.ember-energy.org/v1/docs
base_url = 'https://api.ember-energy.org'
# The key is read from the local `apikeys` module rather than written here.
my_api_key = apikeys.ember_key

# Endpoints that may be needed for the project, keyed by a short descriptive name.
EMBER_ENDPOINTS = dict(
    generation_per_year='electricity-generation/yearly',
    demand_per_year='electricity-demand/yearly',
    carbon_per_year='carbon-intensity/yearly',
    poweremissions_per_year='power-sector-emissions/yearly',
)

In [586]:
# WBGAPI - World Bank Data
# !pip install wbgapi
# https://pypi.org/project/wbgapi/

# World Bank indicator codes that may be needed for the project
# (constant currency value indicators), keyed by the name used for them here.
GDP_INDICATORS = dict(
    gdp_constant='NY.GDP.MKTP.KD',
    gdp_per_capita_constant='NY.GDP.PCAP.KD',
)

In [588]:
# Country Facts API for population
# https://rapidapi.com/richardarthur228/api/country-facts
# The response is cached on disk; the API is only called when the cache file
# does not exist yet.
population_file = 'data/country_population.json'
if os.path.exists(population_file):
    print('The file already exists')
else:
    headers = {'x-rapidapi-key': apikeys.country_key, 'x-rapidapi-host': 'country-facts.p.rapidapi.com'}
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get('https://country-facts.p.rapidapi.com/countries', headers=headers, timeout=30)
    # Fail loudly on 4xx/5xx. Otherwise an error body (bad key, rate limit, ...)
    # would be written to the cache file and treated as valid data on every
    # later run, because the file would then exist.
    response.raise_for_status()
    func.save_json(response.json(), population_file)

The file already exists


## Clean Dataset

In [597]:
# Column renames shared by all three source tables.
common_renames = {'Entity': 'country', 'Code': 'iso_code', 'Year': 'year'}

# Dictionary of column renames, one mapping per frame in `df_dict`.
rename_mappings = {
    'df_energy_prod': {
        **common_renames,
        'Electricity from wind (TWh)': 'wind',
        'Electricity from hydro (TWh)': 'hydro',
        'Electricity from solar (TWh)': 'solar',
        'Other renewables including bioenergy (TWh)': 'other_inc_bio',
    },
    'df_cons': {
        **common_renames,
        'Nuclear (% equivalent primary energy)': 'energy_cons_nuclear',
        'Fossil fuels (% equivalent primary energy)': 'energy_cons_fossil',
        'Renewables (% equivalent primary energy)': 'energy_cons_renewable',
    },
    'df_prod': {
        **common_renames,
        'Nuclear - % electricity': 'elec_prod_nuclear',
        'Fossil fuels - % electricity': 'elec_prod_fossil',
        'Renewables - % electricity': 'elec_prod_renewable',
    },
}

# Run each frame through `func.preprocess_df`. It returns three values: the
# first replaces the entry in `df_dict`, the other two are published as the
# notebook-level variables `<key>_world` and `<key>_regions`.
for key, rename_dict in rename_mappings.items():
    frame, frame_world, frame_regions = func.preprocess_df(df_dict[key], rename_dict)
    df_dict[key] = frame
    globals()[f"{key}_world"] = frame_world
    globals()[f"{key}_regions"] = frame_regions

In [166]:
#data = func.ember_fetch_data(EMBER_ENDPOINTS['demand_per_year'], countries, my_api_key, base_url)

In [168]:
#func.save_json(data, 'data/ember_ren_cons_22_23_24.json')

In [170]:
# Ember enrichment table, cached as CSV. When the cache is missing it is built
# from the raw Ember JSON via the helper pipeline in `func`.
# NOTE(review): the raw JSON ('data/ember_ren_prod_23_24.json') is only fetched
# in a later cell, so on a machine with neither file this cell fails in
# `func.load_json`; run the fetch cell first.
energy_con_file = 'data/ember_enrich_ren_prod_22_23_24.csv'
if os.path.exists(energy_con_file):
    df_ember = pd.read_csv(energy_con_file)
    print('The file already exists')
else:
    df_json_data = func.load_json('data/ember_ren_prod_23_24.json')
    df_json_filtred = func.ember_filter_data(df_json_data)
    df_json_agg = func.ember_aggregate_bioenergy_other_renewables(df_json_filtred)
    df_json_reshaped = func.ember_reshape_data(df_json_agg)
    df_json_reshaped.to_csv(energy_con_file, index=False)
    # Previously this branch never defined `df_ember`, so later cells failed
    # on a first run. Reading the file back gives exactly the frame that the
    # cached branch produces (same index and dtypes after the CSV round-trip).
    df_ember = pd.read_csv(energy_con_file)
    print(f'The file {energy_con_file} has been created')

The file already exists


In [172]:
# Keep OWID rows from 1985 onwards, then reduce them to the selected columns
# and the countries in `country_list`, and export the result for visualization.
df_owid = df_owid.loc[df_owid['year'].ge(1985)]
df_owid2 = df_owid.loc[:, owid_columns]
df_owid2 = df_owid2.loc[df_owid2['country'].isin(country_list)]
df_owid2.to_csv('data/forvisualization/whole_file.csv', index=False)

## Create country file

In [470]:
# Country lookup table (name + iso_code), cached as CSV. Its index is what the
# `fk_country` columns built in later cells refer to.
country_file = 'data/countries.csv'

if os.path.exists(country_file):
    # The file is written with its index (see `to_csv` below), so restore it on
    # load. Without `index_col=0` the saved index came back as an extra
    # 'Unnamed: 0' column and the frame got a fresh RangeIndex, which can differ
    # from the labels present when the file was created.
    df_countries = pd.read_csv(country_file, index_col=0)
    print('The file countries.csv already exists')
else:
    # Create countries DataFrame.
    # Name and code are taken together as pairs: calling `.unique()` on each
    # column separately can yield arrays of different length or misaligned
    # order. The frame is also created here instead of relying on a
    # `df_countries` left over from an earlier kernel session.
    df_countries = (
        df_dict['df_energy_prod'][['country', 'iso_code']]
        .drop_duplicates()
        .rename(columns={'country': 'name'})
        .sort_values(by=['name'])
        .reset_index(drop=True)
    )
    df_countries.loc[len(df_countries)] = {'name': 'World', 'iso_code': 'OWID_WRL'}

    # Load population data
    # NOTE(review): `df_pop` is currently not used because the merge below is
    # commented out; it is kept so the variable stays available.
    data = func.load_json('data/country_population.json')
    df_pop = pd.DataFrame([
        (country['cca3'], int(country['population'].replace(',', '')))
        for country in data['result']
    ], columns=['iso_code', 'population'])

    # Merge country and population data
    #df_countries = df_countries.merge(df_pop, on='iso_code', how='left')

    # Handle missing values
    df_countries = df_countries.dropna(subset=['name'])

    # Save as the single final file (index included; it is the foreign key)
    df_countries.to_csv(country_file, index=True)
    print(f'The file {country_file} has been created')

The file data/countries.csv has been created


## Enrich Energy Data with Ember FastAPI

In [178]:
# Raw Ember yearly generation data, cached as JSON; the API is queried only
# when the cache file is missing.
ember_file = 'data/ember_ren_prod_23_24.json'
if not os.path.exists(ember_file):
    data = func.ember_fetch_data(
        EMBER_ENDPOINTS['generation_per_year'],
        countries,
        my_api_key,
        base_url,
    )
    func.save_json(data, ember_file)
else:
    print('The file already exists')

The file already exists


In [180]:
# Ember enrichment table, cached as CSV. When the cache is missing it is built
# from the raw Ember JSON via the helper pipeline in `func`.
energy_file = 'data/ember_enrich_ren_prod_22_23_24.csv'
if os.path.exists(energy_file):
    df_ember = pd.read_csv(energy_file)
    print('The file already exists')
else:
    df_json_data = func.load_json('data/ember_ren_prod_23_24.json')
    df_json_filtred = func.ember_filter_data(df_json_data)
    df_json_agg = func.ember_aggregate_bioenergy_other_renewables(df_json_filtred)
    df_json_reshaped = func.ember_reshape_data(df_json_agg)
    df_json_reshaped.to_csv(energy_file, index=False)
    # Previously this branch never defined `df_ember`, so the later concat
    # failed on a first run. Reading the file back gives exactly the frame that
    # the cached branch produces (same index and dtypes after the CSV round-trip).
    df_ember = pd.read_csv(energy_file)
    print(f'The file {energy_file} has been created')

The file already exists


## Enrich GDP Data with World Bank API

In [518]:
# World Bank GDP indicators, cached as CSV; the API is queried only when the
# cache file is missing.
bank_file = 'data/forvisualization/wb_enrich_ren_prod_1965_23.csv'
if os.path.exists(bank_file):
    df_wb = pd.read_csv(bank_file)
    print('The file already exists')
else:
    df_wb = func.wb_get_data(GDP_INDICATORS, countries)
    df_wb['year'] = df_wb['year'].astype(int)
    # Map each iso_code to the index label of its row in `df_countries`.
    # One lookup dict replaces a boolean scan of `df_countries` per row.
    # The pairs are fed in reverse so that, for a repeated code, the first
    # occurrence wins - the same row the previous `.index[0]` picked.
    known_countries = df_countries.dropna(subset=['iso_code'])
    iso_to_fk = dict(zip(known_countries['iso_code'][::-1], known_countries.index[::-1]))
    fk_country = df_wb['iso_code'].map(iso_to_fk)
    if fk_country.isna().any():
        # The old per-row lookup died with a bare "index 0 is out of bounds";
        # name the offending codes instead.
        unmatched = df_wb.loc[fk_country.isna(), 'iso_code'].unique().tolist()
        raise KeyError(f'iso_code values without a match in df_countries: {unmatched}')
    df_wb['fk_country'] = fk_country
    df_wb.to_csv(bank_file, index=False)
    print(f'The file {bank_file} has been created')

The file already exists


## Combine Datasets

In [516]:
# Combine base dataset with enriched data
combined_energy_file = 'data/forvisualization/enriched_energy_prod.csv'
if os.path.exists(combined_energy_file):
    df_energy = pd.read_csv(combined_energy_file, index_col=0)
    print('The file already exists')
else:
    df_energy = pd.concat([df_dict['df_energy_prod'], df_ember])
    df_energy = df_energy.sort_values(by=['iso_code', 'year'])
    df_energy.reset_index(drop=True, inplace=True)
    # map countries index on iso_code and remove redundant columns
    df_energy['fk_country'] = df_energy['iso_code'].map(lambda iso: df_countries[df_countries['iso_code'] == iso].index[0])
    df_energy.drop(['country'], axis=1, inplace=True)
    df_energy.to_csv(combined_energy_file)
    print(f'The file {combined_energy_file} has been created')

The file already exists


In [236]:
# Combine energy and bank file
combined_energy_bank_file = 'data/forvisualization/combined_energy_wb.csv'
if os.path.exists(combined_energy_bank_file):
    df_combined = pd.read_csv(combined_energy_bank_file, index_col=0)
    print('The file already exists')
else:
    exclude_cols = {'year', 'fk_country'}
    columns_to_check = [col for col in df_combined.columns if col not in exclude_cols]
    df_combined = df_energy.merge(df_wb, on=['fk_country', 'year'], how='outer')
    df_combined.fillna(0, inplace=True)
    df_combined = df_combined.loc[(df_combined[columns_to_check] != 0).any(axis=1)]
    df_combined.to_csv(combined_energy_bank_file)
    print(f'The file {combined_energy_bank_file} has been created')

The file data/forvisualization/combined_energy_wb.csv has been created
