# Data source: https://www.kaggle.com/datasets/pralabhpoudel/world-energy-consumption

## Codebook - data description
https://github.com/owid/energy-data/blob/master/owid-energy-codebook.csv

Import packages

In [1]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Read data

In [2]:
# Load the complete energy dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="World Energy Consumption.csv")

Show size of data

In [3]:
# Display the (rows, columns) dimensions of the raw dataset.
df.shape

(22012, 129)

Filter countries

In [4]:
# Countries kept for the analysis; every other `country` value in the
# dataset is discarded by the membership test below.
countries = np.array(
    [
        "Spain",
        "Poland",
        "Germany",
        "France",
        "Portugal",
        "Belgium",
        "Netherlands",
        "Austria",
        "Switzerland",
        "Finland",
        "Norway",
        "Sweden",
        "Lithuania",
        "Estonia",
        "Italy",
        "Slovakia",
        "Serbia",
        "Greece",
        "Croatia",
        "Hungary",
    ]
)

# Keep only the rows whose `country` value is one of the selected names.
countries_df = df.loc[df["country"].isin(countries)]

Select the columns of interest (per-capita and share metrics for energy consumption and electricity generation)

In [5]:
# Columns retained for the analysis. Each metric group below holds one column
# per energy source: coal, fossil fuels, gas, hydropower, nuclear, oil,
# renewables, solar and wind.
columns = np.array(
    [
        # Row identifiers.
        "country",
        "year",

        # CONSUMPTION
        # Consumption per capita, measured in kilowatt-hours per person.
        "coal_cons_per_capita",
        "fossil_energy_per_capita",
        "gas_energy_per_capita",
        "hydro_energy_per_capita",
        "nuclear_energy_per_capita",
        "oil_energy_per_capita",
        "renewables_energy_per_capita",
        "solar_energy_per_capita",
        "wind_energy_per_capita",

        # Share of primary energy consumption that comes from each source,
        # measured as a percentage of the total primary energy, using the
        # substitution method.
        "coal_share_energy",
        "fossil_share_energy",
        "gas_share_energy",
        "hydro_share_energy",
        "nuclear_share_energy",
        "oil_share_energy",
        "renewables_share_energy",
        "solar_share_energy",
        "wind_share_energy",

        # PRODUCTION
        # Electricity generation per person, measured in kilowatt-hours
        # per person.
        "coal_elec_per_capita",
        "fossil_elec_per_capita",
        "gas_elec_per_capita",
        "hydro_elec_per_capita",
        "nuclear_elec_per_capita",
        "oil_elec_per_capita",
        "renewables_elec_per_capita",
        "solar_elec_per_capita",
        "wind_elec_per_capita",

        # Share of electricity generated by each source, measured as a
        # percentage of total electricity.
        "coal_share_elec",
        "fossil_share_elec",
        "gas_share_elec",
        "hydro_share_elec",
        "nuclear_share_elec",
        "oil_share_elec",
        "renewables_share_elec",
        "solar_share_elec",
        "wind_share_elec",
    ]
)

# Project the frame onto the selected columns, in the order listed above.
countries_df = countries_df.loc[:, columns]

Filter by year

In [6]:
# Keep only the 2022 records. The result is saved as a single-year snapshot
# ('data_2022.csv'), so the comparison is an equality: an open-ended
# `>= 2022` would silently pull in later years (several rows per country)
# once the source dataset is refreshed.
countries_df = countries_df[countries_df['year'] == 2022]

Show data

In [7]:
# Display the frame after the country, column and year filters.
countries_df

Unnamed: 0,country,year,coal_cons_per_capita,fossil_energy_per_capita,gas_energy_per_capita,hydro_energy_per_capita,nuclear_energy_per_capita,oil_energy_per_capita,renewables_energy_per_capita,solar_energy_per_capita,...,wind_elec_per_capita,coal_share_elec,fossil_share_elec,gas_share_elec,hydro_share_elec,nuclear_share_elec,oil_share_elec,renewables_share_elec,solar_share_elec,wind_share_elec
1799,Austria,2022,3164.75,27059.148,8851.414,10363.625,,15042.984,15626.231,938.635,...,775.201,0.216,23.713,18.179,55.758,0.0,5.318,76.287,4.22,10.713
2408,Belgium,2022,2744.635,42769.266,12485.879,70.599,9386.54,27538.754,6240.019,1514.476,...,1035.525,0.042,27.892,24.445,0.211,46.659,3.405,25.449,7.771,12.762
4933,Croatia,2022,1186.811,17149.828,6046.492,3506.509,0.0,9916.524,6318.343,104.437,...,563.225,10.907,36.686,25.708,37.89,0.0,0.071,63.314,1.062,16.076
6238,Estonia,2022,25660.084,39405.848,2730.869,46.085,0.0,11014.897,7099.013,1164.383,...,475.09,0.0,55.995,0.48,0.24,0.0,55.516,44.005,6.355,7.554
7231,Finland,2022,6123.523,24868.705,1953.809,6428.19,11396.982,16791.373,22700.471,148.123,...,2183.822,4.069,10.769,1.808,18.824,34.333,4.891,54.898,0.411,16.578
7354,France,2022,920.022,19371.66,5937.455,1798.676,11409.492,12514.184,5270.644,812.113,...,595.111,0.937,12.154,9.159,9.836,63.304,2.058,24.542,4.256,8.192
7803,Germany,2022,7763.265,31225.268,9270.737,547.259,1041.292,14191.266,8710.929,1900.926,...,1512.537,31.085,50.783,16.54,2.997,6.27,3.158,42.947,10.129,21.656
8090,Greece,2022,1927.131,24361.199,5966.988,1142.348,0.0,16467.082,6049.146,1795.8,...,1045.742,10.412,56.693,37.3,9.02,0.0,8.982,43.307,12.624,20.709
8981,Hungary,2022,1385.822,20260.748,9209.361,46.087,3968.923,9665.565,2430.819,1216.101,...,61.2,8.164,34.178,24.944,0.535,44.623,1.07,21.199,12.584,1.717
10026,Italy,2022,1435.913,24111.465,11055.562,1243.551,,11619.991,4798.736,1215.307,...,337.243,7.599,63.562,50.685,10.737,0.0,5.279,36.438,9.944,7.14


Show size of the filtered data

In [8]:
# Display the (rows, columns) dimensions of the filtered frame.
countries_df.shape

(20, 38)

Remove rows with missing data

In [9]:
# Discard every row that has at least one missing value in any column.
# NOTE(review): in the displayed output Austria and Italy have an empty
# nuclear_energy_per_capita and no longer appear after this step — confirm
# that dropping such countries is intended.
countries_df = countries_df.dropna(axis=0, how="any")

Show preprocessed data

In [10]:
# Display the frame after incomplete rows have been removed.
countries_df

Unnamed: 0,country,year,coal_cons_per_capita,fossil_energy_per_capita,gas_energy_per_capita,hydro_energy_per_capita,nuclear_energy_per_capita,oil_energy_per_capita,renewables_energy_per_capita,solar_energy_per_capita,...,wind_elec_per_capita,coal_share_elec,fossil_share_elec,gas_share_elec,hydro_share_elec,nuclear_share_elec,oil_share_elec,renewables_share_elec,solar_share_elec,wind_share_elec
2408,Belgium,2022,2744.635,42769.266,12485.879,70.599,9386.54,27538.754,6240.019,1514.476,...,1035.525,0.042,27.892,24.445,0.211,46.659,3.405,25.449,7.771,12.762
4933,Croatia,2022,1186.811,17149.828,6046.492,3506.509,0.0,9916.524,6318.343,104.437,...,563.225,10.907,36.686,25.708,37.89,0.0,0.071,63.314,1.062,16.076
6238,Estonia,2022,25660.084,39405.848,2730.869,46.085,0.0,11014.897,7099.013,1164.383,...,475.09,0.0,55.995,0.48,0.24,0.0,55.516,44.005,6.355,7.554
7231,Finland,2022,6123.523,24868.705,1953.809,6428.19,11396.982,16791.373,22700.471,148.123,...,2183.822,4.069,10.769,1.808,18.824,34.333,4.891,54.898,0.411,16.578
7354,France,2022,920.022,19371.66,5937.455,1798.676,11409.492,12514.184,5270.644,812.113,...,595.111,0.937,12.154,9.159,9.836,63.304,2.058,24.542,4.256,8.192
7803,Germany,2022,7763.265,31225.268,9270.737,547.259,1041.292,14191.266,8710.929,1900.926,...,1512.537,31.085,50.783,16.54,2.997,6.27,3.158,42.947,10.129,21.656
8090,Greece,2022,1927.131,24361.199,5966.988,1142.348,0.0,16467.082,6049.146,1795.8,...,1045.742,10.412,56.693,37.3,9.02,0.0,8.982,43.307,12.624,20.709
8981,Hungary,2022,1385.822,20260.748,9209.361,46.087,3968.923,9665.565,2430.819,1216.101,...,61.2,8.164,34.178,24.944,0.535,44.623,1.07,21.199,12.584,1.717
10922,Lithuania,2022,812.267,20259.76,5844.902,442.436,0.0,13602.591,2891.293,195.65,...,578.17,0.0,25.18,14.628,10.791,0.0,10.552,74.82,10.312,38.129
13213,Netherlands,2022,3672.28,47356.742,15456.154,7.434,592.269,28228.312,8051.746,2623.714,...,1230.356,12.077,56.704,39.623,0.05,3.425,5.005,39.871,13.922,17.876


Save to file

In [69]:
# Persist the preprocessed frame; with default settings the row index is
# written as the first (unnamed) column of the CSV.
countries_df.to_csv(path_or_buf="data_2022.csv")