# Hydro vs Renewables

Data from the [Statistical Review of World Energy](https://www.energyinst.org/statistical-review).

**Note:** I added a sheet to the original Excel file with the code and region number of each country in the dataset. This allows coloring the countries by region and labeling them by code. See the file `Statistical Review of World Energy Data.xlsx` in the data directory.

In [None]:
# import libraries
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# primary energy consumption (sheet values are in Exajoules)
# location of the Excel workbook
path = "../data/Statistical Review of World Energy Data.xlsx"
# header=2: the column names are taken from the third row of the sheet
# skipfooter=13: the last 13 rows of the sheet are not read
# dropna() then discards every row that has at least one missing value
pec = pd.read_excel(
    path, sheet_name="Primary Energy Consumption", header=2, skipfooter=13
).dropna()
# discard the rows whose label contains "Total"
total_rows = pec.index[pec["Exajoules"].str.contains("Total")]
pec = pec.drop(index=total_rows)
# discard the last three columns
pec = pec.drop(columns=pec.columns[-3:])
# use the label column as the index and convert the remaining columns to
# numbers (values that cannot be parsed become NaN)
pec = pec.set_index("Exajoules").apply(pd.to_numeric, errors="coerce")
# Number of rows should be 92
print("Number of rows =", len(pec))

pec.tail()

In [None]:
# primary energy consumption per capita (sheet values are in Gigajoules per capita)
# header=2: the column names are taken from the third row of the sheet
# skipfooter=13: the last 13 rows of the sheet are not read
# dropna() then discards every row that has at least one missing value
pec_cap = pd.read_excel(
    path, sheet_name="Primary Energy - Cons capita", header=2, skipfooter=13
).dropna()
# discard the rows whose label contains "Total"
total_rows = pec_cap.index[pec_cap["Gigajoule per capita"].str.contains("Total")]
pec_cap = pec_cap.drop(index=total_rows)
# discard the last two columns
pec_cap = pec_cap.drop(columns=pec_cap.columns[-2:])
# use the label column as the index and convert the remaining columns to
# numbers (values that cannot be parsed become NaN)
pec_cap = pec_cap.set_index("Gigajoule per capita").apply(
    pd.to_numeric, errors="coerce"
)
# Number of rows should be 92
print("Number of rows =", len(pec_cap))

pec_cap.tail()

In [None]:
# hydro consumption (sheet values are in Exajoules)
# header=2: the column names are taken from the third row of the sheet
# skipfooter=14: the last 14 rows of the sheet are not read
# dropna() then discards every row that has at least one missing value
hydro = pd.read_excel(
    path, sheet_name="Hydro Consumption - EJ", header=2, skipfooter=14
).dropna()
# discard the rows whose label contains "Total"
total_rows = hydro.index[hydro["Exajoules"].str.contains("Total")]
hydro = hydro.drop(index=total_rows)
# discard the last three columns
hydro = hydro.drop(columns=hydro.columns[-3:])
# use the label column as the index and convert the remaining columns to
# numbers (values that cannot be parsed become NaN)
hydro = hydro.set_index("Exajoules").apply(pd.to_numeric, errors="coerce")
# Number of rows should be 92
print("Number of rows =", len(hydro))

hydro.tail()

In [None]:
# renewables consumption (sheet values are in Exajoules, input-equivalent)
# header=2: the column names are taken from the third row of the sheet
# skipfooter=14: the last 14 rows of the sheet are not read
# dropna() then discards every row that has at least one missing value
ren = pd.read_excel(
    path, sheet_name="Renewables Consumption - EJ", header=2, skipfooter=14
).dropna()
# discard the rows whose label contains "Total"
total_rows = ren.index[ren["Exajoules (input-equivalent)"].str.contains("Total")]
ren = ren.drop(index=total_rows)
# discard the last three columns
ren = ren.drop(columns=ren.columns[-3:])
# use the label column as the index and convert the remaining columns to
# numbers (values that cannot be parsed become NaN)
ren = ren.set_index("Exajoules (input-equivalent)").apply(
    pd.to_numeric, errors="coerce"
)
# Number of rows should be 92
print("Number of rows =", len(ren))

ren.tail()

In [None]:
# codes and regions
# index the sheet by country, keep only the countries present in pec
# (in the order of pec.index), and set the name of the index axis to ""
cod_reg = (
    pd.read_excel(path, sheet_name="Codes and regions")
    .set_index("Country")
    .loc[pec.index]
    .rename_axis("")
)

# Number of rows should equal the number of rows of pec,
# because the rows were selected with pec.index
print("Number of rows =", len(cod_reg))

cod_reg.tail()

In [None]:
# check that every other DataFrame has the same index as pec
# (one True/False is printed per DataFrame, in the order listed)
for other in (pec_cap, hydro, ren, cod_reg):
    print(pec.index.equals(other.index))

In [None]:
# population, derived as primary energy consumption (pec) divided by
# primary energy consumption per capita (pec_cap)
# pec is in Exajoules (1e18 J) and pec_cap in Gigajoules (1e9 J) per capita,
# so pec / pec_cap is in billions of people; the factor 1000 converts the
# result to millions
# the name of the index axis is set to "Millions"
population = pec.mul(1000).div(pec_cap).rename_axis("Millions")

population.tail()

In [None]:
# scatter plot of hydro consumption (x) versus renewables consumption (y)
# for the year 2022: one point per country, colored by region, with a marker
# size proportional to population

# region numbers, listed in the order in which they are plotted:
# 7 = Asia Pacific
# 6 = Africa
# 5 = Middle East
# 4 = CIS
# 3 = Europe
# 2 = South and Central America
# 1 = North America
regions = list(range(7, 0, -1))

# colors for regions, in the same order as the regions list
colors = ["yellow", "red", "orange", "magenta", "blue", "darkgreen", "palegreen"]

# year
year = 2022

# make figure
fig, ax = plt.subplots(figsize=(15, 7.5))

# for each region
for region, color in zip(regions, colors):
    # rows (countries) that belong to this region
    in_region = cod_reg["region"] == region
    region_hydro = hydro.loc[in_region, year]
    region_ren = ren.loc[in_region, year]
    region_population = population.loc[in_region, year]
    # plot data
    ax.scatter(region_hydro, region_ren, s=2 * region_population,
               c=color, edgecolor="0", alpha=0.75, zorder=2)
    # plot labels, only for countries whose two values are both at least 0.01
    # (a NaN value fails the ">=" test, so it is skipped as well)
    for country in region_hydro.index:
        hydro_value = region_hydro.loc[country]
        if not hydro_value >= 0.01:
            continue
        ren_value = region_ren.loc[country]
        if not ren_value >= 0.01:
            continue
        ax.text(x=hydro_value, y=ren_value,
                s=cod_reg.loc[country, "code"], size=8, zorder=3)

# plot year as large light-gray text drawn below the points (zorder=1)
ax.text(0.03, 0.15, str(year),
        fontfamily="Courier New", color="lightgray", size=250, zorder=1)

# set axes: fixed limits, logarithmic scale on both axes
ax.set_xlim([0.01, 20])
ax.set_ylim([0.01, 30])
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Hydro consumption [Exajoules]")
ax.set_ylabel("Renewables consumption [Exajoules]")
# grid lines on both major and minor ticks
ax.grid(True, which="both")

In [None]:
# run this cell to install celluloid
# (celluloid provides the Camera class imported in the animation cell)
# the leading "!" is notebook shell syntax; {sys.executable} inserts the path
# of the Python interpreter running this notebook, so pip is invoked through
# that same interpreter
import sys
!{sys.executable} -m pip install celluloid

In [None]:
# Create animation of hydro consumption versus renewables consumption over time
# (one frame is snapped for every column of the hydro DataFrame)

# import celluloid Camera
from celluloid import Camera

# create figure
fig, ax = plt.subplots(figsize=(15, 7.5))
# set axes: fixed limits, logarithmic scale on both axes
ax.set_xlim([0.01, 20])
ax.set_ylim([0.01, 30])
ax.set_xscale("log")  # x axis is log
ax.set_yscale("log")  # y axis is log
ax.set_xlabel("Hydro consumption [Exajoules]")
ax.set_ylabel("Renewables consumption [Exajoules]")
ax.grid(True, which="both")
# create camera
camera = Camera(fig)

# the rows that belong to each region do not depend on the year, so extract
# the region data once here instead of once per frame
region_data = []
for region, color in zip(regions, colors):
    in_region = cod_reg["region"] == region
    region_data.append(
        (color, hydro[in_region], ren[in_region], population[in_region])
    )

# for each year
for year in hydro.columns:
    # for each region
    for color, my_hydro, my_ren, my_population in region_data:
        # plot data: marker size is proportional to population
        ax.scatter(my_hydro[year], my_ren[year], s=my_population[year] * 2,
                   c=color, edgecolor="0", alpha=0.75, zorder=2)
        # plot labels, only for countries whose two values are both at least 0.01
        for index in my_hydro.index:
            if my_hydro.loc[index, year] >= 0.01 and my_ren.loc[index, year] >= 0.01:
                ax.text(x=my_hydro.loc[index, year], y=my_ren.loc[index, year],
                        s=cod_reg.loc[index, "code"], size=8, zorder=3)
    # plot year as large light-gray text drawn below the points (zorder=1)
    ax.text(x=0.03, y=0.15, s=str(year),
            fontdict=dict(fontfamily="Courier New", color="lightgray", size=250), zorder=1)
    # snap current plot
    camera.snap()

To play the animation in the notebook, you may need to install ffmpeg. For macOS, follow [this link](https://phoenixnap.com/kb/ffmpeg-mac)

In [None]:
# HTML wraps the video markup so that it is displayed as the cell output
from IPython.display import HTML
# build the animation from the frames snapped by the camera
animation = camera.animate(interval=500, repeat=True, repeat_delay=500)
# convert the animation to an HTML5 video and display it
video_markup = animation.to_html5_video()
HTML(video_markup)

In [None]:
# save animation as an mp4 file
from pathlib import Path

movie_path = "../movies/HydroVsRen.mp4"
# make sure the output directory exists before saving; writing the movie
# is expected to fail if the directory is missing
Path(movie_path).parent.mkdir(parents=True, exist_ok=True)
animation.save(movie_path, dpi=300)