# Introduction

Importing necessary libraries

In [1]:
from urllib.request import urlretrieve                 # For downloading data
import os
import numpy as np
import pandas as pd                                    # Crucial for data processing and analysis
import matplotlib.pyplot as plt                        # For visualization of data

# Downloading Data

The worldwide data for electricity generation can be downloaded from [The World Bank website](https://www.worldbank.org/en/home "The World Bank"). The data files that will be used in this project together with their download links are summarized in the next cell. Note that the download links direct to compressed (that is, ".zip") files containing Comma-Separated Values (".csv") files; however, Excel (.xls or .xlsx) files are also available on the same website.

Note that in this case, the download path where the data files will be saved has been specified based on my project structure. Feel free to modify the lines or break the lines in the cell to suit your needs.

In [31]:
# Electricity generation data
# Download links (one World Bank indicator per energy source):
hydro_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.HYRO.ZS?downloadformat=csv"         # Electricity generation from hydroelectric sources
coal_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.COAL.ZS?downloadformat=csv"          # Electricity generation from coal
nuclear_src_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.NUCL.ZS?downloadformat=csv"   # From nuclear sources
natGas_src_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.NGAS.ZS?downloadformat=csv"    # From natural gas
oil_src_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.PETR.ZS?downloadformat=csv"       # From oil sources
fossils_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.FOSL.ZS?downloadformat=csv"       # From oil, gas and coal sources
renew_src_link = "https://api.worldbank.org/v2/en/indicator/EG.ELC.RNWX.ZS?downloadformat=csv"     # From renewable sources (excluding hydroelectric source)

# All download links, in the same order as the names in files_names below
download_links = [hydro_link, coal_link, nuclear_src_link, natGas_src_link, oil_src_link, fossils_link, renew_src_link]

# Base names (without extension) under which the downloaded files are saved; order matches download_links
files_names = ["hydroelectric", "coal", "nuclear", "natural gas", "oil", "fossils", "renewable sources"]

# Raw string so the backslashes of the Windows path are not read as (invalid) escape sequences
save_path = r"D:\Coding\Projects\Worldwide electricity generation\Data"


# Function to generate save names of files that will be downloaded from the World Bank website
def generate_files_names(save_path, list_of_names):
    """
    Generates the full save paths of the files that will be downloaded.
    -----------------------------------------------------
    save_path: string; folder in which the downloaded files will be saved.
    list_of_names: list; base names (without extension) of the files.

    Returns a list holding one complete file path per name, in the same order.
    Every path ends in ".zip" because the World Bank files are provided in a .zip format.
    """
    # os.path.join uses the separator of the running platform (instead of a hard-coded "\\")
    # and does not double the separator when save_path already ends with one
    return [os.path.join(save_path, str(item) + ".zip") for item in list_of_names]


# Download the data files using the above links (i.e., download_links)

def download_files(list_of_urls, path=None, names=None):
    """
    Docstring:
    A function to download multiple files from a website.
    -----------------------------------------------------
    list_of_urls: list; a list variable containing url paths from which data files will be downloaded.
    path: string, optional; folder to which downloaded files will be saved.
          When omitted, the module-level variable "save_path" is used.
    names: list, optional; base names (without extension) given to the downloaded files.
           When omitted, the module-level variable "files_names" is used.

    Urls and names are paired in order; pairing stops at the shorter of the two lists.
    Prints a report with the number of downloaded files and returns nothing.
    """
    # Fall back to the notebook-level settings so that download_files(urls) keeps working unchanged
    target_dir = save_path if path is None else path
    base_names = files_names if names is None else names

    # urlretrieve writes straight to the given file name and fails if the folder is missing
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Full save path (folder + name + ".zip") of every file
    fnames_output = generate_files_names(target_dir, base_names)

    num_of_downloaded_files = 0
    for url, filename in zip(list_of_urls, fnames_output):
        urlretrieve(url, filename)
        num_of_downloaded_files += 1
    print(f"Report: A total of {num_of_downloaded_files} files have been downloaded.")
    
# Download the file behind every link listed in download_links
download_files(download_links)

Report: A total of 7 files have been downloaded.
