# Download datasets

In [None]:
import os
from urllib.request import urlretrieve

In [None]:
# Record the directory this notebook is currently running from and show it to the user
cwd = os.getcwd()
print(f"Your current working directory is: {cwd}")

In [None]:
# Create a folder "Datasets" in a parent directory for storing datasets.
# os.path.join/os.pardir build the path with the separator of the running platform,
# and exist_ok=True lets this cell be re-run without raising FileExistsError.
os.makedirs(os.path.join(os.pardir, "Datasets"), exist_ok=True)

In [None]:
# Change current working directory to the "Datasets" directory, ready for downloading datasets.
# os.path.join/os.pardir build the path with the separator of the running platform
# instead of relying on a Windows-only "..\Datasets" literal.
os.chdir(os.path.join(os.pardir, "Datasets"))

datapath = os.getcwd()     # Absolute path of the Datasets folder; used later to build file names

print(f"Current working directory is: {datapath}")

In the next cell, datasets will be downloaded from the World Bank website to the Datasets directory.

In [None]:
# Download links: every dataset comes from the same World Bank API endpoint and
# differs only in the indicator code, so the links are built from one template.
_WB_CSV_URL = "https://api.worldbank.org/v2/en/indicator/{code}?downloadformat=csv"

hydro_link = _WB_CSV_URL.format(code="EG.ELC.HYRO.ZS")      # Electricity generation from hydroelectric sources
coal_link = _WB_CSV_URL.format(code="EG.ELC.COAL.ZS")       # Electricity generation from coal
nuclear_link = _WB_CSV_URL.format(code="EG.ELC.NUCL.ZS")    # From nuclear sources
nat_gas_link = _WB_CSV_URL.format(code="EG.ELC.NGAS.ZS")    # From natural gas
oil_link = _WB_CSV_URL.format(code="EG.ELC.PETR.ZS")        # From oil sources
fossils_link = _WB_CSV_URL.format(code="EG.ELC.FOSL.ZS")    # From oil, gas and coal sources
# From renewable sources -- NOTE(review): the RNWX indicator appears to exclude hydroelectric; confirm
renew_link = _WB_CSV_URL.format(code="EG.ELC.RNWX.ZS")

# Links in download order; must stay aligned position-by-position with save_as below
dwnld_root_list = [
    hydro_link,
    coal_link,
    nuclear_link,
    nat_gas_link,
    oil_link,
    fossils_link,
    renew_link,
]

# Names of downloaded files (the ".zip" extension is appended later)
save_as = [
    "hydro",
    "coal",
    "nuclear",
    "natural gas",
    "oil",
    "fossil fuels",
    "renewable",
]


# Function to generate names of downloaded files
def generate_filename(filename, directory=None):
    """
    Docstring: Generate full paths for the files that will be downloaded.
    filename - List of strings containing desired names of downloaded files (without extension).
    directory - Optional folder the files are placed in. When omitted, the
                module-level ``datapath`` (the Datasets folder) is used.

    Returns: List of strings containing generated files names, each ending in ".zip".
    """
    if directory is None:
        directory = datapath     # Fall back to the Datasets folder set earlier in the notebook

    # os.path.join inserts the separator of the running platform, so the paths
    # are valid outside Windows too (a hard-coded "\\" is not).
    return [os.path.join(directory, f"{item}.zip") for item in filename]


# Function to download datasets
def download_data(url, filename):
    """
    Docstring: Download files.
    url (string, list of strings) - Download path of a file.
    filename (string, list of strings) - Name that a downloaded file will be saved as.

    Raises: ValueError if the number of urls differs from the number of file names.

    Returns: None.
    """
    # A lone string would otherwise be walked character by character by zip()
    urls = [url] if isinstance(url, str) else list(url)
    filenames = [filename] if isinstance(filename, str) else list(filename)

    # zip() silently drops the unmatched tail, which would skip downloads without notice
    if len(urls) != len(filenames):
        raise ValueError(
            f"Got {len(urls)} download links but {len(filenames)} file names; they must match."
        )

    num_dwnld_files = 0
    for item, fname in zip(urls, filenames):
        urlretrieve(item, fname)
        num_dwnld_files += 1
    print(f"A total of {num_dwnld_files} files have been downloaded in the directory: {datapath}.")
        
        
# Build the target file names with generate_filename(), then fetch every dataset with download_data()
names_of_files = generate_filename(filename=save_as)

download_data(url=dwnld_root_list, filename=names_of_files)

The downloaded datasets can then be extracted manually in order to obtain ".csv" files that will later be analyzed in this project.

In this case, the corresponding CSV files were renamed to: *hydro*, *coal*, *nuclear*, *nat_gas*, *oil*, *fossil*, and *renewable*.