In [1]:
"""
This program generates 12 plots and saves each of them as a separate PDF file:

2-Consumption_all_countries.pdf
2-Consumption_all_years.pdf
2-Consumption_each_country
2-Consumption_each_year
4-Electricity_Natural_Gas_all_countries
4-Electricity_Natural_Gas_each_country
6-Industrial_usage_all_countries
6-Industrial_usage_each country
7-China_consumption
9-Residential_usage_all_continents
9-Residential_usage_each continent
11-CarbonEmissions_vs_2015_Consumption

"""

%matplotlib inline
import matplotlib.pyplot as plt
import math
import numpy as np
import pprint as pp
from matplotlib.backends.backend_pdf import PdfPages


def plot_hist_each(Consumers, Consumers_countries_list, Consumers_years_list):      
    """
    Write per-country and per-year consumption histograms to two multi-page PDFs.

    1) "2-Consumption_each_country.pdf": one page per row of Consumers,
       showing the histogram of that row (one country, all years).
    2) "2-Consumption_each_year.pdf": one page per column of Consumers,
       showing the histogram of that column (one year, all countries).

    Consumers is indexed as a 2D array ([row, column]). Page titles use
    Consumers_countries_list[row] and Consumers_years_list[column]; both are
    concatenated with "+", so their items must be strings.
    """

    def write_pages(pdf_name, axis, names, title_head, title_tail, bar_color):
        # One figure per row (axis 0) or per column (axis 1); PdfPages
        # collects every figure as a page of the same PDF.
        with PdfPages(pdf_name) as pdf:
            for page in range(Consumers.shape[axis]):
                fig, ax = plt.subplots(1, 1, figsize=(6, 3))
                ax.grid(alpha=.4)

                if axis == 0:
                    series = Consumers[page, :]   # all columns of this row
                else:
                    series = Consumers[:, page]   # all rows of this column

                ax.hist(series, bins=get_bins(series),
                        color=bar_color, edgecolor="black", alpha=.5)

                ax.set_title(title_head + names[page] + title_tail, fontsize=10)
                ax.set_xlabel("Consumption (MTOE)", fontsize=8)
                ax.set_ylabel("Count", fontsize=8)

                pdf.savefig(fig, dpi=500, bbox_inches='tight')
                plt.close(fig)  # release the figure once its page is written

    # Rows first (countries), then columns (years).
    write_pages("2-Consumption_each_country.pdf", 0, Consumers_countries_list,
                "Consumption for ", " between 1990 - 2016", 'r')
    write_pages("2-Consumption_each_year.pdf", 1, Consumers_years_list,
                "Consumption for the year ", "", 'y')

    
def plot_before_after_each(np_array, array_name, Consumers_countries_list, index_2000):
    """
    Save one "before vs after" histogram page per row of np_array.

    The 2D np_array is split column-wise: columns up to and including
    index_2000 form the "before" part, the remaining columns the "after"
    part. For every row both parts are drawn as overlapping histograms on
    the same axis and the figure is appended to a multi-page PDF.

    array_name == "Res" selects the residential labels, the fixed list of six
    continent names as row titles, and "9-Residential_usage_each continent.pdf".
    Any other value selects the industrial labels, Consumers_countries_list as
    row titles, and "6-Industrial_usage_each country.pdf".
    """
    if array_name == "Res":
        row_names = ["Europe", "North America", "South America", "Asia", "Africa", "Middle East"]
        title_prefix = "Residential usage for "
        x_text = "Residential usage (MTOE)"
        pdf_name = "9-Residential_usage_each continent.pdf"
    else:
        row_names = Consumers_countries_list
        title_prefix = "Industrial usage for "
        x_text = "Industrial usage (MTOE)"
        pdf_name = "6-Industrial_usage_each country.pdf"

    split_at = index_2000 + 1
    early = np_array[:, :split_at]
    late = np_array[:, split_at:]

    with PdfPages(pdf_name) as pdf:
        for row in range(np_array.shape[0]):
            fig, axis = plt.subplots(1, 1, figsize=(6, 3))
            axis.grid(alpha=.4)

            early_row = early[row, :]
            late_row = late[row, :]
            early_bins = get_bins(early_row)
            late_bins = get_bins(late_row)

            # "before" is drawn first so the legend labels below line up.
            axis.hist(early_row, bins=early_bins, color='g', edgecolor='black', alpha=.5)
            axis.hist(late_row, bins=late_bins, color='r', edgecolor='black', alpha=.3)

            axis.set_xlabel(x_text, fontsize=8)
            axis.set_ylabel("Count", fontsize=8)
            axis.set_title(title_prefix + row_names[row], fontsize=12)
            axis.legend(['before 2000', 'after 2000'], loc='best', fontsize=10)

            pdf.savefig(fig, dpi=500, bbox_inches='tight')
            plt.close(fig)

def get_highest_usage(CarbonEmissions_country_list, Consumers_2015_value_list,
                      consumers_dict=None):
    """
    Return the (up to) five countries with the highest 2015 consumption
    together with their continents.

    Parameters:
        CarbonEmissions_country_list: country names.
        Consumers_2015_value_list: consumption values; item i belongs to
            country i of CarbonEmissions_country_list.
        consumers_dict: optional mapping country -> {"continent": ...}.
            When omitted, the module-level Consumers_dict is used, which is
            what this function always read before the parameter existed.

    Returns:
        (top5_country, top5_continent): two parallel lists ordered from the
        highest value downwards. Countries with equal values keep their input
        order and each country appears once.

    Raises:
        KeyError: if one of the selected countries is missing from the
            continent mapping.
    """
    if consumers_dict is None:
        consumers_dict = Consumers_dict  # module-level global

    # Rank positions rather than values: matching values back to countries by
    # equality returned every tied country once per tied value.
    ranked_positions = sorted(
        range(len(CarbonEmissions_country_list)),
        key=lambda position: Consumers_2015_value_list[position],
        reverse=True,
    )[:5]

    top5_country = [CarbonEmissions_country_list[position] for position in ranked_positions]
    top5_continent = [consumers_dict[country]["continent"] for country in top5_country]

    return top5_country, top5_continent

def plot_double_bar_graph(CarbonEmissions_country_list, CarbonEmissions_values, Consumers_2015_value_list,
                         filename = "CarbonEmiss_Consm2015_double_bar.pdf"):
    """
    Plot a double bar graph (CO2 emissions next to 2015 consumption) per country
    and save it to `filename` as a PDF.

    The two series have different units, so the emissions are drawn on the
    left y-axis (ax1) and the consumption on a second y-axis created with
    twinx() (ax2).

    NOTE: a function with the same name and signature is defined again further
    down in this file. When the file is executed top to bottom, that later
    definition is the one that ends up bound to this name.

    Parameters:
        CarbonEmissions_country_list: country names, used as x tick labels.
        CarbonEmissions_values: one emission value per country (left axis).
        Consumers_2015_value_list: one consumption value per country (right axis).
        filename: path of the PDF that receives the figure.
    """
    # one x position per country
    x_axis = np.arange(len(CarbonEmissions_country_list) )
    
    with PdfPages(filename) as pdf:

        fig, ax1 = plt.subplots(1,1,figsize=(6,3))
        ax1.grid(alpha=.4)

        # second y-axis sharing the same x-axis
        ax2 = ax1.twinx() 

        width = 0.3

        # Plot the carbon emission on ax1
        carb_em = ax1.bar(x_axis, CarbonEmissions_values, 
                          width=width, color='peru', align='center')

        # Plot the energy consumption for 2015 on ax2, shifted right by one bar width
        con_2015 = ax2.bar(x_axis + width, Consumers_2015_value_list, 
                           width=width, color='green', align='center')

        # ax1 decorations 
        ax1.set_xticks(x_axis + width/2) # tick sits midway between the two bars of a country
        ax1.set_xticklabels(CarbonEmissions_country_list, fontsize=8) # label the x-axis with the country names
        
        yticks = np.linspace(min(CarbonEmissions_values),max(CarbonEmissions_values),10) # 10 evenly spaced ticks from min to max
        ax1.set_yticks(yticks) # set the y-ticks locations     
        ax1.set_ylabel('Co2 Emissions (million metric tons)', 
                       color = "peru",
                       fontsize=8)
        ax1.tick_params(axis='y', 
                        rotation=0, 
                        labelcolor='peru' )

        # ax2 decorations (label y axis), again 10 evenly spaced ticks from min to max
        ax2.set_yticks( np.linspace(min(Consumers_2015_value_list),max(Consumers_2015_value_list),10) )           
        ax2.set_ylabel('2015 Consumption (MTOE)', 
                       color = "green",
                       fontsize=8)
        ax2.tick_params(axis='y', 
                        rotation=0, 
                        labelcolor='green' )

        # plt.title / plt.legend go through the pyplot interface, i.e. they act
        # on whatever pyplot considers the current axes at this point.
        plt.title("Carbon Emission vs Consumption for 2015", fontsize = 10)

        # Rotate the tick labels and set their alignment.
        plt.setp(ax1.get_xticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")

        # one legend entry per bar series
        plt.legend([carb_em, con_2015],['Co2 Emissions (million metric tons)', '2015 Consumption (MTOE)'],
                  fontsize = 8)

        pdf.savefig(fig, dpi=500, bbox_inches='tight')
        plt.close(fig)        
            
def get_2015_value(Consumers, CarbonEmissions_country_list, Consumers_countries_list):
    """
    Look up the second-to-last Consumers value for every carbon-emission country.

    Each name in CarbonEmissions_country_list is located in
    Consumers_countries_list; the position found is the row of that country
    in Consumers. The value at column -2 of that row is collected (the
    original author treats that column as the 2015 data).

    Returns the collected values as a list, in the order of
    CarbonEmissions_country_list. A name missing from
    Consumers_countries_list raises ValueError (from list.index).
    """
    # Resolve every row number first, then read the values.
    row_numbers = [Consumers_countries_list.index(name)
                   for name in CarbonEmissions_country_list]

    return [Consumers[row][-2] for row in row_numbers]

def get_CarbonEmissions(CarbonEmissions_data_list):
    """
    Split "rank,country,co2_emission" records into two parallel lists.

    Every item of CarbonEmissions_data_list must contain exactly three
    comma-separated fields. The rank is discarded, the country is kept as a
    string and the emission is converted to float.

    Returns (countries, emissions) in input order.
    """
    countries = []
    emissions = []

    for record in CarbonEmissions_data_list:
        _rank, name, amount = record.split(",")
        countries.append(name)
        emissions.append(float(amount))  # the file stores numbers as text

    return countries, emissions
    
    

def slice_and_compute(np_array, slice_index, before_percent, after_percent):
    """
    Scale the left and right column ranges of a 2D array by different factors.

    Columns 0..slice_index (inclusive) are multiplied by before_percent, the
    remaining columns by after_percent. The two scaled parts are joined
    again column-wise, so the result has the same shape as np_array.
    """
    cut = slice_index + 1  # first column that belongs to the "after" part

    scaled_parts = [
        np_array[:, :cut] * before_percent,  # columns <= slice_index
        np_array[:, cut:] * after_percent,   # columns  > slice_index
    ]

    # axis=1 joins the parts side by side (column-wise)
    return np.concatenate(scaled_parts, axis=1)


def get_Continent(Consumers_dict):
    """
    Sum the per-country value series of Consumers_dict by continent.

    Parameters:
        Consumers_dict: mapping country -> {"continent": str, "values": list, ...}.

    Returns:
        A 2D numpy array with six rows, in this fixed order:
        Europe, North America, South America, Asia, Africa, Middle East.
        Each row is the element-wise sum of the "values" of all countries of
        that continent. The number of columns is taken from the data (length
        of the first country's "values") instead of being fixed at 27.
        A continent without any country yields a row of zeros, so the result
        always has six rows.

    Countries whose "continent" is missing or not one of the six names above
    are ignored.
    """
    continent_order = ("Europe", "North America", "South America",
                       "Asia", "Africa", "Middle East")
    rows_by_continent = {name: [] for name in continent_order}

    for data in Consumers_dict.values():
        # Read the continent key directly instead of comparing every stored
        # value (index, values list, ...) against the continent names.
        bucket = rows_by_continent.get(data.get("continent"))
        if bucket is not None:
            bucket.append(data["values"])

    # Width of one row, needed for continents that have no country at all.
    if Consumers_dict:
        first_entry = next(iter(Consumers_dict.values()))
        n_columns = len(first_entry["values"])
    else:
        n_columns = 0

    totals = []
    for name in continent_order:
        country_rows = rows_by_continent[name]
        if country_rows:
            # axis=0 adds the countries together, keeping one value per column
            totals.append(np.asarray(country_rows).sum(axis=0))
        else:
            totals.append(np.zeros(n_columns))

    # stack the six 1D totals into a (6, n_columns) array
    Continent = np.vstack(totals)

    return Continent

def get_EnergyType(EnergyType_data_list):
    """
    Build a nested per-country dictionary from paired energy records.

    EnergyType_data_list holds "country,consumption,type,year" strings that
    come in pairs: the first line of a pair carries the electricity value,
    the second line the natural gas value of the same country and year.
    Country and year are taken from the first line of each pair.

    Returns:
        EnergyType: {country: {type_of_line_1: [floats], type_of_line_2: [floats]}}
            with one float appended per pair, in input order.
        EnergyType_country_list: countries in order of first appearance.
        EnergyType_years_list: years in order of first appearance.

    Raises:
        ValueError: if the list has an odd number of lines (an unpaired
            record), if a line does not have exactly four comma-separated
            fields, or if a consumption value is not a number.
    """
    line_count = len(EnergyType_data_list)
    if line_count % 2:
        # Fail early with a clear message instead of running off the end of
        # the list while looking for the missing partner line.
        raise ValueError(
            "EnergyType_data_list must contain an even number of lines, got %d"
            % line_count
        )

    EnergyType = {} # a nested dictionary
    EnergyType_country_list = []
    EnergyType_years_list = []

    # step through the list two lines at a time
    for start in range(0, line_count, 2):
        country1, elec_consumption, con_type1, year1 = EnergyType_data_list[start].split(",")
        _country2, ngas_consumption, con_type2, _year2 = EnergyType_data_list[start + 1].split(",")

        if country1 not in EnergyType:
            # first record of this country: create both value lists
            EnergyType[country1] = {con_type1: [], con_type2: []}
            EnergyType_country_list.append(country1)

        if year1 not in EnergyType_years_list:
            EnergyType_years_list.append(year1)

        EnergyType[country1][con_type1].append(float(elec_consumption))
        EnergyType[country1][con_type2].append(float(ngas_consumption))

    return EnergyType, EnergyType_country_list, EnergyType_years_list 

def get_bcm_arrays(EnergyType):
    """
    Turn every country's value lists into numpy arrays, grouped by energy type.

    EnergyType is the nested dictionary {country: {type: [values]}}.
    Lists stored under the key "Electricity" are converted to an array and
    multiplied by 1.11 (MTOE -> bcm, so both series share one unit); lists
    stored under any other key are converted unchanged and treated as
    natural gas (already in bcm).

    Returns (elec_bcm_list, ngas_list), two lists of numpy arrays in the
    iteration order of EnergyType.

    NOTE(review): the original documentation quoted the conversion factor as
    1.111111111111111 (unitjuggler.com) while the code multiplies by 1.11;
    the code's factor is kept here - confirm which one is intended.
    """
    elec_bcm_list = []
    ngas_list = []

    for per_country in EnergyType.values():
        for kind, series in per_country.items():
            as_array = np.array(series)
            if kind == "Electricity":
                elec_bcm_list.append(as_array * 1.11)  # MTOE -> bcm
            else:
                ngas_list.append(as_array)

    return elec_bcm_list, ngas_list

In [2]:
def plot_elec_ngas_each(EnergyType, EnergyType_country_list, EnergyType_years_list, 
                     filename = "4-Electricity_Natural_Gas_each_country.pdf"):
    
    """
    Plot, for every country, the electricity and the natural gas consumption
    (both in bcm) as two lines over the years, one page per country, and save
    all pages to `filename`.

    Parameters:
        EnergyType: nested dictionary passed on to get_bcm_arrays(), which
            returns one electricity array and one natural gas array per country.
        EnergyType_country_list: country names; item i titles page i.
        EnergyType_years_list: x values shared by all lines.
        filename: path of the multi-page PDF.
    """
   
    # lists of np arrays, one entry per country
    elec_bcm_list, ngas_list = get_bcm_arrays(EnergyType)
        
    # use PdfPages to save multiple plots to a single pdf
    with PdfPages(filename) as pdf:
        for i in range( len(elec_bcm_list) ):
            elec_bcm = elec_bcm_list[i]
            ngas = ngas_list[i]

            fig, ax = plt.subplots(1,1,figsize=(6,3))
            ax.grid(alpha=.4)

            ax.plot( EnergyType_years_list, elec_bcm, 
                    color='red', linewidth=2, label = "Electricity" )
            ax.plot( EnergyType_years_list, ngas, 
                    color='olive', linewidth=2, label = "Natural Gas" )

            ax.set_title( EnergyType_country_list[i] + " Electricity vs Natural Gas",fontsize=10 )
            ax.set_xticks(np.arange(0, len(EnergyType_years_list)))
            # NOTE(review): `size` sets the tick mark length; the sibling
            # plot_elec_ngas_all uses `labelsize` here - confirm the intent.
            ax.tick_params(axis='x', 
                          rotation=90,
                          size = 8)
            ax.set_xlabel('Year', 
                           fontsize=10)  

            # Spread the 13 ticks over the range of BOTH series. Using only
            # min(natural gas)..max(electricity) leaves part of the data
            # without ticks whenever natural gas exceeds electricity (or
            # electricity holds the minimum).
            y_low = min(min(elec_bcm), min(ngas))
            y_high = max(max(elec_bcm), max(ngas))
            ax.set_yticks(np.linspace(y_low, y_high, 13))
            ax.tick_params(axis='y',                       
                          size = 8)
            ax.set_ylabel('Consumption (bcm)', fontsize=10)

            # show legend on this figure's own axes
            ax.legend(loc = 'best', fontsize=8)    

            pdf.savefig(fig, dpi=500, bbox_inches='tight')
            plt.close(fig)        
            

def plot_elec_ngas_all(EnergyType, EnergyType_country_list, EnergyType_years_list, 
                     param_dict, filename = "AAAAAAAAAAAAAAAAA.pdf"):
    
    """
    Plot the electricity and natural gas consumption (both in bcm) of every
    country as line graphs in one big grid of subplots and save the figure
    to `filename`.

    Parameters:
        EnergyType: nested dictionary passed on to get_bcm_arrays(), which
            returns one electricity array and one natural gas array per country.
        EnergyType_country_list: country names; item i titles subplot i.
        EnergyType_years_list: x values shared by all lines.
        param_dict: plot settings. Keys read here: "fig_len", "fig_wid",
            "rows", "cols", "suptitle", "sup_fontsize", "subp_fontsize",
            "xticks_fontsize", "xlabel", "x_fontsize", "yticks_fontsize",
            "ylabel", "y_fontsize".
        filename: path of the output file.

    The grid ("rows" x "cols") must have at least as many cells as there are
    countries in EnergyType. Note that the figure size is applied through
    plt.rcParams, so it stays in effect after this function returns.
    """     
   
    # lists of np arrays, one entry per country
    elec_bcm_list, ngas_list = get_bcm_arrays(EnergyType)
    
    plt.rcParams["figure.figsize"] = (param_dict["fig_len"], param_dict["fig_wid"])     
    
    # squeeze=False always returns a 2D array of axes, so `.flat` also works
    # for a 1x1, 1xN or Nx1 grid
    fig, axes = plt.subplots(nrows = param_dict["rows"], ncols = param_dict["cols"],                             
                              constrained_layout=True, squeeze=False)

    ax = axes.flat
    fig.suptitle(param_dict["suptitle"], fontsize = param_dict["sup_fontsize"] )
    
    for i in range( len( EnergyType ) ):
        elec_bcm = elec_bcm_list[i]
        ngas = ngas_list[i]

        ax[i].grid(alpha=.4)
        ax[i].plot( EnergyType_years_list, elec_bcm, 
                   color='red', linewidth=2, label = "Electricity" )
        ax[i].plot( EnergyType_years_list, ngas, 
                   color='olive', linewidth=2, label = "Natural Gas" )

        ax[i].set_title( EnergyType_country_list[i],fontsize = param_dict["subp_fontsize"])
        ax[i].set_xticks(np.arange(0, len(EnergyType_years_list)))
        ax[i].tick_params(axis='x', rotation=90, labelsize = param_dict["xticks_fontsize"])
        ax[i].set_xlabel(param_dict["xlabel"], fontsize = param_dict["x_fontsize"])  

        # Spread the 13 ticks over the range of BOTH series. Using only
        # min(natural gas)..max(electricity) leaves part of the data without
        # ticks whenever natural gas exceeds electricity (or electricity
        # holds the minimum).
        y_low = min(min(elec_bcm), min(ngas))
        y_high = max(max(elec_bcm), max(ngas))
        ax[i].set_yticks(np.linspace(y_low, y_high, 13))
        ax[i].tick_params(axis='y', labelsize = param_dict["yticks_fontsize"])
        ax[i].set_ylabel(param_dict["ylabel"], fontsize = param_dict["y_fontsize"])

        # show legend
        ax[i].legend(loc = 'best', fontsize='small')
        
    # save through the figure object rather than pyplot's "current figure"
    fig.savefig(filename, bbox_inches='tight')    
    plt.close(fig)
     

In [3]:
def plot_double_bar_graph(CarbonEmissions_country_list, CarbonEmissions_values, Consumers_2015_value_list,
                         filename = "CarbonEmiss_Consm2015_double_bar.pdf"):
    """
    Draw each country's CO2 emissions next to its 2015 consumption as a
    double bar graph and save the figure to `filename`.

    The two series use different units (million metric tons vs MTOE), so the
    emissions go on the left y-axis and the consumption on a twin y-axis
    created with twinx(). Each y-axis gets 10 evenly spaced ticks between
    the minimum and maximum of its own series.
    """
    bar_width = 0.3
    emission_label = 'Co2 Emissions (million metric tons)'
    consumption_label = '2015 Consumption (MTOE)'
    positions = np.arange(len(CarbonEmissions_country_list))

    with PdfPages(filename) as pdf:
        fig, left_axis = plt.subplots(1, 1, figsize=(6, 3))
        left_axis.grid(alpha=.4)
        right_axis = left_axis.twinx()

        # emissions on the left axis, consumption one bar width to the right
        emission_bars = left_axis.bar(positions, CarbonEmissions_values,
                                      width=bar_width, color='peru', align='center')
        consumption_bars = right_axis.bar(positions + bar_width, Consumers_2015_value_list,
                                          width=bar_width, color='green', align='center')

        # country names sit midway between the two bars
        left_axis.set_xticks(positions + bar_width / 2)
        left_axis.set_xticklabels(CarbonEmissions_country_list, fontsize=8)

        # same decoration recipe for both y-axes, each in its series colour
        y_axis_setup = (
            (left_axis, CarbonEmissions_values, emission_label, "peru"),
            (right_axis, Consumers_2015_value_list, consumption_label, "green"),
        )
        for axis, series, text, shade in y_axis_setup:
            axis.set_yticks(np.linspace(min(series), max(series), 10))
            axis.set_ylabel(text, color=shade, fontsize=8)
            axis.tick_params(axis='y', rotation=0, labelcolor=shade)

        plt.title("Carbon Emission vs Consumption for 2015", fontsize = 10)

        # slanted country names, anchored at their right end
        plt.setp(left_axis.get_xticklabels(), rotation=45, ha="right",
                 rotation_mode="anchor")

        plt.legend([emission_bars, consumption_bars],
                   [emission_label, consumption_label],
                   fontsize = 8)

        pdf.savefig(fig, dpi=500, bbox_inches='tight')
        plt.close(fig)
            
def get_bins(values_list):
    """Return histogram bin edges for values_list using the square-root rule.

       The number of bins is the square root of the number of data points,
       rounded UP. The edges are evenly spaced from the smallest to the
       largest value, so the result has (number of bins + 1) entries and can
       be passed directly as `bins=` to a histogram call.

       If all values are equal, the range is widened by 0.5 on each side so
       that the edges stay strictly increasing.

       Raises ValueError if values_list is empty.

       Source - https://www.qimacros.com/histogram-excel/how-to-determine-histogram-bin-interval/

    """
    # Count the number of data points.
    data_points = len(values_list)
    if data_points == 0:
        raise ValueError("get_bins() needs at least one value")

    # Number of bins = square root of the number of data points, rounded UP.
    num_bins = math.ceil(math.sqrt(data_points))

    lowest = min(values_list)
    highest = max(values_list)
    if lowest == highest:
        # Zero range: a bin width of 0 cannot be used as a step, so give the
        # single value a bin of its own to fall into.
        lowest, highest = lowest - 0.5, highest + 0.5

    # linspace fixes the NUMBER of edges; stepping by a float bin width can
    # produce one edge too many through rounding.
    bin_edges = np.linspace(lowest, highest, num_bins + 1)

    return bin_edges

In [4]:
def plot_hist(np_array, shape, param_dict, hist_type = "multiple", 
              hist_num = 1, slice_index = 10,
             filename = "AAAAAAAAAAAAAAAAAAAAA.pdf"):

    """
    Plot one or many histograms of np_array and save the figure to `filename`.

    Parameters:
        np_array: 2D array of values.
        shape: axis to iterate over in "multiple" mode: 0 = one subplot per
            row, 1 = one subplot per column.
        param_dict: plot settings. Always read: "suptitle", "sup_fontsize",
            "color", "edgecolor", "alpha", "xlabel", "x_fontsize", "ylabel",
            "y_fontsize", "pad". "multiple" mode also reads "fig_len",
            "fig_wid", "rows", "cols", "subplot_title", "title_list",
            "subp_fontsize"; with hist_num == 2 also "bef_color",
            "aft_alpha", "leg_fontsize".
        hist_type: "single" draws ONE standalone histogram of row
            np_array[slice_index]; anything else ("multiple") draws a grid of
            "rows" x "cols" subplots.
        hist_num: 1 = one histogram per subplot; 2 = two overlapping
            histograms per subplot (columns up to and including slice_index
            vs the remaining columns). 2 is only implemented for shape == 0.
        slice_index: row to plot in "single" mode, or the last "before"
            column when hist_num == 2.
        filename: path of the output file.

    In "multiple" mode the grid must have at least as many cells as there are
    rows (shape 0) or columns (shape 1). Combinations other than
    (shape 0, hist_num 1 or 2) and (shape 1, hist_num 1) save an empty grid.
    The figure size of "multiple" mode is applied through plt.rcParams, so it
    stays in effect after this function returns.
    """
    if hist_type == "single": # 1 plot 1 axis
        fig, ax = plt.subplots(1,1,figsize=(6,3))  # a fig with 1 row 1 col       
        ax.grid(alpha=.4) # show grids. Alpha controls the brightness of the grids
        fig.suptitle( param_dict["suptitle"], fontsize = param_dict["sup_fontsize"])        
        
        array = np_array[slice_index] # the single row to plot

        ax.hist( array, bins=get_bins(array), 
                color = param_dict["color"], 
                edgecolor=param_dict["edgecolor"], alpha = param_dict["alpha"] )
        
        ax.set_xlabel(param_dict["xlabel"], fontsize = param_dict["x_fontsize"])
        ax.set_ylabel(param_dict["ylabel"], fontsize = param_dict["y_fontsize"])
        
        fig.tight_layout(pad = param_dict["pad"])
        
    else:     # 1 or 2 plots on each of n axis 
        
        # set the dimensions of the entire fig
        plt.rcParams["figure.figsize"] = (param_dict["fig_len"], param_dict["fig_wid"])
        
        # squeeze=False always returns a 2D array of axes, so `.flat` also
        # works for a 1x1, 1xN or Nx1 grid
        fig, axes = plt.subplots(nrows = param_dict["rows"], 
                             ncols = param_dict["cols"], 
                             constrained_layout = False,
                             squeeze = False)  
        fig.subplots_adjust(top=0.95)
        ax = axes.flat # flatten the 2D axes to get nrows*ncols axes
        fig.suptitle( param_dict["suptitle"], fontsize = param_dict["sup_fontsize"])

        def label_subplot(axis, i):
            # title and axis labels shared by every subplot variant
            axis.set_title( param_dict["subplot_title"] + param_dict["title_list"][i], 
                           fontsize = param_dict["subp_fontsize"] )
            axis.set_xlabel(param_dict["xlabel"], fontsize = param_dict["x_fontsize"])
            axis.set_ylabel(param_dict["ylabel"], fontsize = param_dict["y_fontsize"])

        if shape == 0 and hist_num == 2: # 2 hist per axis (before-after plots)
            # The split does not depend on the row, so slice once up front.
            before_2000 = np_array[ :, :slice_index+1 ] # all rows with column <= slice_index
            after_2000 = np_array[ :, slice_index+1:] # all rows with column > slice_index

            for i in range(np_array.shape[0]):
                before_row = before_2000[i, :]
                after_row = after_2000[i, :]
                num_bins1 = get_bins(before_row)
                num_bins2 = get_bins(after_row)

                ax[i].grid(alpha=.4) # show grids
                ax[i].hist( before_row, bins = num_bins1, 
                         color = param_dict["bef_color"], edgecolor= param_dict["edgecolor"], 
                         alpha=param_dict["alpha"] )     
                ax[i].hist( after_row, bins = num_bins2, 
                         color = param_dict["color"], edgecolor= param_dict["edgecolor"], 
                         alpha=param_dict["aft_alpha"] )   

                label_subplot(ax[i], i)
                ax[i].legend(['before 2000', 'after 2000'], loc='best', 
                             fontsize = param_dict["leg_fontsize"])

            fig.tight_layout(pad = param_dict["pad"])

        elif shape in (0, 1) and hist_num == 1: # one hist on each axis
            for i in range(np_array.shape[shape]):
                if shape == 0:
                    array = np_array[i, :] # for each row, get all the cols
                else:
                    array = np_array[:, i] # for each col, get all the rows
                num_bins = get_bins(array) # bin edges based on the values

                ax[i].grid(alpha=.4) # show grids
                ax[i].hist(array, bins = num_bins,
                           color = param_dict["color"], 
                           edgecolor=param_dict["edgecolor"], alpha = param_dict["alpha"] )
                label_subplot(ax[i], i)

            fig.tight_layout(pad = param_dict["pad"])
                
    # save through the figure object rather than pyplot's "current figure"
    fig.savefig(filename, bbox_inches='tight')    
    plt.close(fig)

In [5]:
def create_Consumers_dict(Consumers_countries_list, Consumers_values_list ):
    """
    Build a nested dictionary with each country's continent, index and values.

    Eg:
    {'Algeria': {'continent': 'Africa',
             'index': 36,
             'values': [22.0583796, 23.35245308, 23.91854217, ...]}}

    Consumers_countries_list holds the country names.
    Consumers_values_list is a list of lists; item i holds the values of
    country i, e.g.
    [ [47.79839091, 50.01946971, ...],
      [49.54339902, 44.75610039, ...],
      ...
    ]

    "index" is the position of the country in Consumers_countries_list.
    A country that appears in none of the continent tables below is
    assigned to "Middle East".
    """
    continent_members = (
        ("Europe", ['Belgium', 'Czech Rep.', 'France', 'Germany', 'Italy', 'Netherlands',
                    'Poland', 'Portugal', 'Romania', 'Spain', 'Sweden', 'United Kingdom',
                    'Norway', 'Russia',  # in Europe, following the United Nations classification.
                    'Ukraine']),
        ("North America", ['Canada', 'United States', 'Mexico']),
        ("South America", ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Venezuela']),
        ("Asia", ['Turkey', 'Kazakhstan', 'Uzbekistan', 'China', 'India',
                  'Indonesia', 'Japan', 'Malaysia', 'South Korea', 'Taiwan',
                  'Thailand', 'Australia', 'New Zealand']),
        ("Africa", ['Algeria', 'Egypt', 'Nigeria', 'South Africa']),
        ("Middle East", ['Iran', 'Kuwait', 'Saudi Arabia', 'United Arab Emirates']),
    )

    # country -> continent; setdefault keeps the first table a name appears in
    continent_of = {}
    for continent_name, members in continent_members:
        for member in members:
            continent_of.setdefault(member, continent_name)

    Consumers_dict = {}
    for position, country in enumerate(Consumers_countries_list):
        Consumers_dict[country] = {
            "continent": continent_of.get(country, "Middle East"),
            "index": position,
            "values": Consumers_values_list[position],  # values share the country's position
        }

    return Consumers_dict

def get_Consumers_data(data_list):
    """
    Split the parsed EnergyConsumers rows into years, country names and values.

    data_list is a list of lists. Its first row is the header, e.g.
    ['', '1990', '1991', ...]; every following row describes one country, e.g.
    ['Belgium', '47.79839091', '50.01946971', ...].

    Returns a 3-tuple:
        years     - the header row without its first cell
        countries - the first cell of every data row, in input order
        values    - one list of floats per data row, aligned with countries
    """
    # the first header cell is not a year, so the years start from index 1
    year_labels = data_list[0][1:]

    # walk the data rows once: column 0 is the country name, the remaining
    # cells are numeric strings that are converted to float
    parsed_rows = [
        (row[0], [float(cell) for cell in row[1:]])
        for row in data_list[1:]
    ]

    country_names = [name for name, _ in parsed_rows]
    readings = [numbers for _, numbers in parsed_rows]

    return year_labels, country_names, readings
    

def read_file(filename):
    """
    Read one of the three known input files and return its relevant data.

    The whole file is read and split into lines; what happens next depends
    on the exact value of filename:

    "EnergyConsumers.txt"
        The first two lines are skipped. In every remaining line tabs are
        replaced by commas and the line is split on ",". The resulting list
        of rows is handed to get_Consumers_data and its result
        (years list, countries list, values list) is returned.
    "EnergyRawDataFinal.txt"
        The first line (the column names) is dropped and the remaining
        lines are returned unchanged as a list of strings.
    "CarbonEmissions.txt"
        The first line is dropped and the remaining lines are returned as a
        list of strings in which every tab is replaced by a comma.

    Raises:
        ValueError: if filename is none of the three names above. Previously
            the function fell off the end and returned None, which only
            surfaced later as a confusing error in the caller.
        OSError: propagated from open() when the file cannot be read.
    """
    with open(filename, "r") as file:
        # read the entire file; splitlines() drops the line terminators
        lines = file.read().splitlines()

    if filename == "EnergyConsumers.txt":
        # the actual data (header row with the years + one row per country)
        # starts after the first two lines
        rows = [line.replace("\t", ",").split(",") for line in lines[2:]]
        return get_Consumers_data(rows)

    if filename == "EnergyRawDataFinal.txt":
        # first row is the names of the columns, 2nd row onwards is data
        return lines[1:]

    if filename == "CarbonEmissions.txt":
        return [line.replace("\t", ",") for line in lines[1:]]

    raise ValueError("read_file does not know how to parse: " + repr(filename))

In [6]:
if __name__ == "__main__":
    # Driver: loads the three input files, produces every plot listed in the
    # docstring at the top of the file and prints the answer to question 12.

    # Dictionary to set the various plot parameters. It is mutated before each
    # plotting call, so every key that is not reassigned keeps the value it was
    # given in an earlier step.
    param_dict = {
        "suptitle": "", "sup_fontsize": 45,
        "subplot_title": "", "subp_fontsize": 35,
        "xlabel": "", "x_fontsize": 30,
        "ylabel": "", "y_fontsize": 30,
        "rows": 1, "cols": 1,
        "fig_len": 50, "fig_wid": 50,
        "color": "red", "edgecolor": "black",
        "bef_color": "green", "aft_color": "red",
        "alpha": 0.5, "aft_alpha": 0.3,
        "leg_fontsize": 18,
        "title_list": [], "title_pad": 20
    }

    # 1.	Load the file EnergyConsumers.txt into a 2D array named Consumers.
    Consumers_years_list, Consumers_countries_list, Consumers_values_list = read_file("EnergyConsumers.txt")
    Consumers_dict = create_Consumers_dict(Consumers_countries_list, Consumers_values_list)
    Consumers = np.array(Consumers_values_list)  # 2D array

    # 2.	Create a single histogram of the consumption for each country and each year from Consumers.
    # set param for (Country-wise) - 44 plots, 11 rows, 4 cols
    param_dict["rows"] = 11
    param_dict["cols"] = 4
    param_dict["suptitle"] = 'Country-wise Consumption for the years 1990 - 2016'
    param_dict["sup_fontsize"] = 45
    param_dict["title_list"] = Consumers_countries_list
    param_dict["subp_fontsize"] = 35
    param_dict["xlabel"] = "Consumption (MTOE)"
    param_dict["x_fontsize"] = 30
    param_dict["ylabel"] = "Count"
    param_dict["y_fontsize"] = 30
    param_dict["pad"] = 6.0
    param_dict["fig_len"] = 45
    param_dict["fig_wid"] = 45

    # plots for all the countries in a single figure:
    # the 2D array, 0 for row-wise plotting, param_dict to set plot params,
    # 1 histogram per axis, filename to save the plot to
    plot_hist(Consumers, 0,
              param_dict, hist_num = 1,
              filename = "2-Consumption_all_countries.pdf")

    # set param for (Year-wise) - 27 plots, 9 rows, 3 cols
    param_dict["rows"] = 9
    param_dict["cols"] = 3
    param_dict["suptitle"] = 'Consumption for the years 1990 - 2016 for each country'
    param_dict["sup_fontsize"] = 45
    param_dict["title_list"] = Consumers_years_list
    param_dict["pad"] = 6.0
    param_dict["fig_len"] = 40
    param_dict["fig_wid"] = 40
    param_dict["color"] = "yellow"

    # plots for all the years in a single figure:
    # the 2D array, 1 for column-wise plotting, param_dict to set plot params,
    # 1 histogram per axis, filename to save the plot to
    plot_hist(Consumers, 1,
              param_dict, hist_num = 1,
              filename = "2-Consumption_all_years.pdf")

    # plot each country and each year separately and save them using PdfPages
    # i.e 44 individual plots for countries and 27 individual plots for years
    plot_hist_each(Consumers, Consumers_countries_list, Consumers_years_list)

    # 3.	Load the file EnergyRawDataFinal.txt into any container
    EnergyType_data_list = read_file("EnergyRawDataFinal.txt")
    EnergyType, EnergyType_country_list, EnergyType_years_list = get_EnergyType(EnergyType_data_list)

    # 4.	Make a single creative graph comparing the Electricity
    #       and Natural Gas for each country and each year from EnergyType.
    param_dict["suptitle"] = 'Electricity vs Natural Gas in bcm for the years 1990 - 2016'
    param_dict['sup_fontsize'] = 12
    param_dict["subp_fontsize"] = 10
    param_dict["xlabel"] = 'Year'
    param_dict["x_fontsize"] = 6
    param_dict["ylabel"] = 'Consumption (bcm)'
    param_dict["y_fontsize"] = 6
    param_dict["leg_fontsize"] = 8
    param_dict["pad"] = 1.0
    param_dict["rows"] = 11
    param_dict["cols"] = 4
    param_dict["fig_len"] = 15
    param_dict["fig_wid"] = 15
    param_dict["yticks_fontsize"] = 4
    param_dict["xticks_fontsize"] = 4

    # individual plots
    plot_elec_ngas_each(EnergyType, EnergyType_country_list, EnergyType_years_list)

    # all countries in a single figure
    plot_elec_ngas_all(EnergyType, EnergyType_country_list, EnergyType_years_list,
                       param_dict, filename = "4-Electricity_Natural_Gas_all_countries.pdf")

    # 5.	Save a 2D array named Ind with all the data corresponding to the industrial
    #       usage type in Consumers.
    index_2000 = Consumers_years_list.index('2000')  # get the index of year 2000 from Consumers_years_list
    # NOTE(review): 0.15 / 0.35 are presumably the industrial shares applied
    # before / after year 2000 - confirm against slice_and_compute.
    Ind = slice_and_compute(Consumers, index_2000, 0.15, 0.35)

    # 6.	Create a single histogram of Ind for each country and each year.
    # Make the color of the graph different for the two time periods (before and after 2000).

    # set params for 44 plots - 11 rows - 4 cols
    param_dict["rows"] = 11
    param_dict["cols"] = 4
    param_dict["title_list"] = Consumers_countries_list
    param_dict["suptitle"] = "Industrial usage for the periods before and after year 2000"
    param_dict["sup_fontsize"] = 40
    param_dict["xlabel"] = "Industrial usage (MTOE)"
    param_dict["ylabel"] = "Count"
    param_dict["subp_fontsize"] = 35
    param_dict["x_fontsize"] = 25
    param_dict["y_fontsize"] = 25
    # This used to read `param_dict["leg_fontsize"]: 40`, which Python parses as
    # an annotation without a value, so nothing was stored and the legend kept
    # the font size 8 set in step 4. It has to be a real assignment.
    param_dict["leg_fontsize"] = 40
    param_dict["pad"] = 6.0
    param_dict["fig_len"] = 50
    param_dict["fig_wid"] = 50

    # 2 plots per axis:
    # the 2D array, 0 for row-wise plotting, param_dict to set plot params,
    # 2 histograms on each axis, index of year 2000, filename to save to
    plot_hist(Ind, 0, param_dict, hist_num = 2,
              slice_index = index_2000,
              filename="6-Industrial_usage_all_countries.pdf")

    # individual plots
    plot_before_after_each(Ind, "Ind", Consumers_countries_list, index_2000)

    # 7.	Make a histogram of the consumption for China only from Consumers.
    # get the position of China in Consumers_countries_list;
    # this way we get China's row number in the Consumers 2D array
    China_index = Consumers_countries_list.index("China")

    # set params for the plot
    param_dict["suptitle"] = "China consumption from 1990 - 2016"
    param_dict["sup_fontsize"] = 10
    param_dict["xlabel"] = "Consumption (MTOE)"
    param_dict["x_fontsize"] = 8
    param_dict["ylabel"] = "Count"
    param_dict["y_fontsize"] = 8

    # 1 plot - 1 axis - 1 row - 1 col:
    # the 2D array, 0 for row-wise plotting, param_dict to set plot params,
    # "single" for 1 plot 1 axis, index of China and filename to save to
    plot_hist(Consumers, 0, param_dict, hist_type = "single",
              slice_index = China_index, filename = "7-China_consumption.pdf")

    # 8.	Create a new numpy array named Continent for usage by continent data from Consumers.
    Continent = get_Continent(Consumers_dict)

    # 9.	Create a histogram of Continents for Only Residential usage
    # NOTE(review): 0.85 / 0.65 are presumably the residential shares applied
    # before / after year 2000 - confirm against slice_and_compute.
    Res = slice_and_compute(Continent, index_2000, 0.85, 0.65)

    # set params for the plot - 6 plots - 3 rows and 2 cols
    param_dict["fig_len"] = 9.37
    param_dict["fig_wid"] = 9.37
    param_dict["rows"] = 3
    param_dict["cols"] = 2
    param_dict["subp_fontsize"] = 15
    param_dict["sup_fontsize"] = 20
    param_dict["suptitle"] = "Residential usage of Continents before and after year 2000"
    param_dict["xlabel"] = "Residential usage (MTOE)"
    param_dict["x_fontsize"] = 12
    param_dict["ylabel"] = "Count"
    param_dict["y_fontsize"] = 12
    param_dict["leg_fontsize"] = 7.69
    param_dict["pad"] = 1.0

    # 6 plots, 2 plots - 1 axis - 3 rows - 2 cols:
    # the 2D array, 0 for row-wise plotting, param_dict to set plot params,
    # 2 histograms on each axis, index of year 2000, filename to save to
    plot_hist(Res, 0, param_dict, hist_num = 2,
              slice_index = index_2000, filename = "9-Residential_usage_all_continents.pdf")

    # individual plots
    # NOTE(review): Res is derived from Continent, yet the list of countries is
    # passed here - confirm how plot_before_after_each uses it for "Res".
    plot_before_after_each(Res, "Res", Consumers_countries_list, index_2000)

    # 10.	Load the file CarbonEmissions.txt into a list named CarbonEmissions.
    CarbonEmissions = read_file("CarbonEmissions.txt")
    CarbonEmissions_country_list, CarbonEmissions_values = get_CarbonEmissions(CarbonEmissions)

    # 11.	Using a double bar graph, compare the twenty countries' carbon emissions (million metric tons)
    # from CarbonEmissions to their respective consumption in Consumers for the year 2015.
    # for each country in CarbonEmissions_country_list, get the respective 2015 consumption value from Consumers
    Consumers_2015_value_list = get_2015_value(Consumers, CarbonEmissions_country_list, Consumers_countries_list)

    plot_double_bar_graph(CarbonEmissions_country_list, CarbonEmissions_values,
                          Consumers_2015_value_list, filename = "11-CarbonEmissions_vs_2015_Consumption.pdf")

    # 12.	What are the countries with the highest energy usage?
    #       In what continent are they located?
    #       Use print() to answer this question.
    top5_country, top5_continent = get_highest_usage(CarbonEmissions_country_list, Consumers_2015_value_list)

    print("The top 5 countries with the highest energy usage are: ")
    # number the answers from 1 and pair every country with its continent
    for rank, (country, continent) in enumerate(zip(top5_country, top5_continent), start=1):
        print(str(rank) + ") " + country + " located in " + continent)




The top 5 countries with the highest energy usage are: 
1) China located in Asia
2) United States located in North America
3) India located in Asia
4) Russia located in Europe
5) Japan located in Asia
