# Histograms with specific countries highlighted

This notebook is primarily for creating the plots used in the country dossiers of the UBA project for which this code was originally produced. 

It allows the user to highlight one or more countries of interest within the distribution; a separate plot is produced for each highlighted country.

In [1]:
# import modules needed for the code to run

# system 
import re
import os

# calculation
import pandas as pd
import numpy as np

# plotting
%matplotlib inline
import seaborn
import matplotlib

# global stocktake tools
from gst_tools.make_plots import *
import gst_tools.gst_utils as utils


In [2]:
# USER INPUT
# Everything a user is expected to change lives in this cell.

# 1) Input file. The name is joined with the 'proc-data' folder when the data is read.
#    Other files that have been used with this notebook:
#      'UN-population-data-2017.csv'
#      'PRIMAP-hist_v2.0_Energy-CO2.csv'
#      'PRIMAP-hist_v2.0_IPPU-KyotoGHG.csv'
#      'PRIMAP-hist_UN-2017_calc__CO2-per-population.csv'
#      'PRIMAP-hist_v2.0_KyotoGHG-AR4-total-excl-LU.csv'
#      'WDI2017_GDP-PPP.csv'
data_file_name = 'PRIMAP-hist_UN-2017_calc_CO2-total-excl-LU-per-population.csv'

# 2) Text used to label the plots: the data source and the name of the plotted variable.
var_name_to_display = 'per capita CO2 emissions'
data_source_to_display = "PRIMAP-hist v2.0; UN"

# 3) Years to analyse. Each entry is used as a column key into the data,
#    so it is written as a string.
years_of_interest = ['2015']

# 4) Countries to highlight, as ISO3 country codes. One plot is made per entry.
countries_of_interest = ['IND', 'MEX', 'VNM', 'ETH']

# 5) Saving. True: plots are saved to file.
#    False: plots are only shown on screen and not written to a file.
save_opt = True


In [3]:
# DATA READING AND PREP

# Load the selected csv file from the 'proc-data' folder.
fname_in = os.path.join('proc-data', data_file_name)
data = pd.read_csv(fname_in)

# Run the format check. A failed check only prints a warning; the notebook carries on.
if not utils.verify_data_format(data):
    print('WARNING: The data is not correctly formatted! Please check before continuing!')

# The variable name and the unit are each taken from the first distinct entry
# of their column.
variable = data['variable'].unique()[0]
unit = data['unit'].unique()[0]

# Tidy up for the next steps: index the table by country (done by the helper)
# and then drop every row that contains at least one missing value.
data_years = (
    utils.set_countries_as_index(data)
    .dropna(axis=0, how='any')
)

# uncomment the line below to display the data
#data_years

In [4]:
# Plot 1 - make a histogram of absolute data

# One histogram is produced for every combination of highlighted country and
# year of interest. The country code is printed once, before its plots.
for this_country in countries_of_interest:
    print(this_country)

    for selected_year in years_of_interest:
        year_label = str(selected_year)

        # Title shown on the plot and name under which the plot is identified/saved.
        plot_title = ''.join(['distribution of ', var_name_to_display, ' in ', year_label])
        plot_file_stem = '-'.join([variable, 'absolute', year_label, this_country])

        make_histogram(
            data_years[selected_year],
            unit,
            xlabel=var_name_to_display,
            title=plot_title,
            sourcename=data_source_to_display,
            remove_outliers=True,
            save_plot=save_opt,
            selected_country=this_country,
            plot_name=plot_file_stem,
        )


IND
---------
Making  CO2-total-excl-LU-per-population-absolute-2015-IND plot.
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 2015, dtype: float64)
upper outliers are: 
country
ARE    30.040521
BRN    22.752202
KWT    25.661912
QAT    53.595773
TTO    49.555471
Name: 2015, dtype: float64
---
bins set to range(0, 21)
MEX
---------
Making  CO2-total-excl-LU-per-population-absolute-2015-MEX plot.
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 2015, dtype: float64)
upper outliers are: 
country
ARE    30.040521
BRN    22.752202
KWT    25.661912
QAT    53.595773
TTO    49.555471
Name: 2015, dtype: float64
---
bins set to range(0, 21)
VNM
---------
Making  CO2-total-excl-LU-per-population-absolute-2015-VNM plot.
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 2015, dtype: float64)
upper outliers are: 
country
ARE    30.040521
BRN    22.752202
KWT    25.66191

In [7]:
# Plot 2 - trends

# Length of the trend window in years. Defined once so that the value passed to
# the trend calculation and the number quoted in the plot title always agree.
num_years_trend = 5

# Calculate trends and define plotting params.
# utils.calculate_trends returns three objects, kept here as `trends`,
# `rolling_trends` and `trends_unit`. Only the last column of `rolling_trends`
# is plotted below, with `trends_unit` passed on as its unit.
# NOTE(review): the exact definition of the two trend tables lives in
# gst_tools.gst_utils.calculate_trends - confirm there before relying on it.
trends, rolling_trends, trends_unit = utils.calculate_trends(data_years, num_years_trend=num_years_trend)
trends_variable = 'Annual average change in ' + var_name_to_display

# The year in the title is the label of the last column of the input data.
thistitle = (str(num_years_trend) + "-year rolling average trend in \n" + var_name_to_display
             + "\nin " + str(data_years.columns[-1]))

# plot the trend in the final year, once per highlighted country
for this_country in countries_of_interest:
    make_histogram(rolling_trends.iloc[:,-1], trends_unit,
                   xlabel=trends_variable,
                   title=thistitle,
                   # NOTE(review): ktuk is presumably the multiplier used for the
                   # outlier fences - confirm in make_histogram.
                   remove_outliers=True, ktuk=1.5,
                   selected_country=this_country,
                   sourcename=data_source_to_display,
                   save_plot=save_opt,
                   # '-' before the country code, matching the naming used for Plot 1
                   # (previously the code was glued on: 'rolling-averageIND').
                   plot_name=(variable + '-' + 'rolling-average' + '-' + this_country))
    

Averaging trend over 5 years.
---------
Making  CO2-total-excl-LU-per-population-rolling-averageIND plot.
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 2015, dtype: float64)
upper outliers are: 
country
MMR    14.710766
MOZ    22.507771
NIU    31.461331
Name: 2015, dtype: float64
---
bins set to range(-11, 11)
---------
Making  CO2-total-excl-LU-per-population-rolling-averageMEX plot.
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 2015, dtype: float64)
upper outliers are: 
country
MMR    14.710766
MOZ    22.507771
NIU    31.461331
Name: 2015, dtype: float64
---
bins set to range(-11, 11)
---------
Making  CO2-total-excl-LU-per-population-rolling-averageVNM plot.
---------
-----------
Identifying and removing outliers
lower outliers are:
Series([], Name: 2015, dtype: float64)
upper outliers are: 
country
MMR    14.710766
MOZ    22.507771
NIU    31.461331
Name: 2015, dtype: float64
---
bins set t

Code for testing

In [6]:
# Manual check: display the '2014' column of the rolling trends (one value per country).
rolling_trends.loc[:, '2014']


country
AFG     8.646483
AGO     1.061128
ALB     5.718509
AND    -0.673255
ARE     1.911793
ARG     1.366305
ARM     5.347441
ATG    -0.004145
AUS    -2.251726
AUT    -1.505032
AZE     1.850760
BDI     9.556493
BEL    -2.697585
BEN     3.490380
BFA     5.334017
BGD     4.119975
BGR     0.798957
BHR    -0.106710
BHS     6.213402
BIH     3.855013
BLR     1.129811
BLZ    -4.748041
BOL     6.294192
BRA     4.123189
BRB    -4.825594
BRN     2.485390
BTN    10.770574
BWA    11.366646
CAF     2.195699
CAN    -0.033988
         ...    
SWE    -2.066962
SWZ     1.415253
SYC     0.344722
SYR   -11.685790
TCD     4.450765
TGO    -4.235753
THA     2.920756
TJK    10.156671
TKM     4.858583
TLS     9.767111
TON    -1.715492
TTO     1.417353
TUN     1.584459
TUR     0.884113
TUV     2.094304
TZA    10.801300
UGA     5.521817
UKR    -0.828396
URY    -2.469729
USA    -0.491703
UZB    -3.516974
VCT    -5.803216
VEN    -0.579444
VNM     4.606712
VUT     4.134188
WSM     3.195396
YEM    -2.393815
ZAF   