In [1]:
# Import Modules
import pandas as pd
import numpy as np
import altair as alt
import pymannkendall as mk
import warnings


In [2]:
# Show every column when a dataframe is rendered (no truncation)
pd.options.display.max_columns = None

# Silence all warnings so that cell outputs stay uncluttered
warnings.simplefilter("ignore")


# Load Data

In [3]:
# Read the cleaned, combined dataset (stored in long format) into a dataframe
out_df = pd.read_csv(filepath_or_buffer="./data/complete_dataset_LongFormat.csv")


## **Mann-Kendall Analysis to Evaluate Trends in GHG emissions**

It appears that high-income countries are decreasing their emissions over time while their GDP is growing. Another hypothesis is that developing countries are increasing their GHG emissions in order to lower poverty and develop. To better understand these trends and relationships over time, it would be useful to know whether each variable is increasing or decreasing through time. Some questions that I'd like to answer are:
* Which countries show increasing trends in GDP, with decreasing trends in GHG emissions?
* Can we identify countries that have high GDP, but are serious about meeting Paris Climate Agreement goals?
* Which countries have increasing or decreasing trends in percent poverty? How does this relate to their GHG emissions over time?

To answer some of these questions above, I'm going to use the nonparametric Mann-Kendall trend test to evaluate which countries are showing increasing or decreasing trends in our variables of interest. I'll then make a tile bar visual to see the trends for each country.

### **Prepare Data for Mann-Kendall**

In [4]:
# Reshape the master frame for the Mann-Kendall analysis: the three
# variables of interest are stacked into Parameter / Value columns.
# Only rows from 2010 onward (inclusive) are kept so there is
# regular spacing between measurements.
master_df_melted = out_df.melt(
    id_vars=["Country", "Year", "c_code"],
    value_vars=["Total_GHG", "GDP_Total", "Percent_Poverty"],
    var_name="Parameter",
    value_name="Value",
).loc[lambda melted: melted["Year"] >= 2010]


### **Build Mann-Kendall Function and Analyze Trends**

In [5]:
def mk_summarize(in_df, min_samples=4):
    """
    Summarize one time series with the Mann-Kendall trend test.

    Written to be applied per group (see groupby(...).apply) to the
    GHG emissions, GDP total, and poverty time-series data.

    Parameters:

    -in_df: a dataframe with columns Year and Value. Rows are ordered
        by Year and rows with a missing Value are dropped before the
        test is run.
    -min_samples: minimum number of non-missing values required to
        run the test (default 4).

    Returns:

    A pandas Series indexed by Trend, h, p_value, z, Tau, S, var_s,
    slope and intercept. When fewer than min_samples values remain,
    Trend is "Insufficient Data" and every other entry is NaN.
    """

    # Labels for the result; they are matched positionally to the
    # values returned by mk.original_test
    mk_stats_params = [
        "Trend",
        "h",
        "p_value",
        "z",
        "Tau",
        "S",
        "var_s",
        "slope",
        "intercept",
    ]

    # Order the observations by year and drop missing values
    values = in_df.sort_values("Year", ascending=True)["Value"].dropna().to_numpy()

    # Too few observations: report insufficient data instead of
    # running the test
    if len(values) < min_samples:
        out_dict = {param: np.nan for param in mk_stats_params}
        out_dict["Trend"] = "Insufficient Data"
        return pd.Series(out_dict)

    # Perform the Mann-Kendall trend test
    mk_trends = mk.original_test(values)

    # Label the outputs; pandas raises a ValueError if the number of
    # returned values ever differs from the number of labels
    return pd.Series(list(mk_trends), index=mk_stats_params)


# Run the Mann-Kendall summary once per Country / c_code / Parameter
# series and flatten the grouped result back into a regular dataframe
mk_results = (
    master_df_melted.groupby(["Country", "c_code", "Parameter"])
    .apply(mk_summarize)
    .reset_index()
)

# Preview a random subset of the results; the fixed seed keeps the
# displayed rows identical every time the notebook is re-run
mk_results.sample(10, random_state=42)


Unnamed: 0,Country,c_code,Parameter,Trend,h,p_value,z,Tau,S,var_s,slope,intercept
326,Mauritania,MRT,Total_GHG,increasing,True,0.004208,2.862167,0.733333,33.0,125.0,0.1728571,11.42714
465,Solomon Islands,SLB,GDP_Total,increasing,True,8.3e-05,3.93548,1.0,45.0,125.0,77780690.0,1108266000.0
73,Brunei,BRN,Percent_Poverty,Insufficient Data,,,,,,,,
20,Argentina,ARG,Total_GHG,no trend,False,0.073638,-1.788854,-0.466667,-21.0,125.0,-2.724,430.903
170,Eswatini,SWZ,Total_GHG,increasing,True,0.000461,3.502303,0.888889,40.0,124.0,0.04333333,2.165
408,Poland,POL,GDP_Total,increasing,True,8.3e-05,3.93548,1.0,45.0,125.0,51595350000.0,762201500000.0
476,South Korea,KOR,Total_GHG,increasing,True,0.002358,3.041052,0.777778,35.0,125.0,7.473333,594.565
406,Philippines,PHL,Percent_Poverty,decreasing,True,0.000113,-3.861514,-0.977778,-44.0,124.0,-1.111111,11.81667
232,Indonesia,IDN,Percent_Poverty,decreasing,True,8.3e-05,-3.93548,-1.0,-45.0,125.0,-1.0,10.5
448,Serbia,SRB,Percent_Poverty,no trend,False,0.649886,-0.453921,-0.133333,-6.0,121.333333,-0.2,6.25


### **Visualize the Mann-Kendall Trends**

To visualize the Mann-Kendall Trends in the report, we will examine trends for countries that show decreasing trends in greenhouse gas emissions. We will use a tile bar visual to show green for increasing and red for decreasing trends.

In [6]:
# Flag the rows where total GHG emissions show a decreasing trend
# (1 = decreasing Total_GHG, 0 = everything else)
mk_results["plot"] = (
    (mk_results["Trend"] == "decreasing")
    & (mk_results["Parameter"] == "Total_GHG")
).astype("int64")

# List of countries that have decreasing
# trends for GHG emissions
countries = mk_results.loc[mk_results["plot"] == 1, "Country"].unique()

# Keep every parameter for those countries. The explicit copy makes
# this an independent frame, so relabelling Trend below does not
# write into a slice of mk_results.
mk_sig_trends = mk_results[mk_results["Country"].isin(countries)].copy()

# For the purposes of the visual,
# show insufficient data as no trend
mk_sig_trends["Trend"] = mk_sig_trends["Trend"].replace(
    "Insufficient Data", "no trend"
)

# Draw a tile chart to show increasing and decreasing trends for each country.
# The configure_* calls are placed last in the chain.
out_chart = (
    alt.Chart(mk_sig_trends)
    .mark_rect(stroke="black", strokeWidth=0.25)
    .encode(
        x=alt.X("Country", axis=alt.Axis(labelAngle=40)),
        y=alt.Y("Parameter", axis=alt.Axis(labelFontWeight="bold")),
        color=alt.Color(
            "Trend:N",
            scale=alt.Scale(
                domain=["decreasing", "increasing", "no trend"],
                range=["red", "green", "lightgray"],
            ),
        ),
    )
    .properties(title="Mann-Kendall Trends between 2010 and 2019")
    .configure_legend(orient="left", padding=30)
    .configure_title(anchor="start", fontSize=20, dx=115, offset=10)
)

out_chart


In [7]:
# Record the Python/IPython versions, machine details, and the versions
# of the imported packages used for this run of the notebook
%reload_ext watermark

%watermark -iv -v -m

Python implementation: CPython
Python version       : 3.10.6
IPython version      : 8.5.0

Compiler    : Clang 13.1.6 (clang-1316.0.21.2.5)
OS          : Darwin
Release     : 21.5.0
Machine     : x86_64
Processor   : i386
CPU cores   : 8
Architecture: 64bit

numpy        : 1.23.3
sys          : 3.10.6 (main, Aug 30 2022, 05:12:36) [Clang 13.1.6 (clang-1316.0.21.2.5)]
pymannkendall: 1.4.2
altair       : 4.2.0
pandas       : 1.5.0

