In [None]:
# Pandas is for formatting the data into matrices, making it easier to query
# Pickle is for serializing python objects to disk so they can be reloaded later

import pandas as pd
import pickle
import sys
import os
import json

# Widen pandas' display limits so more of each table is visible at once
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 500)

# All EORA files live in the "EORA_files" folder under the working directory.
# Both paths keep a trailing "/" because later cells build file paths by
# appending file names directly to eora_file_folder.
root = "{}/".format(os.getcwd())
eora_file_folder = "{}EORA_files/".format(root)

In [None]:

# Re-create the EORA data in dictionary form


In [None]:
# Years covered by the EORA pickles: "1990" through "2013", kept as strings
# because they are used both as dictionary keys and as file-name fragments
years = [str(year) for year in range(1990, 2014)]

# Each dictionary maps a year string to the object unpickled for that year,
# plus a "years" entry listing every year that was loaded
xacts = {"years": years}
mpcts = {"years": years}

# NOTE: pickle.load can execute arbitrary code while unpickling, so
# eora_file_folder must only contain files from a trusted source.
for yr in years:
    xact_file_dest = eora_file_folder + "eora_xact_" + yr + ".pkl"
    print("loading: " + xact_file_dest)
    # The with-block closes the file handle as soon as the load finishes
    with open(xact_file_dest, 'rb') as xact_file:
        xacts[yr] = pickle.load(xact_file)

    mpct_file_dest = eora_file_folder + "eora_impact_" + yr + ".pkl"
    print("loading: " + mpct_file_dest)
    with open(mpct_file_dest, 'rb') as mpct_file:
        mpcts[yr] = pickle.load(mpct_file)


In [None]:
# Retrieve industry types: the values of the second level of the 2013
# transaction table's row index
industry_level = xacts["2013"].index.levels[1]
transaction_labels = list(industry_level)
transaction_labels

In [None]:
# Retrieve impact types: the values of the first level of the 2013
# impact table's row index
impact_level = mpcts["2013"].index.levels[0]
impacts_labels = list(impact_level)
impacts_labels

## Explore trade stories between particular (country, industry) pairs

In [None]:
# Build a year-by-year series (a "trade story") for every combination of
# source (country, industry) and destination (country, industry)

trade_stories = {}

# Source side: countries by ISO3 code, industries as named in "transaction_labels"
src_countries = ["CHN"]
src_industries = ["Wood and Paper", "Transport Equipment", "Electrical and Machinery", "Construction"]

# Destination side
dst_countries = ["AGO", "DZA"]
dst_industries = ["Transport", "Transport Equipment"]

for src_c in src_countries:
    for dst_c in dst_countries:
        for src_i in src_industries:
            for dst_i in dst_industries:
                # Key format: <src country>_<src industry>_to_<dst country>_<dst industry>
                trade_name = "_".join([src_c, src_i, "to", dst_c, dst_i])
                print(trade_name)
                # Destination selects the column, source selects the row
                trade_stories[trade_name] = {
                    yr: xacts[yr][dst_c][dst_i][src_c][src_i]
                    for yr in xacts["years"]
                }

In [None]:
# Visualize trade stories: one line chart per story, with the story name
# used as the x-axis label

import matplotlib.pyplot as plt

for story_name, yearly_values in trade_stories.items():
    fig, ax = plt.subplots()
    ax.plot(pd.Series(yearly_values))
    ax.set_ylabel("Trade in 000's of USD")
    ax.set_xlabel(story_name)

    plt.show()
        


## Analyze changes in trade between particular (country, industry) pairs

In [None]:
# Search for stories that meet certain criteria

## First, compare changes in trade between the snapshot years 1990, 2000, 2010

# For trade in all industries originating in China,
# and going to six African countries:

## NATE: Learning Extension
#  change code to see trade from select_african_countries to CHN

src_countries = ["CHN"]
src_industries = transaction_labels

select_african_countries = ["AGO", "DZA", "BEN", "BWA", "BFA", "BDI"]
# Other African countries that can be added to the list above:
#     "CMR", "CPV", "CAF", "TCD", "COG",
#     "DJI", "EGY", "GNQ", "ERI", "ETH", "GAB",
#     "GMB", "GHA", "GNB", "GIN", "CIV", "KEN",
#     "LSO", "LBR", "LBY", "MDG", "MWI", "MLI",
#     "MRT", "MUS", "MAR", "MOZ", "NAM", "NER",
#     "NGA", "RWA", "STP", "SEN", "SYC", "SLE",
#     "SOM", "ZAF", "SDN", "SWZ", "TZA", "TGO",
#     "TUN", "UGA", "COD", "ZMB", "ZWE", "SSD"

dst_countries = select_african_countries
dst_industries = transaction_labels

# Maps a trade name to [first-snapshot value, ratio, ratio, ...]
trade_changes = {}

# Helper function to rebuild the most recent absolute value from a change list
def reconstruct_trade_val_from_perc_change_list(ls):
    """Return the latest absolute value encoded by a change list.

    ls[0] is a starting value and every later entry is the ratio of that
    period's value to the previous period's value. Each step applies
    value + value * (ratio - 1), i.e. scales the running value by the ratio.

    A one-element list returns ls[0] unchanged; an empty list raises IndexError.
    """
    current_value = ls[0]
    for ratio in ls[1:]:
        current_value = current_value + current_value * (ratio - 1)
    return current_value

# For every source/destination pair, store the 1990 value followed by the
# ratio of each later snapshot (2000, 2010) to the snapshot before it
for src_c in src_countries:
    for dst_c in dst_countries:
        for src_i in src_industries:
            for dst_i in dst_industries:

                ## **** Haven't yet removed Total from EORA labels, should do that in
                # Table construction
                if src_i == "Total" or dst_i == "Total":
                    continue

                trade_name = "_".join([src_c, src_i, "to", dst_c, dst_i])
                print(trade_name)
                changes = trade_changes[trade_name] = []

                for yr in ["1990", "2000", "2010"]:
                    data = xacts[yr][dst_c][dst_i][src_c][src_i]
                    if yr == "1990":
                        # First snapshot: keep the absolute value
                        changes.append(data)
                        continue
                    # Later snapshots: rebuild the previous absolute value from
                    # the list so far, then store new / previous
                    pst_data = reconstruct_trade_val_from_perc_change_list(changes)
                    changes.append(data / float(pst_data))

In [None]:
# Return the trades that grew past a threshold from 2000 to 2010

# Growth threshold is a ratio, not a percentage: trade_changes stores
# new_value / previous_value for each later snapshot, so 10 means the
# last snapshot is more than ten times the one before it
growth_threshold = 10
# Minimum value in the first snapshot, measured in thousands of USD
starting_val_threshold = 500

trades_of_interest = {}
for chng, trd_chg in trade_changes.items():
    # trd_chg[0] is the first-snapshot value, trd_chg[-1] the final ratio
    if trd_chg[-1] > growth_threshold and trd_chg[0] > starting_val_threshold:
        trades_of_interest[chng] = trd_chg
        print(chng)

In [None]:
trades_of_interest

## Observe relationship between value of trade in an industry and specific emissions factors

In [None]:
### THIS CODE TAKES A LONG TIME
# To help you out with the initial exploration, the results of this cell
# are saved to "./EORA_files/textiles_value_and_CO2.pkl"
# For a first look, skip 3 blocks to where you see ### QUICKSTART FROM HERE


# Create time series of total textiles and wearing apparel trade & CO2 emission by country
# Example call: xacts["2008"].loc["ZWE"].loc["Textiles and Wearing Apparel"].sum()

# Start from an empty dictionary (replaces any trade_stories built earlier)
trade_stories = {}

# All countries: the values of the first level of the 2013 transaction index
# (no data for some countries)
country_level = xacts["2013"].index.levels[0]
src_countries = list(country_level)
src_industries = ["Textiles and Wearing Apparel"]

# Impact categories whose rows are summed to get CO2 emissions
CO2_categories = [
    'Biomass Burning (Gg CO2)',
    'CO2 (Gg)',
    'CO2 (exlc. LULUCF)',
    'CO2 (including biomass burning) from Agricultural Burning',
    'CO2 (including biomass burning) from Cement/Minerals',
    'CO2 (including biomass burning) from Energy Production',
    'CO2 (including biomass burning) from Forest Fires and Other Sources',
    'CO2 (including biomass burning) from Natural Decay',
    'CO2 (including biomass burning) from Solvents',
    'CO2 (including biomass burning) from Waste',
    'CO2 From Agricultural Burning',
    'CO2 From Cement/Minerals',
    'CO2 From Energy Production',
    'CO2 From Forest Fires and Other Sources',
    'CO2 From Natural Decay',
    'CO2 From Solvents',
    'CO2 From Waste',
]

def select_emissions_rows(matrix, categories):
    """Return a boolean mask marking the rows of matrix to keep.

    matrix must have an index level named "Category". The mask has one entry
    per row and is True where that row's "Category" value is in categories.
    """
    category_values = matrix.index.get_level_values("Category")
    return category_values.isin(categories)

#CHN_CO2_from_prod_by_year = {
#    "1990":eora_impact_1990["CHN"][select_emissions_rows(eora_impact_1990["CHN"], CO2_categories)],
#    "2000":eora_impact_2000["CHN"][select_emissions_rows(eora_impact_2000["CHN"], CO2_categories)],
#    "2013":eora_impact_2013["CHN"][select_emissions_rows(eora_impact_2013["CHN"], CO2_categories)]
#}

#dst_countries = ["AGO"]
#dst_industries = ["Wood and Paper"]

# For each country and industry, record one trade value and one CO2 total per year
for src_c in src_countries:
    print(src_c)
    # "ROW" is printed like every other code but gets no entries
    if src_c == "ROW":
        continue

    for src_i in src_industries:
        # Keys: <country>_<industry>_trade_value and <country>_<industry>_CO2_emissions
        trade_name = src_c+"_"+src_i+"_trade_value"
        impact_name = src_c+"_"+src_i+"_CO2_emissions"

        value_by_year = trade_stories[trade_name] = {}
        co2_by_year = trade_stories[impact_name] = {}

        for yr in xacts["years"]:
            # Trade value: the (country, industry) row summed over all of its entries
            value_by_year[yr] = xacts[yr].loc[src_c].loc[src_i].sum()

            # CO2: the industry's column in this country's impact table,
            # restricted to the CO2 category rows, then summed
            impact_matrix = mpcts[yr][src_c]
            co2_rows = select_emissions_rows(impact_matrix, CO2_categories)
            co2_by_year[yr] = impact_matrix[co2_rows][src_i].sum()

In [None]:
# Number of stories... counts the distinct three-character prefixes of the
# story keys, so if only looking at one trade this will be # of countries
len({story_key[0:3] for story_key in trade_stories})

In [None]:
# Dump trade_stories for Textiles and Wearing Apparel Industry
twa_info_dest = eora_file_folder + "textiles_value_and_CO2.pkl"
# The with-block flushes and closes the file even if pickling fails part-way
with open(twa_info_dest, 'wb') as twa_info_file:
    pickle.dump(trade_stories, twa_info_file)

In [None]:
### QUICKSTART FROM HERE

# Load trade_stories for Textiles and Wearing Apparel Industry
# NOTE: pickle.load can execute arbitrary code; only load a file you trust
twa_info_dest = eora_file_folder + "textiles_value_and_CO2.pkl"
with open(twa_info_dest, 'rb') as twa_info_file:
    twa_trade_stories = pickle.load(twa_info_file)

In [None]:
# Inspect which industry this reflects data for: the second "_"-separated
# field of the first story key
first_story_key = list(twa_trade_stories.keys())[0]
first_story_key.split("_")[1]

In [None]:
## This block creates one colored scatter plot per country:
## production value (x) against CO2 emissions (y), one point per year.
# The figures are saved as pages of a pdf in the EORA_files folder, at
# "./EORA_files/global_textile_GDP_CO2_coupling_progressions.pdf"

# By default the figures are only written to the pdf, not shown inline.
# Set show_inline = True below to also display each figure.

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
#%matplotlib inline

show_inline = False

# List all countries: the distinct three-character prefixes of the story keys
src_countries = list(pd.Series([c[0:3] for c in list(twa_trade_stories.keys())]).unique())
# This assumes that each trade story only has one trade type in it
src_industries = [list(twa_trade_stories.keys())[0].split("_")[1]]

# The with-block closes the pdf even if a country fails part-way through
with PdfPages(eora_file_folder+"global_textile_GDP_CO2_coupling_progressions.pdf") as pp:

    # Plot trade by emissions
    for src_c in src_countries:
        if src_c == "ROW":
            continue

        for src_i in src_industries:

            trade_name = src_c+"_"+src_i+"_trade_value"
            impact_name = src_c+"_"+src_i+"_CO2_emissions"

            trade_value = list(twa_trade_stories[trade_name].values())
            CO2_emissions = list(twa_trade_stories[impact_name].values())
            # Color each point by its position in the series
            # (the order the years were stored in)
            year_position = range(0, len(trade_value))

            fig, ax = plt.subplots(nrows=1, ncols=1)

            ax.scatter(trade_value, CO2_emissions, c=year_position)

            fig.suptitle(src_i+" Industry in "+src_c)

            ax.set_xlabel("Production Value in 000's of USD")
            ax.set_ylabel("CO2 emissions in Gigagrams")

            # Save before any show() so the page is written from a live figure
            pp.savefig(fig)
            if show_inline:
                plt.show()
            # Release the figure so many countries do not pile up in memory
            plt.close(fig)


In [None]:
# Loop over trade data - for each country store:
#   * production values: the first value, then year-over-year fractional changes
#   * CO2 emissions:     the first value, then year-over-year fractional changes
#   * alerts:            per year, [label, number of consecutive years with that label]
#
# Each time step is classified as (> = increase, < = decrease):
# 1) Production >, CO2 >
# 2) Production <, CO2 >
# 3) Production >, CO2 <
# 4) Production <, CO2 <
# Anything else (a change that is neither above nor below zero) is labelled
# "Rare case: world stable"

twa_trade_changes = {}

# Label for each (production rose?, emissions rose?) combination; only
# consulted when both changes are strictly above or below zero
alert_labels = {
    (True, True): "Production up, emissions up",
    (False, True): "Production down, emissions up",
    (True, False): "Production up, emissions down",
    (False, False): "Production down, emissions down",
}

# In this case, all countries (distinct key prefixes, in order of appearance)
src_countries = list(dict.fromkeys(key[0:3] for key in twa_trade_stories))
# In this case, Textiles and Wearing Apparel
src_industries = [list(twa_trade_stories.keys())[0].split("_")[1]]

for src_c in src_countries:
    for src_i in src_industries:

        ## **** Haven't yet removed Total from EORA labels, should do that in
        # Table construction
        if src_i == "Total":
            continue

        # Same names as in trade_stories
        production_values_deltas = src_c+"_"+src_i+"_trade_value"
        CO2_emissions_deltas = src_c+"_"+src_i+"_CO2_emissions"
        industry_alerts = src_c+"_"+src_i+"_alerts"

        prod_changes = twa_trade_changes[production_values_deltas] = []
        co2_changes = twa_trade_changes[CO2_emissions_deltas] = []
        alerts = twa_trade_changes[industry_alerts] = {}

        prod_data = twa_trade_stories[production_values_deltas]
        CO2_data = twa_trade_stories[CO2_emissions_deltas]

        for yr in list(prod_data.keys()):
            p_data = prod_data[yr]
            c_data = CO2_data[yr]

            if yr == "1990":
                # First year: keep the absolute values and open the record
                prod_changes.append(p_data)
                co2_changes.append(c_data)
                alerts[yr] = ["start record", 1]
                continue

            # Every later year is compared with the calendar year before it
            last_yr = str(int(yr)-1)

            # Subtract one to turn the ratio into a fractional change
            p_perc_change = (p_data / float(prod_data[last_yr]))-1
            c_perc_change = (c_data / float(CO2_data[last_yr]))-1

            prod_changes.append(p_perc_change)
            co2_changes.append(c_perc_change)

            # Classify event
            production_moved = (p_perc_change > 0) or (p_perc_change < 0)
            emissions_moved = (c_perc_change > 0) or (c_perc_change < 0)
            if production_moved and emissions_moved:
                label = alert_labels[(bool(p_perc_change > 0), bool(c_perc_change > 0))]
            else:
                label = "Rare case: world stable"

            # Extend the streak when the label repeats, otherwise restart at 1
            last_label, last_streak = alerts[last_yr]
            streak = last_streak + 1 if label == last_label else 1
            alerts[yr] = [label, streak]

# Contains percent changes in production value and CO2 emissions
# As well as some sample alerts, and trend notifications
# The with-block flushes and closes the file even if pickling fails part-way
with open(eora_file_folder+"twa_trade_changes.pkl", "wb") as twa_changes_file:
    pickle.dump(twa_trade_changes, twa_changes_file)

## Extension - create a metric to store information 
# about the magnitude of change, and when it is "significant"



## Investigate - are changes like observed in the United States' 
## decrease in emissions compensated by increased imports with embodied CO2?
# For countries that have streaks of decreasing emissions,
# investigate whether their imports are increasing, and if so, from where
# Tie this to the trends in the exporting countries - 
# tied to increased emissions?

In [None]:
### View the "alerts" for production and emission trends for the USA
###

from collections import Counter

USA_alerts = twa_trade_changes["USA_Textiles and Wearing Apparel_alerts"]

print(json.dumps(USA_alerts, indent=1, sort_keys=True))

# Tally how many years carry each alert label. Every alert is a
# [label, streak length] pair; only the label is counted. An empty alerts
# dictionary gives an empty Counter.
Counter(alert_label for alert_label, _streak in USA_alerts.values())