In [None]:
# Pandas is for formatting the data into matrices, making it easier to query
# Numpy is used for some mathematical functions, such as matrix multiplication
# Datetime is used to assess how long it takes code blocks to run
# Fiona is used for loading country shapefiles, and for exporting the results as a shapefile
# Pickle is for serializing Python objects to disk so they can be reloaded quickly later
# GeoPandas is for programmatically mapping

import pandas as pd
import numpy as np
import datetime
import fiona
# import shapely, can be used for spatial computations
import pickle
# import scipy as sp, can be used for running statistical tests
import geopandas as gpd

# Matplotlib inline to visualize maps in jupyter notebook
# Use PdfPages to print plots to a pdf
# pyplot helps with laying out plots
%matplotlib inline
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

# Set options to see more of the data tables at a time

pd.options.display.max_rows = 200
pd.options.display.max_columns = 200

In [None]:
# Load IPUMS census data for China, includes Industry General response
# i.e. which industry employed people report working for

# NOTE(review): the original cell contained `IPUMS_files = ` with no right-hand
# side, which is a SyntaxError and prevents the whole cell from running. It is
# assumed here to be the folder holding the raw IPUMS extracts -- confirm.
IPUMS_files = "/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/IPUMS_files/"

# Another load method:
# ipums = np.fromfile("/Users/nathansuberi/Downloads/ipumsi_00003.dat")

# Read every record (one per line) into memory. The context manager closes the
# file handle as soon as reading finishes instead of leaving it open for the
# lifetime of the kernel.
with open(IPUMS_files + "Chinese_INDGEN_ipumsi.dat", "r") as f:
    data = f.readlines()

# Series view of the same records, used later for slicing out the coded fields
dataex = pd.Series(data)
print("number of records: ", len(data))
print("example of a household record: ", data[1])

# The IPUMS survey data are provided as coded strings of numbers
# The following code blocks create the dictionaries that are used to decode these strings
# And create data frames with human readable information

# All of the conversion files are adapted from the conversion codes
# Provided by IPUMS with their data download
# And stored in the folder below:

file_loc = "/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/IPUMS_Conversions/"

In [None]:
def _load_code_table(csv_name, label_column, sep=","):
    """Read one IPUMS conversion file into a lookup table indexed by code.

    Only the first parsed column of the file is used. Each entry in it is
    split on a run of three spaces: the first piece becomes the ``code``
    index and the second piece becomes the ``label_column`` value.

    Parameters
    ----------
    csv_name : str
        File name inside the ``file_loc`` folder.
    label_column : str
        Name of the column that will hold the human-readable label.
    sep : str, default ","
        Field separator handed to ``pd.read_csv``. Pass a tab for files whose
        labels contain commas, so the label is not cut at the first comma.

    Returns
    -------
    pandas.DataFrame
        One column (``label_column``), indexed by ``code``.

    Raises
    ------
    IndexError
        If an entry does not contain the three-space separator.
    """
    raw = pd.read_csv(file_loc + csv_name, header=None, sep=sep)
    pieces = [entry.split("   ") for entry in raw[0]]
    table = pd.DataFrame({
        "code": [piece[0] for piece in pieces],
        label_column: [piece[1] for piece in pieces],
    })
    return table.set_index(["code"])


# Create dictionary for country names
country_names = _load_code_table("country_names.csv", "country")
print(country_names.loc["156"])

# Create dictionary for Industry of Employment
# Have to change the sep, because there are commas in these labels
ind_gen = _load_code_table("ind_gen.csv", "ind_gen", sep="\t")
print(ind_gen.loc["010"])

# Create dictionary for Employment Status
emp_status = _load_code_table("emp_stat.csv", "emp_stat")

# Create dictionary for Detailed Employment Status (tab separated, labels contain commas)
emp_status_d = _load_code_table("emp_stat_detailed.csv", "emp_stat_d", sep="\t")

# Create dictionary for Class Work (tab separated, labels contain commas)
class_wk = _load_code_table("class_wk.csv", "class_wk", sep="\t")

# Create dictionary for Detailed Class Work Status (tab separated, labels contain commas)
class_wk_d = _load_code_table("class_wk_detailed.csv", "class_wk_d", sep="\t")

In [None]:
# Now create decoded dataframe
# Takes 22 minutes for all >30,000,000 of the Chinese records

start_time = datetime.datetime.now()
print("start creating data frame:", start_time)

# Each record is a fixed-width string: characters [0:3] hold the country code,
# [3:7] the year, [7:16] the GeoLev2 code and [16:19] the general industry code.
# The vectorized .str slices replace one Python lambda call per record.
country_codes = dataex.str[0:3]
industry_codes = dataex.str[16:19]

datadf = pd.DataFrame({
    # Codes are translated to labels through the lookup tables
    "Country": country_names.loc[country_codes]["country"].values,
    "Year": dataex.str[3:7].values,
    "GeoLev2": dataex.str[7:16].values,
    "IndGen": ind_gen.loc[industry_codes]["ind_gen"].values
}, index=range(0,len(dataex)))

end_time = datetime.datetime.now()
print("end", end_time - start_time)

print(datadf)

# Use Pickle to save the decoded Chinese census responses for re-use
china_indgen_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/IPUMS_GeoLev2_Pickles/China_Indgen_DF.pkl'

# Dump to file; the context manager flushes and closes the handle so the
# pickle is complete on disk before any later cell reads it back
with open(china_indgen_filename, 'wb') as pickle_file:
    pickle.dump(datadf, pickle_file)


In [None]:
## Can start analysis here, once original IPUMS data is decoded and saved to a pickle

In [None]:
# Load Chinese census response data from pickle file
# NOTE: unpickling can execute arbitrary code; only load pickles you created yourself
china_indgen_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/IPUMS_GeoLev2_Pickles/China_Indgen_DF.pkl'

start_time = datetime.datetime.now()
print("start loading data frame:", start_time)

# The context manager closes the file handle once the object has been read
with open(china_indgen_filename, 'rb') as pickle_file:
    china_indgen_df = pickle.load(pickle_file)

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)

In [None]:
# Examine the Chinese census response data 
# regarding General Industry people are employed in
# (a bare expression on the last line of a cell is rendered by Jupyter as the cell output)
china_indgen_df

In [None]:
# Index the census responses by (GeoLev2, Year) so they can be queried per region and year
# https://pandas.pydata.org/pandas-docs/stable/advanced.html

start_time = datetime.datetime.now()
print("start creating multi-index:", start_time)

# One (GeoLev2, Year) tuple per census record, in record order
admin_year_tuples = list(zip(china_indgen_df["GeoLev2"], china_indgen_df["Year"]))
indgen_multi_index = pd.MultiIndex.from_tuples(
    admin_year_tuples,
    names=["GeoLev2", "Year"],
)

# Replaces the row index in place; the GeoLev2 and Year columns stay in the frame
china_indgen_df.index = indgen_multi_index

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)

In [None]:
# Use this MultiIndex on the census response data to loop over that data
# And store the distribution of employment for each GeoLev2 in each year

# Option: calculate employment as counts or proportions per GeoLev2
# Counts are preferable for this application, as I want to downscale the total value
# I think is generated in that GeoLev2 for a certain industry
# I am using national trade accounts, and distributing total production value
# According to number of people employed in relevant industries in each GeoLev2 
# relative to national total employment in relevant industries.

# def return_props(cts,key):
#     return round(cts[key]/cts.sum(), 4) if key in cts.keys() else 0

def return_counts(cts,key,sample_perc):
    """Scale a sampled count up to an estimated full-population count.

    Parameters
    ----------
    cts : mapping of label -> count
        Anything supporting ``.keys()`` and ``[key]`` lookup, e.g. the result
        of ``Series.value_counts()``.
    key : label to look up in ``cts``.
    sample_perc : number
        Percentage of the population covered by the sample (1 means a 1%
        sample, so counts are multiplied by 100).

    Returns
    -------
    ``0`` when ``key`` is absent from ``cts``, so every category is always
    reported; otherwise the scaled count rounded to zero decimal places
    (``round(x, 0)``, which keeps a float type rather than producing an int).
    """
    # Category never observed in this group: report zero instead of failing
    if key not in cts.keys():
        return 0

    scale_factor = 100/sample_perc
    return round(scale_factor*cts[key], 0)

start_time = datetime.datetime.now()
print("start creating rows:", start_time)

# Industry labels to tabulate, in the column order of the output table.
# The decoded IndGen values are looked up wrapped in double quotes below.
industry_labels = [
    "NIU (not in universe)",
    "Agriculture, fishing, and forestry",
    "Manufacturing",
    "Construction",
    "Wholesale and retail trade",
    "Transportation, storage and communications",
    "Education",
    "Mining",
    "Public administration and defense",
    "Other services",
    "Hotels and restaurants",
    "Health and social work",
    "Unknown",
    "Electricity, gas and water",
    "Real estate and business services",
    "Financial services and insurance",
    "Other industry, n.e.c.",
    "Services, not specified",
    "Private household services",
    "Response suppressed",
]

# Employment numbers by GeoLev2 & Year (one record for each pair)
rows = []
# Percentage of census included in China IPUMS microdata sample
china_sample_perc = 1

# Group once on the (GeoLev2, Year) index levels. Only pairs that actually
# occur are visited, in sorted order, so no membership test is needed for
# GeoLev2s that lack some census years. Grouping also avoids the chained
# .loc[gl2].loc[yr] lookup, which re-scans the frame for every pair and
# appears to return a single row rather than a table when a pair has exactly
# one record (which would then break the value_counts call).
for (gl2, yr), responses in china_indgen_df.groupby(level=["GeoLev2", "Year"]):
    cts = responses["IndGen"].value_counts()
    total = cts.sum()

    row = {
        "GeoLev2": gl2,
        "Year": yr,
        "Total Population in Sample": total,
    }
    # Industries absent from this group are reported as 0 by return_counts
    for label in industry_labels:
        row[label] = return_counts(cts, "\"" + label + "\"", china_sample_perc)
    rows.append(row)

# Create the dataframe of all GeoLev2 & Year combo, employment counts
employment_by_geolev2 = pd.DataFrame(rows)

# Use Pickle to save the employment summary for re-use
china_employment_by_geolev2_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/IPUMS_GeoLev2_Pickles/China_IndGen_by_GeoLev2_DF.pkl'

# Dump to file; the context manager closes the handle so the pickle is complete on disk
with open(china_employment_by_geolev2_filename, 'wb') as pickle_file:
    pickle.dump(employment_by_geolev2, pickle_file)

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)

In [None]:
# Load Chinese employment summary data from pickle file
# NOTE: unpickling can execute arbitrary code; only load pickles you created yourself
china_employment_by_geolev2_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/IPUMS_GeoLev2_Pickles/China_IndGen_by_GeoLev2_DF.pkl'

start_time = datetime.datetime.now()
print("start loading data frame:", start_time)

# The context manager closes the file handle once the object has been read
with open(china_employment_by_geolev2_filename, 'rb') as pickle_file:
    china_employment_by_geolev2 = pickle.load(pickle_file)

# Create MultiIndex by GeoLev2 and Year for easier querying
admin_year_tuples = list(zip(china_employment_by_geolev2["GeoLev2"], china_employment_by_geolev2["Year"]))

indgen_geolev2_multi_index = pd.MultiIndex.from_tuples(admin_year_tuples, names=["GeoLev2", "Year"])
china_employment_by_geolev2.index = indgen_geolev2_multi_index

# Don't need these columns anymore, as the information is stored in the MultiIndex.
# Rebinding the name (rather than inplace=True) yields the same table.
china_employment_by_geolev2 = china_employment_by_geolev2.drop(["GeoLev2", "Year"], axis=1)

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)

In [None]:
# Inspect employment summary data: one row per (GeoLev2, Year) pair,
# one column per industry plus the sample total
china_employment_by_geolev2

In [None]:
# Can now request data by GeoLev2 code and year
# The first .loc selects on the GeoLev2 index level, the second on the Year level;
# both labels are written as strings here
china_employment_by_geolev2.loc["156011001"].loc["2000"]

In [None]:
# Can evaluate statistical tests to see if there are significant differences in 
# employment distribution by GeoLev2 / Year... are there differences to investigate?


# X^2 test, see if these distributions are different
# With a 1% sample of the total, how likely is it that these are 
# representative of the population distributions?

# Need to evaluate if variations can be considered due to sample variance, or population variance


In [None]:
# Extension: continue to downscale using other data

# Global Urban Rural Mapping Project (GRUMP) to analyze urban / rural breakdown
# MapSPAM to analyze agricultural production



In [None]:
# Import WorldGeoLev2 shapefile to attach the results of the jobs / trade analysis
world_geolev2_boundary_shapefile = "/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/world_geolev2/world_geolev2.shp"

# The collection is left open after this cell
c = fiona.open(world_geolev2_boundary_shapefile)
# Iterator over the features of the collection
coll = iter(c)

# Keep only the features located in China that carry a GEOLEVEL2 code
china_geolev2_polys = [
    poly
    for poly in coll
    if poly["properties"]["CNTRY_NAME"] == "China"
    and poly["properties"]["GEOLEVEL2"] is not None
]

In [None]:
# Sanity check: the number of China polygons in the shapefile should equal the number
# of distinct GeoLev2 codes in the employment table (original note: "Should be 199")
print(len(china_geolev2_polys) == len(china_employment_by_geolev2.index.levels[0]))
print(len(china_geolev2_polys))

In [None]:
# QA/QC: sort the shapefile's GeoLev2 codes by whether the employment table contains them
valid_shp = []
invalid_shp = []

# GeoLev2 codes present in the employment data (first level of its MultiIndex)
known_geolev2_codes = china_employment_by_geolev2.index.levels[0]

for poly in china_geolev2_polys:
    shp_code = poly["properties"]["GEOLEVEL2"]
    if shp_code in known_geolev2_codes:
        valid_shp.append(shp_code)
        continue
    # Polygon without matching employment data: report it and set it aside
    print("uh oh...")
    print(shp_code)
    invalid_shp.append(shp_code)
        

In [None]:
# One geolev2 doesn't match between the two
# Number of shapefile polygons whose GEOLEVEL2 code was found in the employment table
len(valid_shp)

In [None]:
# Check which GeoLev2 codes in china_employment_by_geolev2 have no matching polygon
valid_emp = []
invalid_emp = []
# Walk the GeoLev2 codes of the IPUMS census data and keep the ones that also
# appear among the matched shapefile codes
# A mismatch is a bit strange, because both datasets come from IPUMS

### FLAG THIS FOR PAT @ IPUMS

for gl2 in china_employment_by_geolev2.index.levels[0]:
    if gl2 in valid_shp:
        valid_emp.append(gl2)
        continue
    # Census code without a polygon: report it and set it aside
    print("uh oh...")
    print(gl2)
    invalid_emp.append(gl2)

In [None]:
# Check to see that the valid geolev2s are the same
# Sorting both lists first makes the comparison independent of the order they were collected in
sorted(valid_emp) == sorted(valid_shp)

In [None]:
# Polygons that have employment data, each carrying that data in its properties
final_china_geolev2_polys = []

for poly in china_geolev2_polys:
    geolev2_code = poly["properties"]["GEOLEVEL2"]
    # Skip polygons whose code is not in the valid_shp list (no employment data)
    if geolev2_code not in valid_shp:
        continue
    # Employment rows for every available year of this GeoLev2
    poly["properties"]["job_data"] = china_employment_by_geolev2.loc[geolev2_code]
    final_china_geolev2_polys.append(poly)

In [None]:
# Inspect the first GeoLev2's properties, corresponds to Beijing Municipality
# The properties now also include the "job_data" table attached in the previous cell
final_china_geolev2_polys[0]["properties"]

In [None]:
# Import trade volume and impacts from production data from EORA

def _load_pickle(path):
    """Unpickle the object stored at ``path`` and close the file afterwards.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    that were produced by a trusted source.
    """
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


eora_xact_1990_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/EORA_files/eora_xact_1990.pkl'
eora_xact_2000_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/EORA_files/eora_xact_2000.pkl'
eora_xact_2013_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/EORA_files/eora_xact_2013.pkl'

eora_impact_1990_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/EORA_files/eora_impact_1990.pkl'
eora_impact_2000_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/EORA_files/eora_impact_2000.pkl'
eora_impact_2013_filename  = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/EORA_files/eora_impact_2013.pkl'

# Transaction matrices, one per EORA year
eora_xact_1990 = _load_pickle(eora_xact_1990_filename)
eora_xact_2000 = _load_pickle(eora_xact_2000_filename)
eora_xact_2013 = _load_pickle(eora_xact_2013_filename)

# Impact matrices, one per EORA year
eora_impact_1990 = _load_pickle(eora_impact_1990_filename)
eora_impact_2000 = _load_pickle(eora_impact_2000_filename)
eora_impact_2013 = _load_pickle(eora_impact_2013_filename)


In [None]:
# Import EORA data, create dict of yearly data



In [None]:
# Create a mapping between the job types in IPUMS
# And the trade types in EORA

# IPUMS general-industry labels. They become the row labels of the job-to-trade
# conversion matrix and are used to select the matching columns of job_data.
jobs = ["NIU (not in universe)", 
        "Agriculture, fishing, and forestry", 
        "Manufacturing", 
        "Construction", 
        "Wholesale and retail trade", 
        "Transportation, storage and communications", 
        "Education", 
        "Mining", 
        "Public administration and defense", 
        "Other services", 
        "Hotels and restaurants", 
        "Health and social work", 
        "Unknown", 
        "Electricity, gas and water", 
        "Real estate and business services",
        "Financial services and insurance", 
        "Other industry, n.e.c.", 
        "Services, not specified", 
        "Private household services", 
        "Response suppressed"
       ]

# EORA trade sector labels; they become the column labels of the conversion matrix.
# 'Hotels and Restraurants' and 'Finacial Intermediation and Business Activities'
# are misspelled in EORA itself, so the spelling must be kept as-is for lookups to match.
trades = ['Agriculture',
         'Fishing',
         'Mining and Quarrying',
         'Food & Beverages',
         'Textiles and Wearing Apparel',
         'Wood and Paper',
         'Petroleum, Chemical and Non-Metallic Mineral Products',
         'Metal Products',
         'Electrical and Machinery',
         'Transport Equipment',
         'Other Manufacturing',
         'Recycling',
         'Electricity, Gas and Water',
         'Construction',
         'Maintenance and Repair',
         'Wholesale Trade',
         'Retail Trade',
         'Hotels and Restraurants',
         'Transport',
         'Post and Telecommunications',
         'Finacial Intermediation and Business Activities',
         'Public Administration',
         'Education, Health and Other Services',
         'Private Households',
         'Others',
         'Re-export & Re-import']

# For each IPUMS job type, the EORA trade sectors its workers are attributed to.
# A job's count is later split equally across the trades listed for it; an empty
# list means the job type contributes to no trade. The same trade may be listed
# under several job types.
job_to_trade_mapping = {
    "NIU (not in universe)":[],
    "Agriculture, fishing, and forestry":["Agriculture", "Fishing", "Wood and Paper"],
    "Mining":["Mining and Quarrying", "Petroleum, Chemical and Non-Metallic Mineral Products", "Metal Products", "Other Manufacturing", "Construction"],
    "Manufacturing":["Other Manufacturing", "Electrical and Machinery"],
    "Electricity, gas and water":["Electricity, Gas and Water"],
    "Construction":["Construction"],
    # Re-export & Re-import here added to get data in 1990
    "Wholesale and retail trade":["Wholesale Trade", "Retail Trade", "Food & Beverages", "Textiles and Wearing Apparel", "Re-export & Re-import"],
    # EORA trade sector misspelled
    "Hotels and restaurants":["Hotels and Restraurants"],
    "Transportation, storage and communications":["Transport Equipment", "Transport", "Post and Telecommunications"],
    # EORA trade sector misspelled
    "Financial services and insurance":["Finacial Intermediation and Business Activities"],
    "Public administration and defense":["Public Administration", "Recycling"],
    "Services, not specified":["Private Households", "Others", "Education, Health and Other Services", "Maintenance and Repair"],
    "Real estate and business services":["Finacial Intermediation and Business Activities"],
    "Education":["Education, Health and Other Services"],
    "Health and social work":["Education, Health and Other Services", "Public Administration"],
    # Private Households added here just so that there will be employment in that section
    "Other services":["Education, Health and Other Services", "Maintenance and Repair", "Private Households"],
    "Private household services":["Private Households"],
    "Other industry, n.e.c.":["Others", "Re-export & Re-import"],
    "Response suppressed":[],
    "Unknown":["Others"]
}

# Include job_to_trade_mapping as an input to emphasize that these are subjective
# Can potentially remove jobs here, and infer from the job_data passed
# Then fct could be: create_job_to_trade_weights(job_data, trades, job_to_trade_mapping)

def convert_job_data_to_trade_data(job_data, job_to_trade_mapping, jobs, trades):
    """Distribute job counts among the trade sectors associated with each job.

    A (jobs x trades) conversion matrix is built in which every job's row
    holds the weight ``1 / number_of_matching_trades`` in the columns of its
    matching trades and 0 elsewhere. ``job_data`` (restricted to the ``jobs``
    columns, in that order) is then matrix-multiplied by it.

    This assumes each matching trade is equally represented in the job
    reports, which is unrealistic; spatial proxies (e.g. MapSPAM for
    agriculture, distance to shore for fishing) could refine the weights.

    Parameters
    ----------
    job_data : pandas.DataFrame
        Counts with one column per entry of ``jobs``; extra columns are ignored.
    job_to_trade_mapping : dict
        Job label -> list of trade labels. An empty list drops that job.
    jobs : list
        Job labels (rows of the conversion matrix).
    trades : list
        Trade labels (columns of the conversion matrix and of the result).

    Returns
    -------
    pandas.DataFrame
        Same row index as ``job_data``, one column per trade, NaN replaced by 0.

    Raises
    ------
    KeyError
        If the mapping names a job that is not in ``jobs`` or a trade that is
        not in ``trades``, or if ``job_data`` lacks one of the ``jobs`` columns.
    """
    conversion_matrix = pd.DataFrame(np.zeros((len(jobs), len(trades))), index=jobs, columns=trades)

    for job, matching_trades in job_to_trade_mapping.items():
        # Fail loudly on unknown labels; a single .loc[row, cols] assignment
        # would otherwise silently add a new row for an unknown job.
        if job not in conversion_matrix.index:
            raise KeyError(job)
        unknown_trades = [trade for trade in matching_trades if trade not in conversion_matrix.columns]
        if unknown_trades:
            raise KeyError(unknown_trades)

        # No matching trades: leave the row at zero (also avoids dividing by 0)
        if not matching_trades:
            continue

        # Write through a single .loc call. The chained form
        # conversion_matrix.loc[job][matching_trades] = ... may assign to a
        # temporary copy and leave the matrix unchanged.
        conversion_matrix.loc[job, matching_trades] = 1 / len(matching_trades)

    # Reorganize the columns in the job_data, only keep the columns matching job counts
    job_data_organized = job_data[conversion_matrix.index]
    # Use matrix multiplication to distribute the job data among the associated trades
    jobs_per_trade_data = job_data_organized.dot(conversion_matrix).fillna(value=0)

    return jobs_per_trade_data
    

In [None]:
# Select years for the Chinese census
years = list(china_employment_by_geolev2.index.levels[1])

# Create an empty dataframe for the trade_employment_totals
national_trade_employment_totals = pd.DataFrame(np.zeros((len(years), len(trades))), index = years, columns = trades)

# Loop over the polygons where we've stored the job_data, attach trade_data
for poly in final_china_geolev2_polys:

    # Select the job_data for the GeoLev2, all years
    job_data = poly["properties"]["job_data"]
    # Distribute job data among the associated trades
    jobs_per_trade_data = convert_job_data_to_trade_data(job_data, job_to_trade_mapping, jobs, trades)

    # Add this GeoLev2's employment to national totals, will use to normalize later.
    # Whole-table addition replaces the cell-by-cell chained assignment
    # (df[trade].loc[year] = ...), which may write to a temporary copy.
    # Reindexing to the full (years x trades) grid makes a census year that is
    # missing for this GeoLev2 count as 0 instead of raising a KeyError.
    aligned_jobs_per_trade = jobs_per_trade_data.reindex(index=years, columns=trades, fill_value=0)
    national_trade_employment_totals = national_trade_employment_totals + aligned_jobs_per_trade

    # Attach trade data to shapefile (unaligned: only the years this GeoLev2 has)
    poly["properties"]["jobs_per_trade_data"] = jobs_per_trade_data

In [None]:
# Inspect total estimated employment per trade sector, per year for China
# (rows are census years, columns are EORA trade sectors)

national_trade_employment_totals

In [None]:
# Express each GeoLev2's jobs per trade as a share of the national total for that trade
for poly in final_china_geolev2_polys:
    props = poly["properties"]
    # Element-wise division by the national totals; a national total of 0
    # produces NaN here
    share_of_national = props["jobs_per_trade_data"].div(national_trade_employment_totals, axis="index")
    # Replace those NaNs with 0 so later computations stay numeric
    props["normed_jobs_per_trade_data"] = share_of_national.fillna(value=0)


In [None]:
# Validate the normalization on one polygon (the second one) for the Agriculture trade

validation_props = final_china_geolev2_polys[1]["properties"]

pre = validation_props["jobs_per_trade_data"]["Agriculture"]
proc = national_trade_employment_totals["Agriculture"]
post = validation_props["normed_jobs_per_trade_data"]["Agriculture"]

print("before: ", pre)
print("div by: ", proc)
print("after: ", post)

# Compare: the stored share must equal local jobs divided by the national total
print("")
print("Should be true if division performed correctly: ")
print(pre["1982"] / proc["1982"] == post["1982"])

# Re-add the 1982 Agriculture jobs of every polygon, in polygon order
ag_emp_total_1982 = sum(
    poly["properties"]["jobs_per_trade_data"]["Agriculture"]["1982"]
    for poly in final_china_geolev2_polys
)

print("")
print("Should be true if totals add up: ")
print(ag_emp_total_1982 == national_trade_employment_totals["Agriculture"].loc["1982"])

In [None]:
# Ready to distribute the EORA trade values 
# using the normalized trade data calculated above

# China's trade in 000's of USD
# Remember - production stored in columns of EORA matrices
# Keyed by year (as a string); each value is the "CHN" selection of that year's matrix
CHN_xact_by_year = {
    "1990":eora_xact_1990["CHN"],
    "2000":eora_xact_2000["CHN"],
    "2013":eora_xact_2013["CHN"]
}

# Select only CO2 emissions

# List of impacts to select for
# NOTE: this bare expression is not the last line of the cell, so Jupyter does not
# display it; move it to its own cell to actually see the available categories
eora_impact_2013.index.levels[0]

# Impact categories treated as CO2 emissions; matched against the "Category" index level
CO2_categories = ['Biomass Burning (Gg CO2)', 'CO2 (Gg)', 'CO2 (exlc. LULUCF)',
       'CO2 (including biomass burning) from Agricultural Burning',
       'CO2 (including biomass burning) from Cement/Minerals',
       'CO2 (including biomass burning) from Energy Production',
       'CO2 (including biomass burning) from Forest Fires and Other Sources',
       'CO2 (including biomass burning) from Natural Decay',
       'CO2 (including biomass burning) from Solvents',
       'CO2 (including biomass burning) from Waste',
       'CO2 From Agricultural Burning', 'CO2 From Cement/Minerals',
       'CO2 From Energy Production', 'CO2 From Forest Fires and Other Sources',
       'CO2 From Natural Decay', 'CO2 From Solvents', 'CO2 From Waste']

def select_emissions_rows(matrix, categories):
    """Build a boolean row mask for the requested impact categories.

    Parameters
    ----------
    matrix : pandas object whose index has a level named "Category"
    categories : collection of category labels to keep

    Returns
    -------
    Boolean array, one entry per row of ``matrix``, True where the row's
    "Category" label is one of ``categories``.
    """
    category_labels = matrix.index.get_level_values("Category")
    return category_labels.isin(categories)

# CO2 rows of China's production impacts, one table per EORA year
CHN_CO2_from_prod_by_year = {
    year: chn_impacts[select_emissions_rows(chn_impacts, CO2_categories)]
    for year, chn_impacts in (
        ("1990", eora_impact_1990["CHN"]),
        ("2000", eora_impact_2000["CHN"]),
        ("2013", eora_impact_2013["CHN"]),
    )
}

## **** Think about this: china_indgen_df.loc['156042090'] does not contain data for all years

In [None]:
# Select years for the Chinese census that are included in the EORA data
years = list(china_employment_by_geolev2.index.levels[1])
# Hardcode for now
## **** Think on how to automate
years_in_eora = ["1990", "2000"]

# Create zero-filled dataframes (rows: EORA years, columns: trades)
# for the EORA trade and CO2 from production sums
EORA_CHN_trade_value_totals = pd.DataFrame(
    np.zeros((len(years_in_eora), len(trades))), index=years_in_eora, columns=trades
)
EORA_CHN_CO2_from_production_totals = pd.DataFrame(
    np.zeros((len(years_in_eora), len(trades))), index=years_in_eora, columns=trades
)

# Loop through relevant years, and all trades,
# calculate the sums of trade value and CO2 produced per trade.
# Assign with a single .loc[row, column]: the chained form df[col].loc[row] = value
# writes into an intermediate Series and is not guaranteed to reach the dataframe
# (it does nothing when pandas Copy-on-Write is enabled).
for year in years_in_eora:
    for trade in trades:
        EORA_CHN_trade_value_totals.loc[year, trade] = CHN_xact_by_year[year][trade].sum()
        EORA_CHN_CO2_from_production_totals.loc[year, trade] = CHN_CO2_from_prod_by_year[year][trade].sum()


In [None]:
# Inspect China's national trade value totals, in 1,000's of USD
# (rows: EORA years, columns: trades)

EORA_CHN_trade_value_totals

In [None]:
# Inspect China's national CO2 from production totals, in Gg CO2 emitted
# (rows: EORA years, columns: trades)

EORA_CHN_CO2_from_production_totals

In [None]:
# For every polygon, scale the national EORA totals by the polygon's
# normalized employment shares, then derive the change between 1990 and 2000.

# (property key to store under, national totals to downscale)
national_totals_by_property = (
    ("downscaled_trade_value", EORA_CHN_trade_value_totals),
    ("downscaled_impacts_from_production", EORA_CHN_CO2_from_production_totals),
)

for poly in final_china_geolev2_polys:
    props = poly["properties"]

    # Employment shares, restricted to the years that EORA also covers
    shares = props["normed_jobs_per_trade_data"]
    shares_in_eora_years = shares[shares.index.isin(years_in_eora)]

    # Downscaled values: share * national total, with missing cells set to 0
    for property_key, national_totals in national_totals_by_property:
        props[property_key] = (shares_in_eora_years * national_totals).fillna(value=0)

    # Change from 1990 to 2000, computed as (2000 value / 1990 value) - 1
    # Value of 9999 => was infinity (zero value for 1990, some value for 2000)
    # Value of -1 => both years were 0, or there was some value for 1990 and zero value for 2000
    for property_key, _ in national_totals_by_property:
        downscaled = props[property_key]
        ratio = downscaled.loc["2000"].div(downscaled.loc["1990"], axis="index")
        change = ratio.fillna(value=0) - 1
        props["per_change_" + property_key] = change.replace([np.inf, -np.inf], 9999)

In [None]:
# Inspect the downscaled trade value of the first polygon (Beijing Municipality)

final_china_geolev2_polys[0]["properties"]["downscaled_trade_value"]

In [None]:
## **** Open question: .replace([np.inf, -np.inf], np.nan) could be used in the
# percent-change calculation above instead of a placeholder number.
# Replacing the inf values with a number (the calculation above uses 9999) is a bit misleading
# ... what will happen when graphing?

final_china_geolev2_polys[0]["properties"]["per_change_downscaled_trade_value"]

In [None]:
# Inspect the downscaled CO2 from production of the first polygon (Beijing Municipality)

final_china_geolev2_polys[0]["properties"]["downscaled_impacts_from_production"]

# QUESTION: How can the Agro value go up so much while its emissions go down?

In [None]:
# Inspect the 1990 -> 2000 change in downscaled CO2 from production for the first polygon
final_china_geolev2_polys[0]["properties"]["per_change_downscaled_impacts_from_production"]


In [None]:
# List the property keys stored on the first polygon
final_china_geolev2_polys[0]["properties"].keys()

In [None]:
# Export final polys as pickle
final_china_geolev2_polys_file_dest = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/Country_EORA_IPUMS_Downscale/final_china_geolev2_polys.pkl'
# Open the file in a with-block so the handle is flushed and closed
# as soon as the dump finishes, even if pickling raises
with open(final_china_geolev2_polys_file_dest, 'wb') as pickle_file:
    pickle.dump(final_china_geolev2_polys, pickle_file)

In [None]:
# Load final polys
# CAUTION: unpickling can execute arbitrary code - only load pickle files
# that were written by this notebook.
final_china_geolev2_polys_file_dest = '/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/Country_EORA_IPUMS_Downscale/final_china_geolev2_polys.pkl'
# Open the file in a with-block so the handle is closed once loading finishes
with open(final_china_geolev2_polys_file_dest, 'rb') as pickle_file:
    final_china_geolev2_polys = pickle.load(pickle_file)



In [None]:
# Inspect the 1990 -> 2000 change in downscaled trade value for the first polygon
final_china_geolev2_polys[0]["properties"]["per_change_downscaled_trade_value"]

In [None]:
# Use fiona to export to a shapefile, for viz in Carto
#
# Besides GEOLEVEL2, every record carries one float attribute per
# (period, measure, trade), named <period>_<measure>_<trade abbreviation>:
#   period : "19" = 1990 value, "20" = 2000 value,
#            "pc" = change between 1990 and 2000 (the per_change_* properties)
#   measure: "t" = downscaled trade value, "c" = downscaled CO2 from production
# Shapefile attribute names are limited to 10 characters, hence the abbreviations.
#
# The schema and the record properties are both generated from the single
# trade table below, so the two cannot drift apart.

# (EORA trade name, abbreviation used in the shapefile field name), in export order.
# The trade names must match the EORA labels exactly - careful, two of them are
# misspelled in EORA itself ("Restraurants", "Finacial").
TRADE_ABBREVIATIONS = [
    ("Agriculture", "ag"),
    ("Fishing", "fsh"),
    ("Mining and Quarrying", "maq"),
    ("Food & Beverages", "fab"),
    ("Textiles and Wearing Apparel", "tawa"),
    ("Wood and Paper", "wap"),
    ("Petroleum, Chemical and Non-Metallic Mineral Products", "pcnmp"),
    ("Metal Products", "mp"),
    ("Electrical and Machinery", "eam"),
    ("Transport Equipment", "te"),
    ("Other Manufacturing", "om"),
    ("Recycling", "rcycl"),
    ("Electricity, Gas and Water", "egw"),
    ("Construction", "con"),
    ("Maintenance and Repair", "mar"),
    ("Wholesale Trade", "wt"),
    ("Retail Trade", "rt"),
    ("Hotels and Restraurants", "har"),
    ("Transport", "xprt"),
    ("Post and Telecommunications", "pat"),
    ("Finacial Intermediation and Business Activities", "fiba"),
    ("Public Administration", "pa"),
    ("Education, Health and Other Services", "ehos"),
    ("Private Households", "ph"),
    ("Others", "oth"),
    ("Re-export & Re-import", "reri"),
]

# There are no carbon emissions from re-export & re-import,
# so no CO2 field is exported for it
TRADES_WITHOUT_CO2 = {"Re-export & Re-import"}

# (field name prefix, year label in the downscaled tables);
# a year of None selects the per_change_* series instead of a yearly value
EXPORT_PERIODS = [("19", "1990"), ("20", "2000"), ("pc", None)]

# (measure code in the field name, key of the downscaled table in poly["properties"])
EXPORT_MEASURES = [
    ("t", "downscaled_trade_value"),
    ("c", "downscaled_impacts_from_production"),
]


def _build_export_fields():
    """Return the exported attributes as (field_name, property_key, trade, year) tuples.

    Fields are ordered by period, then trade, then measure (trade before CO2).
    ``year`` is None for the percent-change fields, whose ``property_key``
    carries the "per_change_" prefix.
    """
    fields = []
    for prefix, year in EXPORT_PERIODS:
        for trade, abbreviation in TRADE_ABBREVIATIONS:
            for measure, property_key in EXPORT_MEASURES:
                if measure == "c" and trade in TRADES_WITHOUT_CO2:
                    continue
                if year is None:
                    property_key = "per_change_" + property_key
                field_name = "{}_{}_{}".format(prefix, measure, abbreviation)
                fields.append((field_name, property_key, trade, year))
    return fields


def _export_value(props, property_key, trade, year):
    """Look up the value of one exported field in a polygon's properties.

    Yearly tables are indexed as props[property_key][trade].loc[year]; the
    percent-change series (``year`` is None) are indexed by trade only.
    """
    per_trade = props[property_key][trade]
    return per_trade if year is None else per_trade.loc[year]


export_fields = _build_export_fields()

# Data schema: polygon geometry, the GEOLEVEL2 identifier, then one float per export field
schema = {
    'geometry': 'Polygon',
    'properties': {'GEOLEVEL2': 'int'},
}
schema['properties'].update({field[0]: 'float' for field in export_fields})

# Write out to shapefile

file_destination = "/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/Country_EORA_IPUMS_Downscale"

with fiona.open(file_destination + "/china_trade_and_CO2_impacts", "w", "ESRI Shapefile", schema) as c:
    for poly in final_china_geolev2_polys:

        # Store properties for easier access
        props = poly["properties"]

        record_properties = {'GEOLEVEL2': props["GEOLEVEL2"]}
        record_properties.update({
            field_name: _export_value(props, property_key, trade, year)
            for field_name, property_key, trade, year in export_fields
        })

        c.write({
            'geometry': poly["geometry"],
            'properties': record_properties,
        })




In [None]:
# Read the exported shapefile back in as a GeoDataFrame
file_destination = "/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/Country_EORA_IPUMS_Downscale"

china_province_data = gpd.read_file(
    "/".join((file_destination, "china_trade_and_CO2_impacts"))
)

shp_key_to_readable_titles_conversion = {
    
    '19_t_ag': '1990_trade_agriculture',
    '19_c_ag': '1990_CO2_agriculture',

    '19_t_fsh': '1990_trade_fishing',
    '19_c_fsh': '1990_CO2_fishing',

    '19_t_maq': '1990_trade_mining_and_quarrying',
    '19_c_maq': '1990_CO2_mining_and_quarrying',

    '19_t_fab': '1990_trade_food_and_beverages',
    '19_c_fab': '1990_CO2_food_and_beverages',

    '19_t_tawa': '1990_trade_textiles_and_wearing_apparel',
    '19_c_tawa': '1990_CO2_textiles_and_wearing_apparel',

    '19_t_wap': '1990_trade_wood_and_paper',
    '19_c_wap': '1990_CO2_wood_and_paper',

    '19_t_pcnmp': '1990_trade_petroleum_chemical_and_non-metallic_mineral_products',
    '19_c_pcnmp': '1990_CO2_petroleum_chemical_and_non-metallic_mineral_products',

    '19_t_mp': '1990_trade_metal_products',
    '19_c_mp': '1990_CO2_metal_products',

    '19_t_eam': '1990_trade_electrical_and_machinery',
    '19_c_eam': '1990_CO2_electrical_and_machinery',

    '19_t_te': '1990_trade_transport_equipment',
    '19_c_te': '1990_CO2_transport_equipment',

    '19_t_om': '1990_trade_other_manufacturing',
    '19_c_om': '1990_CO2_other_manufacturing',

    '19_t_rcycl': '1990_trade_recycling',
    '19_c_rcycl': '1990_CO2_recyling',

    '19_t_egw': '1990_trade_electricity_gas_and_water',
    '19_c_egw': '1990_CO2_electricity_gas_and_water',

    '19_t_con': '1990_trade_construction',
    '19_c_con': '1990_CO2_construction',

    '19_t_mar': '1990_trade_maintenance_and_repair',
    '19_c_mar': '1990_CO2_maintenance_and_repair',

    '19_t_wt': '1990_trade_wholesale_trade',
    '19_c_wt': '1990_CO2_wholesale_trade',

    '19_t_rt': '1990_trade_retail_trade',
    '19_c_rt': '1990_CO2_retail_trade',

    # Careful, misspelled in EORA, restraurants
    '19_t_har': '1990_trade_hotels_and_restaurants',
    '19_c_har': '1990_CO2_hotels_and_restaurants',

    '19_t_xprt': '1990_trade_transport',
    '19_c_xprt': '1990_CO2_transport',

    '19_t_pat': '1990_trade_post_and_telecommunications',
    '19_c_pat': '1990_CO2_post_and_telecommunications',

    # Careful, misspelled in EORA, finacial
    '19_t_fiba': '1990_trade_financial_intermediation_and_business_activities',
    '19_c_fiba': '1990_CO2_financial_intermediation_and_business_activities',

    '19_t_pa': '1990_trade_public_administration',
    '19_c_pa': '1990_CO2_public_administration',

    '19_t_ehos': '1990_trade_education_health_and_other_services',
    '19_c_ehos': '1990_CO2_education_health_and_other_services',

    '19_t_ph': '1990_trade_private_households',
    '19_c_ph': '1990_CO2_private_households',

    '19_t_oth': '1990_trade_others',
    '19_c_oth': '1990_CO2_others',

    '19_t_reri': '1990_trade_re-export_and_re-import',
    '19_c_reri': '1990_CO2_re-export_and_re-import',



    '20_t_ag': '2000_trade_agriculture',
    '20_c_ag': '2000_CO2_agriculture',

    '20_t_fsh': '2000_trade_fishing',
    '20_c_fsh': '2000_CO2_fishing',

    '20_t_maq': '2000_trade_mining_and_quarrying',
    '20_c_maq': '2000_CO2_mining_and_quarrying',

    '20_t_fab': '2000_trade_food_and_beverages',
    '20_c_fab': '2000_CO2_food_and_beverages',

    '20_t_tawa': '2000_trade_textiles_and_wearing_apparel',
    '20_c_tawa': '2000_CO2_textiles_and_wearing_apparel',

    '20_t_wap': '2000_trade_wood_and_paper',
    '20_c_wap': '2000_CO2_wood_and_paper',

    '20_t_pcnmp': '2000_trade_petroleum_chemical_and_non-metallic_mineral_products',
    '20_c_pcnmp': '2000_CO2_petroleum_chemical_and_non-metallic_mineral_products',

    '20_t_mp': '2000_trade_metal_products',
    '20_c_mp': '2000_CO2_metal_products',

    '20_t_eam': '2000_trade_electrical_and_machinery',
    '20_c_eam': '2000_CO2_electrical_and_machinery',

    '20_t_te': '2000_trade_transport_equipment',
    '20_c_te': '2000_CO2_transport_equipment',

    '20_t_om': '2000_trade_other_manufacturing',
    '20_c_om': '2000_CO2_other_manufacturing',

    '20_t_rcycl': '2000_trade_recycling',
    '20_c_rcycl': '2000_CO2_recyling',

    '20_t_egw': '2000_trade_electricity_gas_and_water',
    '20_c_egw': '2000_CO2_electricity_gas_and_water',

    '20_t_con': '2000_trade_construction',
    '20_c_con': '2000_CO2_construction',

    '20_t_mar': '2000_trade_maintenance_and_repair',
    '20_c_mar': '2000_CO2_maintenance_and_repair',

    '20_t_wt': '2000_trade_wholesale_trade',
    '20_c_wt': '2000_CO2_wholesale_trade',

    '20_t_rt': '2000_trade_retail_trade',
    '20_c_rt': '2000_CO2_retail_trade',

    # Careful, misspelled in EORA, restraurants
    '20_t_har': '2000_trade_hotels_and_restaurants',
    '20_c_har': '2000_CO2_hotels_and_restaurants',

    '20_t_xprt': '2000_trade_transport',
    '20_c_xprt': '2000_CO2_transport',

    '20_t_pat': '2000_trade_post_and_telecommunications',
    '20_c_pat': '2000_CO2_post_and_telecommunications',

    # Careful, misspelled in EORA, finacial
    '20_t_fiba': '2000_trade_financial_intermediation_and_business_activities',
    '20_c_fiba': '2000_CO2_financial_intermediation_and_business_activities',

    '20_t_pa': '2000_trade_public_administration',
    '20_c_pa': '2000_CO2_public_administration',

    '20_t_ehos': '2000_trade_education_health_and_other_services',
    '20_c_ehos': '2000_CO2_education_health_and_other_services',

    '20_t_ph': '2000_trade_private_households',
    '20_c_ph': '2000_CO2_private_households',

    '20_t_oth': '2000_trade_others',
    '20_c_oth': '2000_CO2_others',

    '20_t_reri': '2000_trade_re-export_and_re-import',
    '20_c_reri': '2000_CO2_re-export_and_re-import',
    
    
    
    'pc_t_ag': '1990-2000_percent_change_trade_agriculture',
    'pc_c_ag': '1990-2000_percent_change_CO2_agriculture',

    'pc_t_fsh': '1990-2000_percent_change_trade_fishing',
    'pc_c_fsh': '1990-2000_percent_change_CO2_fishing',

    'pc_t_maq': '1990-2000_percent_change_trade_mining_and_quarrying',
    'pc_c_maq': '1990-2000_percent_change_CO2_mining_and_quarrying',

    'pc_t_fab': '1990-2000_percent_change_trade_food_and_beverages',
    'pc_c_fab': '1990-2000_percent_change_CO2_food_and_beverages',

    'pc_t_tawa': '1990-2000_percent_change_trade_textiles_and_wearing_apparel',
    'pc_c_tawa': '1990-2000_percent_change_CO2_textiles_and_wearing_apparel',

    'pc_t_wap': '1990-2000_percent_change_trade_wood_and_paper',
    'pc_c_wap': '1990-2000_percent_change_CO2_wood_and_paper',

    'pc_t_pcnmp': '1990-2000_percent_change_trade_petroleum_chemical_and_non-metallic_mineral_products',
    'pc_c_pcnmp': '1990-2000_percent_change_CO2_petroleum_chemical_and_non-metallic_mineral_products',

    'pc_t_mp': '1990-2000_percent_change_trade_metal_products',
    'pc_c_mp': '1990-2000_percent_change_CO2_metal_products',

    'pc_t_eam': '1990-2000_percent_change_trade_electrical_and_machinery',
    'pc_c_eam': '1990-2000_percent_change_CO2_electrical_and_machinery',

    'pc_t_te': '1990-2000_percent_change_trade_transport_equipment',
    'pc_c_te': '1990-2000_percent_change_CO2_transport_equipment',

    'pc_t_om': '1990-2000_percent_change_trade_other_manufacturing',
    'pc_c_om': '1990-2000_percent_change_CO2_other_manufacturing',

    'pc_t_rcycl': '1990-2000_percent_change_trade_recycling',
    'pc_c_rcycl': '1990-2000_percent_change_CO2_recyling',

    'pc_t_egw': '1990-2000_percent_change_trade_electricity_gas_and_water',
    'pc_c_egw': '1990-2000_percent_change_CO2_electricity_gas_and_water',

    'pc_t_con': '1990-2000_percent_change_trade_construction',
    'pc_c_con': '1990-2000_percent_change_CO2_construction',

    'pc_t_mar': '1990-2000_percent_change_trade_maintenance_and_repair',
    'pc_c_mar': '1990-2000_percent_change_CO2_maintenance_and_repair',

    'pc_t_wt': '1990-2000_percent_change_trade_wholesale_trade',
    'pc_c_wt': '1990-2000_percent_change_CO2_wholesale_trade',

    'pc_t_rt': '1990-2000_percent_change_trade_retail_trade',
    'pc_c_rt': '1990-2000_percent_change_CO2_retail_trade',

    # Careful, misspelled in EORA, restraurants
    'pc_t_har': '1990-2000_percent_change_trade_hotels_and_restaurants',
    'pc_c_har': '1990-2000_percent_change_CO2_hotels_and_restaurants',

    'pc_t_xprt': '1990-2000_percent_change_trade_transport',
    'pc_c_xprt': '1990-2000_percent_change_CO2_transport',

    'pc_t_pat': '1990-2000_percent_change_trade_post_and_telecommunications',
    'pc_c_pat': '1990-2000_percent_change_CO2_post_and_telecommunications',

    # Careful, misspelled in EORA, finacial
    'pc_t_fiba': '1990-2000_percent_change_trade_financial_intermediation_and_business_activities',
    'pc_c_fiba': '1990-2000_percent_change_CO2_financial_intermediation_and_business_activities',

    'pc_t_pa': '1990-2000_percent_change_trade_public_administration',
    'pc_c_pa': '1990-2000_percent_change_CO2_public_administration',

    'pc_t_ehos': '1990-2000_percent_change_trade_education_health_and_other_services',
    'pc_c_ehos': '1990-2000_percent_change_CO2_education_health_and_other_services',

    'pc_t_ph': '1990-2000_percent_change_trade_private_households',
    'pc_c_ph': '1990-2000_percent_change_CO2_private_households',

    'pc_t_oth': '1990-2000_percent_change_trade_others',
    'pc_c_oth': '1990-2000_percent_change_CO2_others',

    'pc_t_reri': '1990-2000_percent_change_trade_re-export_and_re-import',
    'pc_c_reri': '1990-2000_percent_change_CO2_re-export_and_re-import'
    
    
}

In [None]:
# Display the column-wise totals of china_province_data
# (presumably a quick sanity check of the values before mapping them)
china_province_data.sum()

In [None]:
# Export one map per column of china_province_data into two multi-page PDFs:
#   1. every column that is not GEOLEVEL2 / geometry and does not start with "pc"
#      (equal-interval classes, "OrRd" colormap)
#   2. every column starting with "pc"
#      (quantile classes, "bwr" colormap)
# Each page is titled with the readable name looked up in
# shp_key_to_readable_titles_conversion.
#
# Maybe improve with the below link?:
# https://gis.stackexchange.com/questions/197945/geopandas-polygon-to-matplotlib-patches-polygon-conversion

start_time = datetime.datetime.now()
print("start mapping non-pc variables with geopandas:", start_time)

# The with-block finalizes and closes the PDF even if one of the plots raises,
# so an interrupted run does not leave a dangling, unfinished file handle
with PdfPages(file_destination+"/china_province_plots_1990_and_2000_vals.pdf") as pp:
    # Iterating over the data frame yields its column labels
    for col in china_province_data:
        # Skip GEOLEVEL2 and geometry, and leave the "pc" columns for the second PDF
        if col in ["GEOLEVEL2", "geometry"] or col[0:2] == "pc":
            continue
        print(col)
        fig, ax = plt.subplots(1, figsize=(12,12))
        try:
            ax = china_province_data.plot(ax=ax, cmap="OrRd", scheme = "equal_interval", column=col, legend=True)
            fig.suptitle("Variable: " + shp_key_to_readable_titles_conversion[col])
            #plt.show()
            pp.savefig(fig)
        finally:
            # Important, because otherwise all figures remain open
            # And can hog memory; the finally also covers a failed plot
            plt.close(fig)

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)


start_time = datetime.datetime.now()
print("start mapping pc variables with geopandas:", start_time)

with PdfPages(file_destination+"/china_province_plots_per_change_1990_to_2000.pdf") as pp:
    for col in china_province_data:
        # Only the columns whose key starts with "pc" belong in this PDF
        if col[0:2] != "pc":
            continue
        print(col)
        fig, ax = plt.subplots(1, figsize=(12,12))
        try:
            ax = china_province_data.plot(ax=ax, cmap="bwr", scheme = "Quantiles", column=col, legend=True)
            fig.suptitle("Variable: " + shp_key_to_readable_titles_conversion[col])
            #plt.show()
            pp.savefig(fig)
        finally:
            # Important, because otherwise all figures remain open
            # And can hog memory; the finally also covers a failed plot
            plt.close(fig)

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)


In [None]:


# Export one quantile-classified map per "pc" column of china_province_data
# into a multi-page PDF, titled with the readable name from
# shp_key_to_readable_titles_conversion.
# NOTE(review): this cell repeats the "pc" export of the previous cell and
# writes to the same PDF path, so running both overwrites the file - confirm
# whether this cell is still needed.

start_time = datetime.datetime.now()
print("start mapping pc variables with geopandas:", start_time)

# The with-block finalizes and closes the PDF even if one of the plots raises
with PdfPages(file_destination+"/china_province_plots_per_change_1990_to_2000.pdf") as pp:
    # Iterating over the data frame yields its column labels
    for col in china_province_data:
        # Only the columns whose key starts with "pc" belong in this PDF
        if col[0:2] != "pc":
            continue
        print(col)
        fig, ax = plt.subplots(1, figsize=(12,12))
        try:
            ax = china_province_data.plot(ax=ax, cmap="bwr", scheme = "Quantiles", column=col, legend=True)
            fig.suptitle("Variable: " + shp_key_to_readable_titles_conversion[col])
            #plt.show()
            pp.savefig(fig)
        finally:
            # Close every figure once it is saved; otherwise all of them
            # remain open and can hog memory
            plt.close(fig)

end_time = datetime.datetime.now()
print("finished in: ", end_time - start_time)



In [None]:
# Display the "19_c_eam" column of china_province_data
# (presumably 1990 CO2 for electrical and machinery, following the key pattern
# used in shp_key_to_readable_titles_conversion - TODO confirm)
china_province_data["19_c_eam"]

In [None]:
# Close whichever PdfPages object is currently bound to pp.
# NOTE(review): the plotting cells above already call pp.close() themselves, so
# this looks like a leftover for runs that were interrupted mid-export - confirm
pp.close()