<font size=6> The total sovereign emissions calculated by adding up scope 1, 2 and 3 emissions  </font>

In [1]:
report_currency = 'USD'

In [2]:
from dotenv import dotenv_values, load_dotenv
import osc_ingest_trino as osc
import os
import pathlib
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

In [3]:
import trino
from sqlalchemy.engine import create_engine
from sqlalchemy import text

env_var_prefix = 'TRINO'

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ[f'{env_var_prefix}_USER'],
    host = os.environ[f'{env_var_prefix}_HOST'],
    port = os.environ[f'{env_var_prefix}_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
#connection = engine.connect()

#trino_bucket = osc.attach_s3_bucket("S3_DEV")

In [4]:
import boto3
import pandas as pd
import pint

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

In [5]:
import pandas as pd
from openscm_units import unit_registry
from pint import set_application_registry, Quantity
from pint_pandas import PintArray, PintType
# First we create the registry.
ureg = unit_registry
Q_ = ureg.Quantity
ureg.default_format = '~'
ureg.define("CO2e = CO2 = CO2eq")
ureg.define("USD = [currency] ")
#ureg.define("EUR = [currency_EUR] ")
ureg.define('Millions=1000000')
set_application_registry(ureg)

In [6]:
def requantify_df(df):
    units_col = None
    columns_reversed = reversed(df.columns)
    for col in columns_reversed:
        if col.endswith("_units"):
            if units_col:
                # We expect _units column to follow a non-units column
                raise ValueError
            units_col = col
            continue
        if units_col:
            if col + '_units' != units_col:
                raise ValueError
            if (df[units_col]==df[units_col][0]).all():
                # Make a PintArray
                new_col = PintArray(df[col], dtype=f"pint[{ureg(df[units_col][0]).u}]")
            else:
                # Make a pd.Series of Quantity in a way that does not throw UnitStrippedWarning
                new_col = pd.Series(data=df[col], name=col) * pd.Series(data=df[units_col].map(lambda x: ureg(x).u), name=col)
            df = df.drop(columns=units_col)
            df[col] = new_col
            units_col = None
    return df


In [7]:
# If DF_COL contains Pint quantities (because it is a PintArray or an array of Pint Quantities),
# return a two-column dataframe of magnitudes and units.
# If DF_COL contains no Pint quanities, return it unchanged.

def dequantify_column(df_col: pd.Series):
    if type(df_col.values)==PintArray:
        return pd.DataFrame({df_col.name: df_col.values.quantity.m,
                             df_col.name + "_units": str(df_col.values.dtype.units)},
                            index=df_col.index)
    elif df_col.size==0:
        return df_col
    elif isinstance(df_col.iloc[0], Quantity):
        values = df_col.map(lambda x: (x.m, x.u))
        return pd.DataFrame({df_col.name: df_col.map(lambda x: x.m),
                             df_col.name + "_units": df_col.map(lambda x: str(x.u))},
                            index=df_col.index)
    else:
        return df_col

# Rewrite dataframe DF so that columns containing Pint quantities are represented by a column for the Magnitude and column for the Units.
# The magnitude column retains the original column name and the units column is renamed with a _units suffix.
def dequantify_df(df):
    return pd.concat([dequantify_column(df[col]) for col in df.columns], axis=1)

In [8]:
# Get the values saved in Trino related with Foreign CO2 emissions embodied in gross imports

import pandas as pd
import pandas as pd
from openscm_units import unit_registry
from pint import set_application_registry, Quantity
from pint_pandas import PintArray, PintType

ureg = unit_registry
Q_ = ureg.Quantity
ureg.default_format = '~'
ureg.define("CO2e = CO2 = CO2eq")
ureg.define("USD = [currency] ")
ureg.define("EUR = [currency_EUR] ")
ureg.define('Millions=1000000')
set_application_registry(ureg)


ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'pcaf_sovereign_footprint'
ingest_table = 'sf_total_sovereign_emissions'


src_table_1 = 'sf_oecd_imgr_fco2'
src_table_2 = 'sf_unfccc_results'
src_table_3 = 'sf_oecd_exgr_dco2'
src_table_4 = 'sf_oecd_exch_rates'
src_table_5 = 'sf_wdi_population'

sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_1} """ 
df1 = pd.read_sql(sql, engine)
df1 = requantify_df(df1)
df1
# convert to CO2 units to t
df1['value'] = df1['value'].pint.to("t CO2e")
df1 = dequantify_df(df1)
df1

Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,FIN,NLD,DTOTAL,1995,Foreign CO2 emissions embodied in gross imports,763000.0,CO2eq * t
1,FIN,NLD,DTOTAL,1996,Foreign CO2 emissions embodied in gross imports,770000.0,CO2eq * t
2,FIN,NLD,DTOTAL,1997,Foreign CO2 emissions embodied in gross imports,763000.0,CO2eq * t
3,FIN,NLD,DTOTAL,1998,Foreign CO2 emissions embodied in gross imports,744000.0,CO2eq * t
4,FIN,NLD,DTOTAL,1999,Foreign CO2 emissions embodied in gross imports,723000.0,CO2eq * t
...,...,...,...,...,...,...,...
334651,MAR,IND,D35,2014,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t
334652,MAR,IND,D35,2015,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t
334653,MAR,IND,D35,2016,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t
334654,MAR,IND,D35,2017,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t


In [9]:
df1.to_csv("oecd_imgr_fco2_results.csv",index=False)

In [10]:
sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_3} where industry_code='DTOTAL'""" 
df_exgr = pd.read_sql(sql, engine)
df_exgr = requantify_df(df_exgr)
# convert to CO2 units to t
df_exgr['value'] = df_exgr['value'].pint.to("t CO2e")
df_exgr = dequantify_df(df_exgr)
df_exgr

Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,FIN,NLD,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,932000.0,CO2eq * t
1,FIN,NLD,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,915000.0,CO2eq * t
2,FIN,NLD,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,948000.0,CO2eq * t
3,FIN,NLD,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,924000.0,CO2eq * t
4,FIN,NLD,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,820000.0,CO2eq * t
...,...,...,...,...,...,...,...
167323,ARG,MMR,DTOTAL,2014,Domestic CO2 emissions embodied in gross exports,2000.0,CO2eq * t
167324,ARG,MMR,DTOTAL,2015,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t
167325,ARG,MMR,DTOTAL,2016,Domestic CO2 emissions embodied in gross exports,5000.0,CO2eq * t
167326,ARG,MMR,DTOTAL,2017,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t


In [11]:
###########################

sql=f"""
select validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_4} where country_iso_code='DEU'""" 
df_exch_eur = pd.read_sql(sql, engine)
df_exch_eur

Unnamed: 0,validity_date,value
0,1950,2.144861
1,1951,2.144861
2,1952,2.144861
3,1953,2.147426
4,1954,2.147426
...,...,...
67,2017,0.885206
68,2018,0.846773
69,2019,0.893276
70,2020,0.875506


In [12]:
import pandas as pd 
sql=f"""
select country_iso_code,validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_5} """ 
df_population = pd.read_sql(sql, engine)
df_population["value"] = pd.to_numeric(df_population["value"], errors='coerce')
df_population["validity_date"] = pd.to_numeric(df_population["validity_date"], errors='coerce')
df_population=df_population.convert_dtypes()
df_population
#df_population.info(verbose=True)


Unnamed: 0,country_iso_code,validity_date,value
0,AFG,2017,36296111
1,ALB,2017,2873457
2,DZA,2017,41389174
3,ASM,2017,55617
4,AND,2017,76997
...,...,...,...
3187,SSF,2021,1165563987
3188,SSA,2021,1165464785
3189,TSS,2021,1165563987
3190,UMC,2021,2501427939


<font size=3> Remove invalid country codes </font>

In [13]:
import pycountry
import pandas as pd
df_country = pd.DataFrame([country.__dict__['_fields'] for country in pycountry.countries])
df_country = df_country.rename(columns={'alpha_3':'country_iso_code'})
df_country=df_country[['country_iso_code']]
# add Rest of World to the country dataframe
dict_row = {'country_iso_code':'ROW'}
df_country = pd.concat([df_country,pd.DataFrame([dict_row])],ignore_index=True)
df_country.info(verbose=True)
df1=df1.convert_dtypes()
#df1.info(verbose=True)
df1 = pd.merge(df1,df_country,on=['country_iso_code'])  
df1=df1.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df1

df_exgr = pd.merge(df_exgr,df_country,on=['country_iso_code'])  
df_exgr=df_exgr.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df_exgr






<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   country_iso_code  250 non-null    object
dtypes: object(1)
memory usage: 2.1+ KB


Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,FIN,NLD,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,932000.0,CO2eq * t
1,FIN,NLD,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,915000.0,CO2eq * t
2,FIN,NLD,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,948000.0,CO2eq * t
3,FIN,NLD,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,924000.0,CO2eq * t
4,FIN,NLD,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,820000.0,CO2eq * t
...,...,...,...,...,...,...,...
135067,ARG,MMR,DTOTAL,2014,Domestic CO2 emissions embodied in gross exports,2000.0,CO2eq * t
135068,ARG,MMR,DTOTAL,2015,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t
135069,ARG,MMR,DTOTAL,2016,Domestic CO2 emissions embodied in gross exports,5000.0,CO2eq * t
135070,ARG,MMR,DTOTAL,2017,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t


In [14]:
df_country

Unnamed: 0,country_iso_code
0,ABW
1,AFG
2,AGO
3,AIA
4,ALA
...,...
245,YEM
246,ZAF
247,ZMB
248,ZWE


Calcuate Scope 2:
GHG emissions occurring as a consequence of the domestic use of grid-supplied electricity, heat,
steam and/or cooling which is imported from another territory


In [15]:
rename_columns = {'value':'total','partner_iso_code':'country_iso_code','value_units' :'total_units'}
agg_columns = { 'value' : 'sum'}
columns_order = ['attribute','industry_code','country_iso_code','validity_date','total','total_units']
df1 = df1.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df1.convert_dtypes()

df1.info(verbose=True)
df1

######################

df_exgr = df_exgr.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df_exgr.convert_dtypes()

df_exgr.info(verbose=True)
df_exgr



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4032 entries, 0 to 4031
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         4032 non-null   string 
 1   industry_code     4032 non-null   string 
 2   country_iso_code  4032 non-null   string 
 3   validity_date     4032 non-null   Int64  
 4   total             4032 non-null   Float64
 5   total_units       4032 non-null   string 
dtypes: Float64(1), Int64(1), string(4)
memory usage: 197.0 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         2016 non-null   string 
 1   industry_code     2016 non-null   string 
 2   country_iso_code  2016 non-null   string 
 3   validity_date     2016 non-null   Int64  
 4   total             2016 non-null   Float64
 5   total_units    

Unnamed: 0,attribute,industry_code,country_iso_code,validity_date,total,total_units
0,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1995,2008843000.0,CO2eq * t
1,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1996,2055450000.0,CO2eq * t
2,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1997,2151870000.0,CO2eq * t
3,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1998,2177243000.0,CO2eq * t
4,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1999,2300588000.0,CO2eq * t
...,...,...,...,...,...,...
2011,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2014,266081000.0,CO2eq * t
2012,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2015,231751000.0,CO2eq * t
2013,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2016,211686000.0,CO2eq * t
2014,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2017,226732000.0,CO2eq * t


In [15]:
#df1.to_csv("oecd_imgr_fco2_results2.csv",index=False)

In [16]:
# divide the dataframe into two dataframes 
# to calculate Scope2 and Scope3 Values
df_D35 = df1[df1['industry_code']=='D35']
df_D35 = df_D35[['country_iso_code','validity_date','total','total_units']]
df_DTOTAL = df1[df1['industry_code']=='DTOTAL']
df_DTOTAL = df_DTOTAL[['country_iso_code','validity_date','total','total_units']]
df_result = pd.merge(df_D35,df_DTOTAL,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()

#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total_x':'scope2_value','total_y':'scope3_value','total_units_x':'scope2_value_units','total_units_y':'scope3_value_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result['scope3_value'] = df_result['scope3_value'] - df_result['scope2_value'] 
df_result[df_result['country_iso_code']=='CAN']

df_result = pd.merge(df_result,df_exgr,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()
#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total':'exported_emissions','total_units':'exported_emissions_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result.drop(columns=['attribute','industry_code'],inplace=True)
df_result.info(verbose=True)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   country_iso_code          2016 non-null   string 
 1   validity_date             2016 non-null   Int64  
 2   scope2_value              2016 non-null   Float64
 3   scope2_value_units        2016 non-null   string 
 4   scope3_value              2016 non-null   Float64
 5   scope3_value_units        2016 non-null   string 
 6   exported_emissions        2016 non-null   Float64
 7   exported_emissions_units  2016 non-null   string 
dtypes: Float64(3), Int64(1), string(4)
memory usage: 134.0 KB


In [17]:
# Get the values saved in Trino containing  GHG emissions provided by UNFCCC & GDP PPP values from Worldbank

import pandas as pd


sql=f"""
select country_iso_code,validity_date,ghg_total_without_lulucf,ghg_total_without_lulucf_units,scope1_excl_source,ghg_total_with_lulucf,ghg_total_with_lulucf_units
,scope1_incl_source,gdp,gdp_units,gdp_ppp,gdp_ppp_units
from {ingest_catalog}.{ingest_schema}.{src_table_2} """ 
df_unfccc = pd.read_sql(sql, engine)

if (report_currency == 'EUR'):
    df_unfccc = pd.merge(df_unfccc,df_exch_eur,on=['validity_date'],how='inner')   
    df_unfccc['gdp'] = df_unfccc['gdp'] * df_unfccc['value']
    df_unfccc['gdp_ppp'] = df_unfccc['gdp_ppp'] * df_unfccc['value']
    df_unfccc['gdp_units'] = 'EUR'
    df_unfccc['gdp_ppp_units'] = 'EUR'
    df_unfccc.drop(columns=['value'],inplace=True)
    


df_unfccc[df_unfccc['country_iso_code']=='ARG']
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)
#df1["units"] = "kt"
#df1 = requantify_df(df1).convert_dtypes()
#df1.info(verbose=True)
#df1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15805 entries, 0 to 15804
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                15805 non-null  string 
 1   validity_date                   15805 non-null  Int64  
 2   ghg_total_without_lulucf        12159 non-null  Float64
 3   ghg_total_without_lulucf_units  15805 non-null  string 
 4   scope1_excl_source              12159 non-null  string 
 5   ghg_total_with_lulucf           12160 non-null  Float64
 6   ghg_total_with_lulucf_units     15805 non-null  string 
 7   scope1_incl_source              12160 non-null  string 
 8   gdp                             13365 non-null  Float64
 9   gdp_units                       15805 non-null  string 
 10  gdp_ppp                         7728 non-null   Float64
 11  gdp_ppp_units                   15805 non-null  string 
dtypes: Float64(4), Int64(1), string(

In [18]:
import numpy as np
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)

df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))

df_unfccc = requantify_df(df_unfccc)
df_unfccc.info(verbose=True)
# convert to CO2 units to Mt
df_unfccc['ghg_total_without_lulucf'] = df_unfccc['ghg_total_without_lulucf'].pint.to("t CO2e")
df_unfccc['ghg_total_with_lulucf'] = df_unfccc['ghg_total_with_lulucf'].pint.to("t CO2e")
####

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf==0, np.nan, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf==0, np.nan, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp==0, np.nan, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp==0, np.nan, df_unfccc.gdp_ppp))

df_unfccc.info(verbose=True)


df_unfccc[df_unfccc['country_iso_code']=='ARG']



#df_unfccc['gdp'] = df_unfccc['gdp'].pint.to("Millions USD")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15805 entries, 0 to 15804
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                15805 non-null  string 
 1   validity_date                   15805 non-null  Int64  
 2   ghg_total_without_lulucf        12159 non-null  Float64
 3   ghg_total_without_lulucf_units  15805 non-null  string 
 4   scope1_excl_source              12159 non-null  string 
 5   ghg_total_with_lulucf           12160 non-null  Float64
 6   ghg_total_with_lulucf_units     15805 non-null  string 
 7   scope1_incl_source              12160 non-null  string 
 8   gdp                             13365 non-null  Float64
 9   gdp_units                       15805 non-null  string 
 10  gdp_ppp                         7728 non-null   Float64
 11  gdp_ppp_units                   15805 non-null  string 
dtypes: Float64(4), Int64(1), string(



Unnamed: 0,country_iso_code,validity_date,ghg_total_without_lulucf,scope1_excl_source,ghg_total_with_lulucf,scope1_incl_source,gdp,gdp_ppp
419,ARG,1991,244000000.0,PRIMAP,310509690.0,PRIMAP,189719984268.485,264521270370.093
420,ARG,1992,249000000.0,PRIMAP,317901690.0,PRIMAP,228778994288.214,292023773373.283
421,ARG,1993,254000000.0,PRIMAP,315497360.0,PRIMAP,236741715015.015,323480141867.088
422,ARG,1995,258000000.0,PRIMAP,326356350.0,PRIMAP,258031750000.0,346844681084.361
423,ARG,1996,271000000.0,PRIMAP,358400100.0,PRIMAP,272149750000.0,372715304230.914
...,...,...,...,...,...,...,...,...
13940,ARG,1980,237000000.0,PRIMAP,295763245.0,PRIMAP,76961923741.0701,0.0
13941,ARG,1981,228000000.0,PRIMAP,287037286.0,PRIMAP,78676842366.5529,0.0
13942,ARG,1982,227000000.0,PRIMAP,313544111.99999994,PRIMAP,84307486836.724,0.0
13943,ARG,1983,230000000.0,PRIMAP,323833105.0,PRIMAP,103979106777.911,0.0


In [19]:
#df_unfccc = dequantify_df(df_unfccc)
#df_unfccc
#df_unfccc.info(verbose=True)
#df_unfccc

df_result= requantify_df(df_result)
#df_result= dequantify_df(df_result)
#df_result




<font size=3><b>Merge UNFCCC , Worldbank and OECD data</b> </font>

In [20]:
#df_result.info(verbose=True)
#df_result= requantify_df(df_result)
#df_result.info(verbose=True)

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))




df_result = pd.merge(df_result,df_unfccc,on=['country_iso_code','validity_date'],how='outer')  


rename_columns = {'ghg_total_without_lulucf':'scope1_excl_lulucf',
                  'ghg_total_with_lulucf':'scope1_incl_lulucf'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)

# enrich with population data

df_result = pd.merge(df_result,df_population,on=['country_iso_code','validity_date'],how='outer')  


rename_columns = {'value':'population'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)
df_result["population"] = df_result["population"].fillna(0)

df_result = dequantify_df(df_result)

#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope1_incl_lulucf'] = df_result['scope1_incl_lulucf'].fillna(0).astype(float)
#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope2_value'] = df_result['scope2_value'].fillna(0).astype(float)
#df_result['scope3_value'] = df_result['scope3_value'].fillna(0).astype(float)


df_result['sum_yn'] = 1 * ((df_result['scope1_excl_lulucf'] > 0) & (df_result['scope2_value'] > 0) &  (df_result['scope3_value'] > 0) )
df_result['calc_attr_factor_yn'] = 1 * (df_result['gdp_ppp'] > 0)
df_result['population_available'] = 1 * (df_result['population'] > 0)


df_result['consumption_emissions_excl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])
df_result['consumption_emissions_incl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])

df_result.info(verbose=True)

df_result['consumption_emissions_excl_lulucf_per_capita']  = df_result['population_available'] * df_result['consumption_emissions_excl_lulucf'] / df_result['population']
df_result['consumption_emissions_incl_lulucf_per_capita']  = df_result['population_available'] * df_result['consumption_emissions_incl_lulucf'] / df_result['population']


###############
df_result['attribution_factor_scope1_excl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_excl_lulucf']) / df_result['gdp_ppp']
df_result['attribution_factor_scope1_incl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_incl_lulucf']) / df_result['gdp_ppp']

df_result['attribution_factor_excl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
df_result['attribution_factor_incl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
##########
df_result['attribution_factor_excl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])
df_result['attribution_factor_incl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])



#df_result['ghg_total_without_lulucf']=df_result['ghg_total_without_lulucf'].replace(0, np.nan)
#df_result['ghg_total_with_lulucf']=df_result['ghg_total_with_lulucf'].replace(0, np.nan)
#df_result['gdp']=df_result['gdp'].replace(0, np.nan)
#df_result['gdp_ppp']=df_result['gdp_ppp'].replace(0, np.nan)



#df_result

columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_excl_source','scope1_incl_lulucf','scope1_incl_lulucf_units',
                 'scope1_incl_source','scope2_value','scope2_value_units','scope3_value','scope3_value_units',
                 'exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'population','consumption_emissions_excl_lulucf_per_capita','consumption_emissions_incl_lulucf_per_capita',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
#'consumption_emissions_excl_lulucf_units','consumption_emissions_incl_lulucf_units','consumption_emissions_excl_lulucf_per_capita_units','consumption_emissions_incl_lulucf_per_capita_units',
#'attribution_factor_excl_lulucf_units','attribution_factor_excl_lulucf_gdp_units','attribution_factor_incl_lulucf_units','attribution_factor_incl_lulucf_units',
#'attribution_factor_scope1_excl_lulucf_units','attribution_factor_scope1_incl_lulucf_units',

df_result = df_result.reindex(columns=columns_order)
df_result.info(verbose=True)
df_result= dequantify_df(df_result)
df_result.info(verbose=True)
# remove invalid country_iso_codes (regions, ...)

df_result = pd.merge(df_result,df_country,on=['country_iso_code'])  

cols= ['scope1_excl_lulucf','scope1_incl_lulucf','scope2_value','scope3_value',
                 'exported_emissions','gdp_ppp','gdp',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'population','consumption_emissions_excl_lulucf_per_capita','consumption_emissions_incl_lulucf_per_capita',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
df_result[cols] = df_result[cols].replace({0:np.nan})

df_result.info(verbose=True)

df_result


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16213 entries, 0 to 16212
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   country_iso_code    16213 non-null  string         
 1   validity_date       16213 non-null  Int64          
 2   scope2_value        2016 non-null   pint[CO2eq * t]
 3   scope3_value        2016 non-null   pint[CO2eq * t]
 4   exported_emissions  2016 non-null   pint[CO2eq * t]
 5   scope1_excl_lulucf  15805 non-null  pint[CO2eq * t]
 6   scope1_excl_source  12159 non-null  string         
 7   scope1_incl_lulucf  15805 non-null  pint[CO2eq * t]
 8   scope1_incl_source  12160 non-null  string         
 9   gdp                 15805 non-null  pint[USD]      
 10  gdp_ppp             15805 non-null  pint[USD]      
dtypes: Int64(1), pint[CO2eq * t](5), pint[USD](2), string(3)
memory usage: 1.4 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16290 entries, 0 

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,...,consumption_emissions_incl_lulucf,population,consumption_emissions_excl_lulucf_per_capita,consumption_emissions_incl_lulucf_per_capita,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp
0,ARG,1995,258000000.0,CO2eq * t,PRIMAP,326356350.0,CO2eq * t,PRIMAP,21000.0,CO2eq * t,...,316048350.0,,,,0.000744,0.000941,0.000771,0.001036,0.000968,0.001301
1,ARG,1996,271000000.0,CO2eq * t,PRIMAP,358400100.0,CO2eq * t,PRIMAP,28000.0,CO2eq * t,...,347408100.0,,,,0.000727,0.000962,0.000757,0.001037,0.000992,0.001358
2,ARG,1997,272000000.0,CO2eq * t,PRIMAP,354602760.0,CO2eq * t,PRIMAP,26000.0,CO2eq * t,...,342713760.0,,,,0.000664,0.000865,0.000700,0.000980,0.000902,0.001262
3,ARG,1998,278000000.0,CO2eq * t,PRIMAP,379164780.0,CO2eq * t,PRIMAP,43000.0,CO2eq * t,...,363660780.0,,,,0.000646,0.000881,0.000677,0.000975,0.000912,0.001313
4,ARG,1999,283000000.0,CO2eq * t,PRIMAP,369599380.0,CO2eq * t,PRIMAP,61000.0,CO2eq * t,...,355975380.0,,,,0.000671,0.000876,0.000700,0.001042,0.000906,0.001347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13034,GIB,2018,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33715,,,,,,,,
13035,GIB,2012,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33653,,,,,,,,
13036,GIB,2014,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33726,,,,,,,,
13037,GIB,2013,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33694,,,,,,,,


In [21]:
#df_result['scope1_value_units'] = 'Mt CO2e'
#df_result['scope1_value'] = df_result['scope1_value'].round(decimals = 3)
#pd.options.display.float_format = '{:.3f}'.format
#df_result= df_result.convert_dtypes()
#df_result.info(verbose=True)
df_result[df_result['country_iso_code']=='CAN']
#df_result

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,...,consumption_emissions_incl_lulucf,population,consumption_emissions_excl_lulucf_per_capita,consumption_emissions_incl_lulucf_per_capita,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp
441,CAN,1995,651000000.0,CO2eq * t,PRIMAP,607713232.0,CO2eq * t,PRIMAP,5129000.0,CO2eq * t,...,655305232.0,,,,0.000946,0.000883,0.001214,0.001378,0.001151,0.001307
442,CAN,1996,673000000.0,CO2eq * t,PRIMAP,624227205.0,CO2eq * t,PRIMAP,5269000.0,CO2eq * t,...,677950205.0,,,,0.000945,0.000877,0.001217,0.001374,0.001148,0.001297
443,CAN,1997,689000000.0,CO2eq * t,PRIMAP,636612930.0,CO2eq * t,PRIMAP,6362000.0,CO2eq * t,...,685212930.0,,,,0.000912,0.000843,0.001189,0.001372,0.001120,0.001292
444,CAN,1998,696000000.0,CO2eq * t,PRIMAP,637685168.0,CO2eq * t,PRIMAP,7993000.0,CO2eq * t,...,693163168.0,,,,0.000877,0.000803,0.001157,0.001448,0.001083,0.001356
445,CAN,1999,709000000.0,CO2eq * t,PRIMAP,658474168.0,CO2eq * t,PRIMAP,9100000.0,CO2eq * t,...,718937168.0,,,,0.000838,0.000778,0.001112,0.001387,0.001052,0.001313
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499,CAN,1975,506000000.0,CO2eq * t,PRIMAP,617382960.0,CO2eq * t,PRIMAP,,CO2eq * t,...,,,,,,,,,,
500,CAN,1976,506000000.0,CO2eq * t,PRIMAP,617739360.0,CO2eq * t,PRIMAP,,CO2eq * t,...,,,,,,,,,,
501,CAN,1977,517000000.0,CO2eq * t,PRIMAP,629125100.0,CO2eq * t,PRIMAP,,CO2eq * t,...,,,,,,,,,,
502,CAN,1978,526000000.0,CO2eq * t,PRIMAP,638532830.0,CO2eq * t,PRIMAP,,CO2eq * t,...,,,,,,,,,,


Calculate Attribution factor = (Scope1+Scope2+Scope3)/GDP


In [22]:


#df_result= requantify_df(df_result)
format_mapper =  {'gdp_ppp': '{0:,.0f}',
           'gdp': '{0:,.0f}',       
           'attribution_factor_excl_lulucf': '{0:.8f}',
           'attribution_factor_incl_lulucf': '{0:.8f}',
           'attribution_factor_excl_lulucf_gdp': '{0:.8f}',
           'attribution_factor_incl_lulucf_gdp': '{0:.8f}',       
           'scope1_excl_lulucf': '{0:,.0f}',       
           'scope1_incl_lulucf': '{0:,.0f}',              
           'scope2_value': '{0:,.0f}',              
           'scope3_value': '{0:,.0f}'
                 }

df_result[df_result['country_iso_code']=='CAN'].style.format(format_mapper)


ImportError: Pandas requires version '3.0.0' or newer of 'jinja2' (version '2.11.3' currently installed).

<font size=3>Save the results in Trino</font>

In [23]:
# Add the sources used for the calculations in the result table
#df_result['scope1_excl_source'] = "UNFCCC -> Time_Series_GHG_total_without_LULUCF_in_kt_CO2_equivalent.xlsx"
#df_result['scope1_incl_source'] = "UNFCCC -> Time_Series_GHG_total_with_LULUCF_in_kt_CO2_equivalent.xlsx"
df_result['scope2_source'] = "OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE"
df_result['scope3_source'] = "OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value "          
df_result['gdp_ppp_source'] = "Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv"          


In [24]:
import osc_ingest_trino as osc
#df_result['validity_date']=pd.to_datetime(df_result['validity_date'], unit='D')
df_result=df_result.convert_dtypes()
df_result.info(verbose=True)

df_result['year'] = df_result['validity_date']
#df_result['validity_date'] = pd.to_datetime(df_result.validity_date, format='%Y')
df_result = df_result.convert_dtypes()
#df_result.validity_date = df_result.validity_date.astype('datetime64[ns]')



columnschema = osc.create_table_schema_pairs(df_result,typemap={'datetime64[ns]':'timestamp(6)'}) 
sql = f"""
drop table if exists {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
print(sql)
with engine.connect() as conn:
    qres = conn.execute(text(sql))
    #print(qres.fetchall())





<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13039 entries, 0 to 13038
Data columns (total 32 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   country_iso_code                              13039 non-null  string 
 1   validity_date                                 13039 non-null  Int64  
 2   scope1_excl_lulucf                            12159 non-null  Float64
 3   scope1_excl_lulucf_units                      13039 non-null  string 
 4   scope1_excl_source                            12159 non-null  string 
 5   scope1_incl_lulucf                            12159 non-null  Float64
 6   scope1_incl_lulucf_units                      13039 non-null  string 
 7   scope1_incl_source                            12160 non-null  string 
 8   scope2_value                                  1419 non-null   Float64
 9   scope2_value_units                            13039 non-null 

In [25]:
df_result.info("verbose=True")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13039 entries, 0 to 13038
Data columns (total 33 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   country_iso_code                              13039 non-null  string 
 1   validity_date                                 13039 non-null  Int64  
 2   scope1_excl_lulucf                            12159 non-null  Float64
 3   scope1_excl_lulucf_units                      13039 non-null  string 
 4   scope1_excl_source                            12159 non-null  string 
 5   scope1_incl_lulucf                            12159 non-null  Float64
 6   scope1_incl_lulucf_units                      13039 non-null  string 
 7   scope1_incl_source                            12160 non-null  string 
 8   scope2_value                                  1419 non-null   Float64
 9   scope2_value_units                            13039 non-null 

In [26]:
df_result = df_result[df_result['year']>=1990]
df_result.sort_values(by=['country_iso_code','validity_date'], inplace=True)
df_result.to_excel("pcaf_results.xlsx",index=False)
df_result.to_csv("pcaf_results.csv",index=False)


In [27]:
df_result.to_sql(ingest_table,
           con=engine,
           schema=ingest_schema,
           if_exists='append',
           index=False,
           method=osc.TrinoBatchInsert(batch_size = 50 , verbose = True))


constructed fully qualified table name as: "pcaf_sovereign_footprint.sf_total_sovereign_emissions"
inserting 50 records
  ('ABW', 1990, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 1363755808.78006, 'USD', 764804469.273743, 'USD', NULL, NULL, 62152, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE', 'OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value ', 'Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv', 1990)
  ('ABW', 1991, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 1522141298.82589, 'USD', 872067039.106145, 'USD', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE', 'OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value ', 'Worldbank -> AP

In [28]:
sql=f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table} where country_iso_code = 'BRA' and year > 2017 """
pd.read_sql(sql, engine)

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,...,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp,scope2_source,scope3_source,gdp_ppp_source,year
0,BRA,2018,1090000000.0,CO2eq * t,PRIMAP,1479250000.0,CO2eq * t,PRIMAP,11000.0,CO2eq * t,...,0.000346,0.00047,0.000383,0.000628,0.000506,0.000831,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2018
1,BRA,2019,1100000000.0,CO2eq * t,PRIMAP,1499492000.0,CO2eq * t,PRIMAP,,CO2eq * t,...,0.000339,0.000463,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2019
2,BRA,2020,1080000000.0,CO2eq * t,PRIMAP,1439934000.0,CO2eq * t,PRIMAP,,CO2eq * t,...,0.00034,0.000453,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2020
3,BRA,2021,1130000000.0,CO2eq * t,PRIMAP,1488956000.0,CO2eq * t,PRIMAP,,CO2eq * t,...,0.000324,0.000427,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2021
4,BRA,2022,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2022
