<font size=6> The total sovereign emissions calculated by adding up scope 1, 2 and 3 emissions  </font>

In [1]:
report_currency = 'USD'

In [2]:
from dotenv import dotenv_values, load_dotenv
import osc_ingest_trino as osc
import os
import pathlib
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

In [3]:
import trino
from sqlalchemy.engine import create_engine

env_var_prefix = 'TRINO'

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ[f'{env_var_prefix}_USER'],
    host = os.environ[f'{env_var_prefix}_HOST'],
    port = os.environ[f'{env_var_prefix}_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

#trino_bucket = osc.attach_s3_bucket("S3_DEV")

In [4]:
import boto3
import pandas as pd

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

In [5]:
# define source and destination tables
# LULUCF (Land Use, Land-Use Change and Forestry)
ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'pcaf_sovereign_footprint'
ingest_table = 'sf_total_sovereign_emissions'
src_table_1 = 'sf_oecd_imgr_fco2'
src_table_2 = 'sf_unfccc_results'
src_table_3 = 'sf_oecd_exgr_dco2'
src_table_4 = 'sf_oecd_exch_rates'




In [6]:
import pandas as pd
from openscm_units import unit_registry
from pint import set_application_registry, Quantity
from pint_pandas import PintArray, PintType
# First we create the registry.
ureg = unit_registry
Q_ = ureg.Quantity
ureg.default_format = '~'
ureg.define("CO2e = CO2 = CO2eq")
ureg.define("USD = [currency] ")
ureg.define("EUR = [currency_EUR] ")
ureg.define('Millions=1000000')
set_application_registry(ureg)


In [7]:
def requantify_df(df):
    units_col = None
    columns_reversed = reversed(df.columns)
    for col in columns_reversed:
        if col.endswith("_units"):
            if units_col:
                # We expect _units column to follow a non-units column
                raise ValueError
            units_col = col
            continue
        if units_col:
            if col + '_units' != units_col:
                raise ValueError
            if (df[units_col]==df[units_col][0]).all():
                # Make a PintArray
                new_col = PintArray(df[col], dtype=f"pint[{ureg(df[units_col][0]).u}]")
            else:
                # Make a pd.Series of Quantity in a way that does not throw UnitStrippedWarning
                new_col = pd.Series(data=df[col], name=col) * pd.Series(data=df[units_col].map(lambda x: ureg(x).u), name=col)
            df = df.drop(columns=units_col)
            df[col] = new_col
            units_col = None
    return df


In [8]:
# If DF_COL contains Pint quantities (because it is a PintArray or an array of Pint Quantities),
# return a two-column dataframe of magnitudes and units.
# If DF_COL contains no Pint quanities, return it unchanged.

def dequantify_column(df_col: pd.Series):
    if type(df_col.values)==PintArray:
        return pd.DataFrame({df_col.name: df_col.values.quantity.m,
                             df_col.name + "_units": str(df_col.values.dtype.units)},
                            index=df_col.index)
    elif df_col.size==0:
        return df_col
    elif isinstance(df_col.iloc[0], Quantity):
        values = df_col.map(lambda x: (x.m, x.u))
        return pd.DataFrame({df_col.name: df_col.map(lambda x: x.m),
                             df_col.name + "_units": df_col.map(lambda x: str(x.u))},
                            index=df_col.index)
    else:
        return df_col

# Rewrite dataframe DF so that columns containing Pint quantities are represented by a column for the Magnitude and column for the Units.
# The magnitude column retains the original column name and the units column is renamed with a _units suffix.
def dequantify_df(df):
    return pd.concat([dequantify_column(df[col]) for col in df.columns], axis=1)

In [9]:
# Get the values saved in Trino related with Foreign CO2 emissions embodied in gross imports

import pandas as pd

sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_1} """ 
df1 = pd.read_sql(sql, engine)
df1 = requantify_df(df1)
df1
# convert to CO2 units to t
df1['value'] = df1['value'].pint.to("t CO2e")
df1 = dequantify_df(df1)


sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_3} where industry_code='DTOTAL'""" 
df_exgr = pd.read_sql(sql, engine)
df_exgr = requantify_df(df_exgr)
# convert to CO2 units to t
df_exgr['value'] = df_exgr['value'].pint.to("t CO2e")
df_exgr = dequantify_df(df_exgr)

###########################

sql=f"""
select validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_4} where country_iso_code='DEU'""" 
df_exch_eur = pd.read_sql(sql, engine)
df_exch_eur



Unnamed: 0,validity_date,value
0,1950,2.144861
1,1951,2.144861
2,1952,2.144861
3,1953,2.147426
4,1954,2.147426
...,...,...
67,2017,0.885206
68,2018,0.846773
69,2019,0.893276
70,2020,0.875506


<font size=3> Remove invalid country codes </font>

In [10]:
import pycountry
import pandas as pd
df_country = pd.DataFrame([country.__dict__['_fields'] for country in pycountry.countries])
df_country = df_country.rename(columns={'alpha_3':'country_iso_code'})
df_country=df_country[['country_iso_code']]
# add Rest of World to the country dataframe
dict_row = {'country_iso_code':'ROW'}
df_country = df_country.append(dict_row, ignore_index = True)
df_country.info(verbose=True)
df1=df1.convert_dtypes()
#df1.info(verbose=True)
df1 = pd.merge(df1,df_country,on=['country_iso_code'])  
df1=df1.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df1

df_exgr = pd.merge(df_exgr,df_country,on=['country_iso_code'])  
df_exgr=df_exgr.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df_exgr






  df_country = df_country.append(dict_row, ignore_index = True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   country_iso_code  250 non-null    object
dtypes: object(1)
memory usage: 2.1+ KB


Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,HRV,WLD,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,5000000.0,CO2eq * t
1,HRV,WLD,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,5385000.0,CO2eq * t
2,HRV,WLD,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,5698000.0,CO2eq * t
3,HRV,WLD,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,6368000.0,CO2eq * t
4,HRV,WLD,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,6078000.0,CO2eq * t
...,...,...,...,...,...,...,...
135067,DEU,MMR,DTOTAL,2014,Domestic CO2 emissions embodied in gross exports,18000.0,CO2eq * t
135068,DEU,MMR,DTOTAL,2015,Domestic CO2 emissions embodied in gross exports,28000.0,CO2eq * t
135069,DEU,MMR,DTOTAL,2016,Domestic CO2 emissions embodied in gross exports,34000.0,CO2eq * t
135070,DEU,MMR,DTOTAL,2017,Domestic CO2 emissions embodied in gross exports,36000.0,CO2eq * t


Calcuate Scope 2:
GHG emissions occurring as a consequence of the domestic use of grid-supplied electricity, heat,
steam and/or cooling which is imported from another territory


In [11]:
rename_columns = {'value':'total','partner_iso_code':'country_iso_code','value_units' :'total_units'}
agg_columns = { 'value' : 'sum'}
columns_order = ['attribute','industry_code','country_iso_code','validity_date','total','total_units']
df1 = df1.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df1.convert_dtypes()

df1.info(verbose=True)
df1

######################

df_exgr = df_exgr.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df_exgr.convert_dtypes()

df_exgr.info(verbose=True)
df_exgr



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4032 entries, 0 to 4031
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         4032 non-null   string 
 1   industry_code     4032 non-null   string 
 2   country_iso_code  4032 non-null   string 
 3   validity_date     4032 non-null   Int64  
 4   total             4032 non-null   Float64
 5   total_units       4032 non-null   string 
dtypes: Float64(1), Int64(1), string(4)
memory usage: 197.0 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         2016 non-null   string 
 1   industry_code     2016 non-null   string 
 2   country_iso_code  2016 non-null   string 
 3   validity_date     2016 non-null   Int64  
 4   total             2016 non-null   Float64
 5   total_units    

Unnamed: 0,attribute,industry_code,country_iso_code,validity_date,total,total_units
0,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1995,2008843000.0,CO2eq * t
1,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1996,2055450000.0,CO2eq * t
2,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1997,2151870000.0,CO2eq * t
3,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1998,2177243000.0,CO2eq * t
4,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1999,2300588000.0,CO2eq * t
...,...,...,...,...,...,...
2011,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2014,266081000.0,CO2eq * t
2012,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2015,231751000.0,CO2eq * t
2013,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2016,211686000.0,CO2eq * t
2014,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2017,226732000.0,CO2eq * t


In [12]:
# divide the dataframe into two dataframes 
# to calculate Scope2 and Scope3 Values
df_D35 = df1[df1['industry_code']=='D35']
df_D35 = df_D35[['country_iso_code','validity_date','total','total_units']]
df_DTOTAL = df1[df1['industry_code']=='DTOTAL']
df_DTOTAL = df_DTOTAL[['country_iso_code','validity_date','total','total_units']]
df_result = pd.merge(df_D35,df_DTOTAL,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()

#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total_x':'scope2_value','total_y':'scope3_value','total_units_x':'scope2_value_units','total_units_y':'scope3_value_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result['scope3_value'] = df_result['scope3_value'] - df_result['scope2_value'] 
df_result[df_result['country_iso_code']=='CAN']

df_result = pd.merge(df_result,df_exgr,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()
#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total':'exported_emissions','total_units':'exported_emissions_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result.drop(columns=['attribute','industry_code'],inplace=True)
df_result.info(verbose=True)



<class 'pandas.core.frame.DataFrame'>
Int64Index: 2016 entries, 0 to 2015
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   country_iso_code          2016 non-null   string 
 1   validity_date             2016 non-null   Int64  
 2   scope2_value              2016 non-null   Float64
 3   scope2_value_units        2016 non-null   string 
 4   scope3_value              2016 non-null   Float64
 5   scope3_value_units        2016 non-null   string 
 6   exported_emissions        2016 non-null   Float64
 7   exported_emissions_units  2016 non-null   string 
dtypes: Float64(3), Int64(1), string(4)
memory usage: 149.6 KB


In [13]:
# Get the values saved in Trino containing  GHG emissions provided by UNFCCC & GDP PPP values from Worldbank

import pandas as pd


sql=f"""
select country_iso_code,validity_date,ghg_total_without_lulucf,ghg_total_without_lulucf_units,ghg_total_with_lulucf,ghg_total_with_lulucf_units
,gdp,gdp_units,gdp_ppp,gdp_ppp_units
from {ingest_catalog}.{ingest_schema}.{src_table_2} """ 
df_unfccc = pd.read_sql(sql, engine)

if (report_currency == 'EUR'):
    df_unfccc = pd.merge(df_unfccc,df_exch_eur,on=['validity_date'],how='inner')   
    df_unfccc['gdp'] = df_unfccc['gdp'] * df_unfccc['value']
    df_unfccc['gdp_ppp'] = df_unfccc['gdp_ppp'] * df_unfccc['value']
    df_unfccc['gdp_units'] = 'EUR'
    df_unfccc['gdp_ppp_units'] = 'EUR'
    df_unfccc.drop(columns=['value'],inplace=True)
    


df_unfccc[df_unfccc['country_iso_code']=='ARG']
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)
#df1["units"] = "kt"
#df1 = requantify_df(df1).convert_dtypes()
#df1.info(verbose=True)
#df1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12948 entries, 0 to 12947
Data columns (total 10 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                12948 non-null  string 
 1   validity_date                   12948 non-null  Int64  
 2   ghg_total_without_lulucf        2212 non-null   Float64
 3   ghg_total_without_lulucf_units  12948 non-null  string 
 4   ghg_total_with_lulucf           2212 non-null   Float64
 5   ghg_total_with_lulucf_units     12948 non-null  string 
 6   gdp                             12854 non-null  Float64
 7   gdp_units                       12948 non-null  string 
 8   gdp_ppp                         7210 non-null   Float64
 9   gdp_ppp_units                   12948 non-null  string 
dtypes: Float64(4), Int64(1), string(5)
memory usage: 1.0 MB


In [14]:
import numpy as np
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)

df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))

df_unfccc = requantify_df(df_unfccc)
df_unfccc.info(verbose=True)
# convert to CO2 units to Mt
df_unfccc['ghg_total_without_lulucf'] = df_unfccc['ghg_total_without_lulucf'].pint.to("t CO2e")
df_unfccc['ghg_total_with_lulucf'] = df_unfccc['ghg_total_with_lulucf'].pint.to("t CO2e")
####

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf==0, np.nan, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf==0, np.nan, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp==0, np.nan, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp==0, np.nan, df_unfccc.gdp_ppp))

df_unfccc.info(verbose=True)


df_unfccc[df_unfccc['country_iso_code']=='ARG']



#df_unfccc['gdp'] = df_unfccc['gdp'].pint.to("Millions USD")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12948 entries, 0 to 12947
Data columns (total 10 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                12948 non-null  string 
 1   validity_date                   12948 non-null  Int64  
 2   ghg_total_without_lulucf        2212 non-null   Float64
 3   ghg_total_without_lulucf_units  12948 non-null  string 
 4   ghg_total_with_lulucf           2212 non-null   Float64
 5   ghg_total_with_lulucf_units     12948 non-null  string 
 6   gdp                             12854 non-null  Float64
 7   gdp_units                       12948 non-null  string 
 8   gdp_ppp                         7210 non-null   Float64
 9   gdp_ppp_units                   12948 non-null  string 
dtypes: Float64(4), Int64(1), string(5)
memory usage: 1.0 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12948 entries, 0 to 12947
Data columns (total 6 

  return np.array(qtys, dtype="object", copy=copy)
  return np.array(qtys, dtype="object", copy=copy)


Unnamed: 0,country_iso_code,validity_date,ghg_total_without_lulucf,ghg_total_with_lulucf,gdp,gdp_ppp
1405,ARG,2005,0.0,0.0,198737095012.282,527729109507.491
1406,ARG,1977,0.0,0.0,56781000100.9448,0.0
1407,ARG,2004,0.0,0.0,164657930452.787,470169433019.259
1408,ARG,1996,0.0,0.0,272149750000.0,372888233736.503
1409,ARG,1978,0.0,0.0,58082870156.2634,0.0
3091,ARG,1987,0.0,0.0,111106191358.197,0.0
3092,ARG,1969,0.0,0.0,31256284543.6155,0.0
3093,ARG,2008,0.0,0.0,361558037110.419,697557858459.69
3094,ARG,1964,0.0,0.0,25605249381.7597,0.0
3770,ARG,1973,0.0,0.0,52544000116.9037,0.0


In [15]:
#df_unfccc = dequantify_df(df_unfccc)
#df_unfccc
#df_unfccc.info(verbose=True)
#df_unfccc

df_result= requantify_df(df_result)
#df_result= dequantify_df(df_result)
#df_result


<font size=3><b>Merge UNFCCC , Worldbank and OECD data</b> </font>

In [16]:
#df_result.info(verbose=True)
#df_result= requantify_df(df_result)
#df_result.info(verbose=True)

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))


df_result = pd.merge(df_result,df_unfccc,on=['country_iso_code','validity_date'],how='outer')  

rename_columns = {'ghg_total_without_lulucf':'scope1_excl_lulucf',
                  'ghg_total_with_lulucf':'scope1_incl_lulucf'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)

#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope1_incl_lulucf'] = df_result['scope1_incl_lulucf'].fillna(0).astype(float)
#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope2_value'] = df_result['scope2_value'].fillna(0).astype(float)
#df_result['scope3_value'] = df_result['scope3_value'].fillna(0).astype(float)



df_result['sum_yn'] = 1 * ((df_result['scope1_excl_lulucf'] > 0) & (df_result['scope2_value'] > 0) &  (df_result['scope3_value'] > 0) )
df_result['calc_attr_factor_yn'] = 1 * (df_result['gdp_ppp'] > 0)

df_result['consumption_emissions_excl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])
df_result['consumption_emissions_incl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])
###############
df_result['attribution_factor_scope1_excl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_excl_lulucf']) / df_result['gdp_ppp']
df_result['attribution_factor_scope1_incl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_incl_lulucf']) / df_result['gdp_ppp']

df_result['attribution_factor_excl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
df_result['attribution_factor_incl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
##########
df_result['attribution_factor_excl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])
df_result['attribution_factor_incl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])



#df_result['ghg_total_without_lulucf']=df_result['ghg_total_without_lulucf'].replace(0, np.nan)
#df_result['ghg_total_with_lulucf']=df_result['ghg_total_with_lulucf'].replace(0, np.nan)
#df_result['gdp']=df_result['gdp'].replace(0, np.nan)
#df_result['gdp_ppp']=df_result['gdp_ppp'].replace(0, np.nan)



#df_result

columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_incl_lulucf','scope2_value','scope3_value',
                 'exported_emissions','gdp_ppp','gdp',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
df_result = df_result.reindex(columns=columns_order)
df_result.info(verbose=True)
df_result= dequantify_df(df_result)
df_result.info(verbose=True)
# remove invalid country_iso_codes (regions, ...)

df_result = pd.merge(df_result,df_country,on=['country_iso_code'])  

cols= ['scope1_excl_lulucf','scope1_incl_lulucf','scope2_value','scope3_value',
                 'exported_emissions','gdp_ppp','gdp',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
df_result[cols] = df_result[cols].replace({0:np.nan})


df_result


<class 'pandas.core.frame.DataFrame'>
Int64Index: 13380 entries, 0 to 13379
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   country_iso_code    13380 non-null  string         
 1   validity_date       13380 non-null  Int64          
 2   scope2_value        2016 non-null   pint[CO2eq * t]
 3   scope3_value        2016 non-null   pint[CO2eq * t]
 4   exported_emissions  2016 non-null   pint[CO2eq * t]
 5   scope1_excl_lulucf  12948 non-null  pint[CO2eq * t]
 6   scope1_incl_lulucf  12948 non-null  pint[CO2eq * t]
 7   gdp                 12948 non-null  pint[USD]      
 8   gdp_ppp             12948 non-null  pint[USD]      
dtypes: Int64(1), pint[CO2eq * t](5), pint[USD](2), string(1)
memory usage: 1.0 MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13380 entries, 0 to 13379
Data columns (total 17 columns):
 #   Column                                 Non-Null Count  Dtype        

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,scope3_value_units,...,attribution_factor_scope1_incl_lulucf,attribution_factor_scope1_incl_lulucf_units,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_units,attribution_factor_excl_lulucf_gdp,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units
0,ARG,1995,,CO2eq * t,,CO2eq * t,21000.0,CO2eq * t,9356000.0,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
1,ARG,1996,,CO2eq * t,,CO2eq * t,28000.0,CO2eq * t,11287000.0,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
2,ARG,1997,2.709103e+08,CO2eq * t,2.419562e+08,CO2eq * t,26000.0,CO2eq * t,14968000.0,CO2eq * t,...,0.00059,CO2eq * t / USD,0.000697,CO2eq * t / USD,0.000976,CO2eq * t / USD,0.000626,CO2eq * t / USD,0.000877,CO2eq * t / USD
3,ARG,1998,,CO2eq * t,,CO2eq * t,43000.0,CO2eq * t,13397000.0,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
4,ARG,1999,,CO2eq * t,,CO2eq * t,61000.0,CO2eq * t,12270000.0,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10248,COK,1994,8.030046e+04,CO2eq * t,-7.413774e+04,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
10249,PRK,1990,2.075440e+08,CO2eq * t,1.938170e+08,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
10250,PRK,2002,8.733000e+07,CO2eq * t,7.045000e+07,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
10251,PRK,1994,1.575290e+08,CO2eq * t,1.441590e+08,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD


In [17]:
#df_result['scope1_value_units'] = 'Mt CO2e'
#df_result['scope1_value'] = df_result['scope1_value'].round(decimals = 3)
#pd.options.display.float_format = '{:.3f}'.format
#df_result= df_result.convert_dtypes()
#df_result.info(verbose=True)
df_result[df_result['country_iso_code']=='CAN']
#df_result

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,scope3_value_units,...,attribution_factor_scope1_incl_lulucf,attribution_factor_scope1_incl_lulucf_units,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_units,attribution_factor_excl_lulucf_gdp,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units
400,CAN,1995,6.562987e+08,CO2eq * t,6.265742e+08,CO2eq * t,5129000.0,CO2eq * t,179111000.0,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
401,CAN,1996,6.787145e+08,CO2eq * t,6.419787e+08,CO2eq * t,5269000.0,CO2eq * t,188198000.0,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
402,CAN,1997,6.911954e+08,CO2eq * t,6.529372e+08,CO2eq * t,6362000.0,CO2eq * t,202972000.0,CO2eq * t,...,0.000864,CO2eq * t / USD,0.001192,CO2eq * t / USD,0.001375,CO2eq * t / USD,0.001141,CO2eq * t / USD,0.001316,CO2eq * t / USD
403,CAN,1998,6.969015e+08,CO2eq * t,6.470244e+08,CO2eq * t,7993000.0,CO2eq * t,214108000.0,CO2eq * t,...,0.000815,CO2eq * t / USD,0.001158,CO2eq * t / USD,0.001450,CO2eq * t / USD,0.001095,CO2eq * t / USD,0.001371,CO2eq * t / USD
404,CAN,1999,7.096009e+08,CO2eq * t,6.742546e+08,CO2eq * t,9100000.0,CO2eq * t,222868000.0,CO2eq * t,...,0.000797,CO2eq * t / USD,0.001112,CO2eq * t / USD,0.001388,CO2eq * t / USD,0.001071,CO2eq * t / USD,0.001336,CO2eq * t / USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,CAN,1963,,CO2eq * t,,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
457,CAN,1975,,CO2eq * t,,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
458,CAN,1970,,CO2eq * t,,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
459,CAN,1967,,CO2eq * t,,CO2eq * t,,CO2eq * t,,CO2eq * t,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD


Calculate Attribution factor = (Scope1+Scope2+Scope3)/GDP


In [18]:


#df_result= requantify_df(df_result)
format_mapper =  {'gdp_ppp': '{0:,.0f}',
           'gdp': '{0:,.0f}',       
           'attribution_factor_excl_lulucf': '{0:.8f}',
           'attribution_factor_incl_lulucf': '{0:.8f}',
           'attribution_factor_excl_lulucf_gdp': '{0:.8f}',
           'attribution_factor_incl_lulucf_gdp': '{0:.8f}',       
           'scope1_excl_lulucf': '{0:,.0f}',       
           'scope1_incl_lulucf': '{0:,.0f}',              
           'scope2_value': '{0:,.0f}',              
           'scope3_value': '{0:,.0f}'
                 }

df_result[df_result['country_iso_code']=='CAN'].style.format(format_mapper)


Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,scope3_value_units,exported_emissions,exported_emissions_units,gdp_ppp,gdp_ppp_units,gdp,gdp_units,consumption_emissions_excl_lulucf,consumption_emissions_excl_lulucf_units,consumption_emissions_incl_lulucf,consumption_emissions_incl_lulucf_units,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_excl_lulucf_units,attribution_factor_scope1_incl_lulucf,attribution_factor_scope1_incl_lulucf_units,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_units,attribution_factor_excl_lulucf_gdp,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units
400,CAN,1995,656298717.0,CO2eq * t,626574181.0,CO2eq * t,5129000.0,CO2eq * t,179111000.0,CO2eq * t,136648000.0,CO2eq * t,,USD,604031623433,USD,703890717.315296,CO2eq * t,674166180.603995,CO2eq * t,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
401,CAN,1996,678714489.0,CO2eq * t,641978685.0,CO2eq * t,5269000.0,CO2eq * t,188198000.0,CO2eq * t,139744000.0,CO2eq * t,,USD,628546387972,USD,732437488.508001,CO2eq * t,695701685.136717,CO2eq * t,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
402,CAN,1997,691195367.0,CO2eq * t,652937162.0,CO2eq * t,6362000.0,CO2eq * t,202972000.0,CO2eq * t,160734000.0,CO2eq * t,755390170461.0,USD,654986999856,USD,739795367.182711,CO2eq * t,701537162.40306,CO2eq * t,0.000915,CO2eq * t / USD,0.000864,CO2eq * t / USD,0.00119214,CO2eq * t / USD,0.00137488,CO2eq * t / USD,0.00114149,CO2eq * t / USD,0.00131647,CO2eq * t / USD
403,CAN,1998,696901519.0,CO2eq * t,647024422.0,CO2eq * t,7993000.0,CO2eq * t,214108000.0,CO2eq * t,166623000.0,CO2eq * t,793674327704.0,USD,634000000000,USD,752379518.588772,CO2eq * t,702502422.222185,CO2eq * t,0.000878,CO2eq * t / USD,0.000815,CO2eq * t / USD,0.00115791,CO2eq * t / USD,0.00144953,CO2eq * t / USD,0.00109507,CO2eq * t / USD,0.00137086,CO2eq * t / USD
404,CAN,1999,709600924.0,CO2eq * t,674254607.0,CO2eq * t,9100000.0,CO2eq * t,222868000.0,CO2eq * t,171505000.0,CO2eq * t,846412945810.0,USD,678412196271,USD,770063924.294885,CO2eq * t,734717607.218812,CO2eq * t,0.000838,CO2eq * t / USD,0.000797,CO2eq * t / USD,0.00111242,CO2eq * t / USD,0.0013879,CO2eq * t / USD,0.00107066,CO2eq * t / USD,0.0013358,CO2eq * t / USD
405,CAN,2000,733511337.0,CO2eq * t,711770284.0,CO2eq * t,18191000.0,CO2eq * t,236036000.0,CO2eq * t,178858000.0,CO2eq * t,900996986801.0,USD,744773415932,USD,808880337.261223,CO2eq * t,787139284.018432,CO2eq * t,0.000814,CO2eq * t / USD,0.00079,CO2eq * t / USD,0.00109627,CO2eq * t / USD,0.00132623,CO2eq * t / USD,0.00107214,CO2eq * t / USD,0.00129704,CO2eq * t / USD
406,CAN,2001,722935012.0,CO2eq * t,685252427.0,CO2eq * t,19531000.0,CO2eq * t,228887000.0,CO2eq * t,175811000.0,CO2eq * t,937786776177.0,USD,738981792355,USD,795542012.117626,CO2eq * t,757859426.599736,CO2eq * t,0.000771,CO2eq * t / USD,0.000731,CO2eq * t / USD,0.00103579,CO2eq * t / USD,0.00131445,CO2eq * t / USD,0.00099561,CO2eq * t / USD,0.00126346,CO2eq * t / USD
407,CAN,2002,726555183.0,CO2eq * t,706282510.0,CO2eq * t,8195000.0,CO2eq * t,233500000.0,CO2eq * t,165582000.0,CO2eq * t,971003788233.0,USD,760649334098,USD,802668183.452183,CO2eq * t,782395509.635937,CO2eq * t,0.000748,CO2eq * t / USD,0.000727,CO2eq * t / USD,0.00099716,CO2eq * t / USD,0.00127293,CO2eq * t / USD,0.00097629,CO2eq * t / USD,0.00124627,CO2eq * t / USD
408,CAN,2003,744831887.0,CO2eq * t,720393316.0,CO2eq * t,8220000.0,CO2eq * t,232392000.0,CO2eq * t,176420000.0,CO2eq * t,1023682523003.0,USD,895540646635,USD,809023886.83637,CO2eq * t,784585315.54756,CO2eq * t,0.000728,CO2eq * t / USD,0.000704,CO2eq * t / USD,0.00096265,CO2eq * t / USD,0.00110039,CO2eq * t / USD,0.00093877,CO2eq * t / USD,0.0010731,CO2eq * t / USD
409,CAN,2004,745919782.0,CO2eq * t,749383582.0,CO2eq * t,8411000.0,CO2eq * t,238375000.0,CO2eq * t,191817000.0,CO2eq * t,1083609294356.0,USD,1026690238278,USD,800888782.435955,CO2eq * t,804352581.637471,CO2eq * t,0.000688,CO2eq * t / USD,0.000692,CO2eq * t / USD,0.00091611,CO2eq * t / USD,0.0009669,CO2eq * t / USD,0.00091931,CO2eq * t / USD,0.00097027,CO2eq * t / USD


<font size=3>Save the results in Trino</font>

In [19]:
# Add the sources used for the calculations in the result table
df_result['scope1_excl_source'] = "UNFCCC -> Time_Series_GHG_total_without_LULUCF_in_kt_CO2_equivalent.xlsx"
df_result['scope1_incl_source'] = "UNFCCC -> Time_Series_GHG_total_with_LULUCF_in_kt_CO2_equivalent.xlsx"
df_result['scope2_source'] = "OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE"
df_result['scope3_source'] = "OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value "          
df_result['gdp_ppp_source'] = "Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv"          


In [20]:
import osc_ingest_trino as osc
#df_result['validity_date']=pd.to_datetime(df_result['validity_date'], unit='D')
df_result=df_result.convert_dtypes()
df_result.info(verbose=True)

df_result['year'] = df_result['validity_date']
df_result['validity_date'] = pd.to_datetime(df_result.validity_date, format='%Y')
df_result = df_result.convert_dtypes()
df_result.validity_date = df_result.validity_date.astype('datetime64[ns]')



columnschema = osc.create_table_schema_pairs(df_result,typemap={'datetime64[ns]':'timestamp(6)'}) 
sql = f"""
drop table if exists {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
print(sql)
qres = engine.execute(sql)
print(qres.fetchall())

tabledef = f"""
create table if not exists {ingest_catalog}.{ingest_schema}.{ingest_table}(
{columnschema}
) with (
    format = 'ORC',
    partitioning = ARRAY['country_iso_code']
)
"""
print(tabledef)

table_create = engine.execute(tabledef)
print(table_create.fetchall())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10253 entries, 0 to 10252
Data columns (total 37 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   country_iso_code                             10253 non-null  string 
 1   validity_date                                10253 non-null  Int64  
 2   scope1_excl_lulucf                           2212 non-null   Float64
 3   scope1_excl_lulucf_units                     10253 non-null  string 
 4   scope1_incl_lulucf                           2212 non-null   Float64
 5   scope1_incl_lulucf_units                     10253 non-null  string 
 6   scope2_value                                 1419 non-null   Float64
 7   scope2_value_units                           10253 non-null  string 
 8   scope3_value                                 1608 non-null   Int64  
 9   scope3_value_units                           10253 non-null  string 
 10

In [21]:
df_result.to_sql(ingest_table,
           con=engine,
           schema=ingest_schema,
           if_exists='append',
           index=False,
           method=osc.TrinoBatchInsert(batch_size = 1000, verbose = True))

constructed fully qualified table name as: "pcaf_sovereign_footprint.sf_total_sovereign_emissions"
inserting 1000 records
  ('ARG', TIMESTAMP '1995-01-01 00:00:00', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 21000.0, 'CO2eq * t', 9356000, 'CO2eq * t', 19685000, 'CO2eq * t', 347005607350.473, 'USD', 258031750000.0, 'USD', NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', 'UNFCCC -> Time_Series_GHG_total_without_LULUCF_in_kt_CO2_equivalent.xlsx', 'UNFCCC -> Time_Series_GHG_total_with_LULUCF_in_kt_CO2_equivalent.xlsx', 'OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE', 'OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value ', 'Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv', 1995)
  ('ARG', TIMESTAMP '1996-01-01 00:00:00', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 28000.0, 'CO2eq * t', 11287000, 'CO2eq

In [22]:
df_result = df_result[df_result['year']>=1990]
df_result.sort_values(by=['country_iso_code','validity_date'], inplace=True)
df_result.to_excel("pcaf_results.xls",index=False,encoding='utf-8')
sql=f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table} where country_iso_code = 'CAN' and year = 2018 """
pd.read_sql(sql, engine)

  df_result.to_excel("pcaf_results.xls",index=False,encoding='utf-8')


Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,scope3_value_units,...,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units,scope1_excl_source,scope1_incl_source,scope2_source,scope3_source,gdp_ppp_source,year
0,CAN,2018-01-01 00:00:00.000,728475900.0,CO2eq * t,736887000.0,CO2eq * t,4953000.0,CO2eq * t,260931000,CO2eq * t,...,0.000541,CO2eq * t / USD,0.000581,CO2eq * t / USD,UNFCCC -> Time_Series_GHG_total_without_LULUCF...,UNFCCC -> Time_Series_GHG_total_with_LULUCF_in...,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2018
