<font size=6> The total sovereign emissions calculated by adding up scope 1, 2 and 3 emissions  </font>

In [1]:
report_currency = 'USD'

In [2]:
from dotenv import dotenv_values, load_dotenv
import osc_ingest_trino as osc
import os
import pathlib
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

In [3]:
import trino
from sqlalchemy.engine import create_engine

env_var_prefix = 'TRINO'

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ[f'{env_var_prefix}_USER'],
    host = os.environ[f'{env_var_prefix}_HOST'],
    port = os.environ[f'{env_var_prefix}_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

#trino_bucket = osc.attach_s3_bucket("S3_DEV")

In [4]:
import boto3
import pandas as pd
import pint

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

In [5]:
def requantify_df(df):
    units_col = None
    columns_reversed = reversed(df.columns)
    for col in columns_reversed:
        if col.endswith("_units"):
            if units_col:
                # We expect _units column to follow a non-units column
                raise ValueError
            units_col = col
            continue
        if units_col:
            if col + '_units' != units_col:
                raise ValueError
            if (df[units_col]==df[units_col][0]).all():
                # Make a PintArray
                new_col = PintArray(df[col], dtype=f"pint[{ureg(df[units_col][0]).u}]")
            else:
                # Make a pd.Series of Quantity in a way that does not throw UnitStrippedWarning
                new_col = pd.Series(data=df[col], name=col) * pd.Series(data=df[units_col].map(lambda x: ureg(x).u), name=col)
            df = df.drop(columns=units_col)
            df[col] = new_col
            units_col = None
    return df


In [6]:
# If DF_COL contains Pint quantities (because it is a PintArray or an array of Pint Quantities),
# return a two-column dataframe of magnitudes and units.
# If DF_COL contains no Pint quanities, return it unchanged.

def dequantify_column(df_col: pd.Series):
    if type(df_col.values)==PintArray:
        return pd.DataFrame({df_col.name: df_col.values.quantity.m,
                             df_col.name + "_units": str(df_col.values.dtype.units)},
                            index=df_col.index)
    elif df_col.size==0:
        return df_col
    elif isinstance(df_col.iloc[0], Quantity):
        values = df_col.map(lambda x: (x.m, x.u))
        return pd.DataFrame({df_col.name: df_col.map(lambda x: x.m),
                             df_col.name + "_units": df_col.map(lambda x: str(x.u))},
                            index=df_col.index)
    else:
        return df_col

# Rewrite dataframe DF so that columns containing Pint quantities are represented by a column for the Magnitude and column for the Units.
# The magnitude column retains the original column name and the units column is renamed with a _units suffix.
def dequantify_df(df):
    return pd.concat([dequantify_column(df[col]) for col in df.columns], axis=1)

In [7]:
# Get the values saved in Trino related with Foreign CO2 emissions embodied in gross imports

import pandas as pd
import pandas as pd
from openscm_units import unit_registry
from pint import set_application_registry, Quantity
from pint_pandas import PintArray, PintType

ureg = unit_registry
Q_ = ureg.Quantity
ureg.default_format = '~'
ureg.define("CO2e = CO2 = CO2eq")
ureg.define("USD = [currency] ")
ureg.define("EUR = [currency_EUR] ")
ureg.define('Millions=1000000')
set_application_registry(ureg)


ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'pcaf_sovereign_footprint'
ingest_table = 'sf_total_sovereign_emissions'


src_table_1 = 'sf_oecd_imgr_fco2'
src_table_2 = 'sf_unfccc_results'
src_table_3 = 'sf_oecd_exgr_dco2'
src_table_4 = 'sf_oecd_exch_rates'
src_table_5 = 'sf_wdi_population'

sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_1} """ 
df1 = pd.read_sql(sql, engine)
df1 = requantify_df(df1)
df1
# convert to CO2 units to t
df1['value'] = df1['value'].pint.to("t CO2e")
df1 = dequantify_df(df1)
df1

Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,COL,SVN,DTOTAL,2011,Foreign CO2 emissions embodied in gross imports,7000.0,CO2eq * t
1,COL,SVN,DTOTAL,2012,Foreign CO2 emissions embodied in gross imports,4000.0,CO2eq * t
2,COL,SVN,DTOTAL,2013,Foreign CO2 emissions embodied in gross imports,5000.0,CO2eq * t
3,COL,SVN,DTOTAL,2014,Foreign CO2 emissions embodied in gross imports,4000.0,CO2eq * t
4,COL,SVN,DTOTAL,2015,Foreign CO2 emissions embodied in gross imports,6000.0,CO2eq * t
...,...,...,...,...,...,...,...
334651,EU28,NLD,DTOTAL,2006,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t
334652,EU28,NLD,DTOTAL,2007,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t
334653,EU28,NLD,DTOTAL,2008,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t
334654,EU28,NLD,DTOTAL,2009,Foreign CO2 emissions embodied in gross imports,0.0,CO2eq * t


In [8]:
df1.to_csv("oecd_imgr_fco2_results.csv",index=False)

In [9]:
sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_3} where industry_code='DTOTAL'""" 
df_exgr = pd.read_sql(sql, engine)
df_exgr = requantify_df(df_exgr)
# convert to CO2 units to t
df_exgr['value'] = df_exgr['value'].pint.to("t CO2e")
df_exgr = dequantify_df(df_exgr)
df_exgr

Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,KAZ,WLD,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,49629000.0,CO2eq * t
1,KAZ,WLD,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,36623000.0,CO2eq * t
2,KAZ,WLD,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,32152000.0,CO2eq * t
3,KAZ,WLD,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,29628000.0,CO2eq * t
4,KAZ,WLD,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,40478000.0,CO2eq * t
...,...,...,...,...,...,...,...
167323,ROW,ESP,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,9849000.0,CO2eq * t
167324,ROW,ESP,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,11882000.0,CO2eq * t
167325,ROW,ESP,DTOTAL,2000,Domestic CO2 emissions embodied in gross exports,15533000.0,CO2eq * t
167326,ROW,ESP,DTOTAL,2001,Domestic CO2 emissions embodied in gross exports,15888000.0,CO2eq * t


In [10]:
###########################

sql=f"""
select validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_4} where country_iso_code='DEU'""" 
df_exch_eur = pd.read_sql(sql, engine)
df_exch_eur

Unnamed: 0,validity_date,value
0,1950,2.144861
1,1951,2.144861
2,1952,2.144861
3,1953,2.147426
4,1954,2.147426
...,...,...
67,2017,0.885206
68,2018,0.846773
69,2019,0.893276
70,2020,0.875506


In [11]:
import pandas as pd 
sql=f"""
select country_iso_code,validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_5} """ 
df_population = pd.read_sql(sql, engine)
df_population["value"] = pd.to_numeric(df_population["value"], errors='coerce')
df_population["validity_date"] = pd.to_numeric(df_population["validity_date"], errors='coerce')
df_population=df_population.convert_dtypes()
df_population
#df_population.info(verbose=True)


Unnamed: 0,country_iso_code,validity_date,value
0,AFG,2017,36296111
1,ALB,2017,2873457
2,DZA,2017,41389174
3,ASM,2017,55617
4,AND,2017,76997
...,...,...,...
3187,SSF,2014,968958352
3188,SSA,2014,968866993
3189,TSS,2014,968958352
3190,UMC,2014,2394063066


<font size=3> Remove invalid country codes </font>

In [12]:
import pycountry
import pandas as pd
df_country = pd.DataFrame([country.__dict__['_fields'] for country in pycountry.countries])
df_country = df_country.rename(columns={'alpha_3':'country_iso_code'})
df_country=df_country[['country_iso_code']]
# add Rest of World to the country dataframe
dict_row = {'country_iso_code':'ROW'}
df_country = pd.concat([df_country,pd.DataFrame([dict_row])],ignore_index=True)
df_country.info(verbose=True)
df1=df1.convert_dtypes()
#df1.info(verbose=True)
df1 = pd.merge(df1,df_country,on=['country_iso_code'])  
df1=df1.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df1

df_exgr = pd.merge(df_exgr,df_country,on=['country_iso_code'])  
df_exgr=df_exgr.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df_exgr






<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   country_iso_code  250 non-null    object
dtypes: object(1)
memory usage: 2.1+ KB


Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,KAZ,WLD,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,49629000.0,CO2eq * t
1,KAZ,WLD,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,36623000.0,CO2eq * t
2,KAZ,WLD,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,32152000.0,CO2eq * t
3,KAZ,WLD,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,29628000.0,CO2eq * t
4,KAZ,WLD,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,40478000.0,CO2eq * t
...,...,...,...,...,...,...,...
135067,VNM,MMR,DTOTAL,2014,Domestic CO2 emissions embodied in gross exports,169000.0,CO2eq * t
135068,VNM,MMR,DTOTAL,2015,Domestic CO2 emissions embodied in gross exports,232000.0,CO2eq * t
135069,VNM,MMR,DTOTAL,2016,Domestic CO2 emissions embodied in gross exports,353000.0,CO2eq * t
135070,VNM,MMR,DTOTAL,2017,Domestic CO2 emissions embodied in gross exports,313000.0,CO2eq * t


In [13]:
df_country

Unnamed: 0,country_iso_code
0,ABW
1,AFG
2,AGO
3,AIA
4,ALA
...,...
245,YEM
246,ZAF
247,ZMB
248,ZWE


Calcuate Scope 2:
GHG emissions occurring as a consequence of the domestic use of grid-supplied electricity, heat,
steam and/or cooling which is imported from another territory


In [14]:
rename_columns = {'value':'total','partner_iso_code':'country_iso_code','value_units' :'total_units'}
agg_columns = { 'value' : 'sum'}
columns_order = ['attribute','industry_code','country_iso_code','validity_date','total','total_units']
df1 = df1.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df1.convert_dtypes()

df1.info(verbose=True)
df1

######################

df_exgr = df_exgr.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df_exgr.convert_dtypes()

df_exgr.info(verbose=True)
df_exgr



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4032 entries, 0 to 4031
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         4032 non-null   string 
 1   industry_code     4032 non-null   string 
 2   country_iso_code  4032 non-null   string 
 3   validity_date     4032 non-null   Int64  
 4   total             4032 non-null   Float64
 5   total_units       4032 non-null   string 
dtypes: Float64(1), Int64(1), string(4)
memory usage: 197.0 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         2016 non-null   string 
 1   industry_code     2016 non-null   string 
 2   country_iso_code  2016 non-null   string 
 3   validity_date     2016 non-null   Int64  
 4   total             2016 non-null   Float64
 5   total_units    

Unnamed: 0,attribute,industry_code,country_iso_code,validity_date,total,total_units
0,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1995,2008843000.0,CO2eq * t
1,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1996,2055450000.0,CO2eq * t
2,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1997,2151870000.0,CO2eq * t
3,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1998,2177243000.0,CO2eq * t
4,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1999,2300588000.0,CO2eq * t
...,...,...,...,...,...,...
2011,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2014,266081000.0,CO2eq * t
2012,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2015,231751000.0,CO2eq * t
2013,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2016,211686000.0,CO2eq * t
2014,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2017,226732000.0,CO2eq * t


In [15]:
#df1.to_csv("oecd_imgr_fco2_results2.csv",index=False)

In [16]:
# divide the dataframe into two dataframes 
# to calculate Scope2 and Scope3 Values
df_D35 = df1[df1['industry_code']=='D35']
df_D35 = df_D35[['country_iso_code','validity_date','total','total_units']]
df_DTOTAL = df1[df1['industry_code']=='DTOTAL']
df_DTOTAL = df_DTOTAL[['country_iso_code','validity_date','total','total_units']]
df_result = pd.merge(df_D35,df_DTOTAL,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()

#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total_x':'scope2_value','total_y':'scope3_value','total_units_x':'scope2_value_units','total_units_y':'scope3_value_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result['scope3_value'] = df_result['scope3_value'] - df_result['scope2_value'] 
df_result[df_result['country_iso_code']=='CAN']

df_result = pd.merge(df_result,df_exgr,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()
#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total':'exported_emissions','total_units':'exported_emissions_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result.drop(columns=['attribute','industry_code'],inplace=True)
df_result.info(verbose=True)



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   country_iso_code          2016 non-null   string 
 1   validity_date             2016 non-null   Int64  
 2   scope2_value              2016 non-null   Float64
 3   scope2_value_units        2016 non-null   string 
 4   scope3_value              2016 non-null   Float64
 5   scope3_value_units        2016 non-null   string 
 6   exported_emissions        2016 non-null   Float64
 7   exported_emissions_units  2016 non-null   string 
dtypes: Float64(3), Int64(1), string(4)
memory usage: 134.0 KB


In [17]:
# Get the values saved in Trino containing  GHG emissions provided by UNFCCC & GDP PPP values from Worldbank

import pandas as pd


sql=f"""
select country_iso_code,validity_date,ghg_total_without_lulucf,ghg_total_without_lulucf_units,scope1_excl_source,ghg_total_with_lulucf,ghg_total_with_lulucf_units
,scope1_incl_source,gdp,gdp_units,gdp_ppp,gdp_ppp_units
from {ingest_catalog}.{ingest_schema}.{src_table_2} """ 
df_unfccc = pd.read_sql(sql, engine)

if (report_currency == 'EUR'):
    df_unfccc = pd.merge(df_unfccc,df_exch_eur,on=['validity_date'],how='inner')   
    df_unfccc['gdp'] = df_unfccc['gdp'] * df_unfccc['value']
    df_unfccc['gdp_ppp'] = df_unfccc['gdp_ppp'] * df_unfccc['value']
    df_unfccc['gdp_units'] = 'EUR'
    df_unfccc['gdp_ppp_units'] = 'EUR'
    df_unfccc.drop(columns=['value'],inplace=True)
    


df_unfccc[df_unfccc['country_iso_code']=='ARG']
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)
#df1["units"] = "kt"
#df1 = requantify_df(df1).convert_dtypes()
#df1.info(verbose=True)
#df1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15025 entries, 0 to 15024
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                15025 non-null  string 
 1   validity_date                   15025 non-null  Int64  
 2   ghg_total_without_lulucf        10633 non-null  Float64
 3   ghg_total_without_lulucf_units  15025 non-null  string 
 4   scope1_excl_source              10633 non-null  string 
 5   ghg_total_with_lulucf           10634 non-null  Float64
 6   ghg_total_with_lulucf_units     15025 non-null  string 
 7   scope1_incl_source              10634 non-null  string 
 8   gdp                             13118 non-null  Float64
 9   gdp_units                       15025 non-null  string 
 10  gdp_ppp                         7472 non-null   Float64
 11  gdp_ppp_units                   15025 non-null  string 
dtypes: Float64(4), Int64(1), string(

In [18]:
import numpy as np
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)

df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))

df_unfccc = requantify_df(df_unfccc)
df_unfccc.info(verbose=True)
# convert to CO2 units to Mt
df_unfccc['ghg_total_without_lulucf'] = df_unfccc['ghg_total_without_lulucf'].pint.to("t CO2e")
df_unfccc['ghg_total_with_lulucf'] = df_unfccc['ghg_total_with_lulucf'].pint.to("t CO2e")
####

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf==0, np.nan, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf==0, np.nan, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp==0, np.nan, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp==0, np.nan, df_unfccc.gdp_ppp))

df_unfccc.info(verbose=True)


df_unfccc[df_unfccc['country_iso_code']=='ARG']



#df_unfccc['gdp'] = df_unfccc['gdp'].pint.to("Millions USD")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15025 entries, 0 to 15024
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                15025 non-null  string 
 1   validity_date                   15025 non-null  Int64  
 2   ghg_total_without_lulucf        10633 non-null  Float64
 3   ghg_total_without_lulucf_units  15025 non-null  string 
 4   scope1_excl_source              10633 non-null  string 
 5   ghg_total_with_lulucf           10634 non-null  Float64
 6   ghg_total_with_lulucf_units     15025 non-null  string 
 7   scope1_incl_source              10634 non-null  string 
 8   gdp                             13118 non-null  Float64
 9   gdp_units                       15025 non-null  string 
 10  gdp_ppp                         7472 non-null   Float64
 11  gdp_ppp_units                   15025 non-null  string 
dtypes: Float64(4), Int64(1), string(



Unnamed: 0,country_iso_code,validity_date,ghg_total_without_lulucf,scope1_excl_source,ghg_total_with_lulucf,scope1_incl_source,gdp,gdp_ppp
496,ARG,2010,321000000.0,PRIMAP,447642430.0,UNFCCC,423627422092.49,736799121185.705
498,ARG,2012,334000000.0,PRIMAP,429478449.93,UNFCCC,545982375701.128,819697901873.041
882,ARG,2009,310000000.0,PRIMAP,376680995.0,PRIMAP,332976484577.619,661109513023.278
929,ARG,2011,330000000.0,PRIMAP,385827120.0,PRIMAP,530163281574.658,797263881242.037
930,ARG,2013,341000000.0,PRIMAP,427932323.0,PRIMAP,552025140252.246,849616000404.317
...,...,...,...,...,...,...,...,...
13924,ARG,1974,215000000.0,PRIMAP,273891871.0,PRIMAP,72436777342.4443,0.0
13925,ARG,1975,218000000.0,PRIMAP,276173940.0,PRIMAP,52438647921.9226,0.0
13926,ARG,1976,226000000.0,PRIMAP,284225466.0,PRIMAP,51169499892.1722,0.0
13927,ARG,1977,228000000.0,PRIMAP,286370667.0,PRIMAP,56781000101.0848,0.0


In [19]:
#df_unfccc = dequantify_df(df_unfccc)
#df_unfccc
#df_unfccc.info(verbose=True)
#df_unfccc

df_result= requantify_df(df_result)
#df_result= dequantify_df(df_result)
#df_result




<font size=3><b>Merge UNFCCC , Worldbank and OECD data</b> </font>

In [20]:
#df_result.info(verbose=True)
#df_result= requantify_df(df_result)
#df_result.info(verbose=True)

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))




df_result = pd.merge(df_result,df_unfccc,on=['country_iso_code','validity_date'],how='outer')  


rename_columns = {'ghg_total_without_lulucf':'scope1_excl_lulucf',
                  'ghg_total_with_lulucf':'scope1_incl_lulucf'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)

# enrich with population data

df_result = pd.merge(df_result,df_population,on=['country_iso_code','validity_date'],how='outer')  


rename_columns = {'value':'population'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)
df_result["population"] = df_result["population"].fillna(0)

df_result = dequantify_df(df_result)

#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope1_incl_lulucf'] = df_result['scope1_incl_lulucf'].fillna(0).astype(float)
#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope2_value'] = df_result['scope2_value'].fillna(0).astype(float)
#df_result['scope3_value'] = df_result['scope3_value'].fillna(0).astype(float)


df_result['sum_yn'] = 1 * ((df_result['scope1_excl_lulucf'] > 0) & (df_result['scope2_value'] > 0) &  (df_result['scope3_value'] > 0) )
df_result['calc_attr_factor_yn'] = 1 * (df_result['gdp_ppp'] > 0)
df_result['population_available'] = 1 * (df_result['population'] > 0)


df_result['consumption_emissions_excl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])
df_result['consumption_emissions_incl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])

df_result.info(verbose=True)

df_result['consumption_emissions_excl_lulucf_per_capita']  = df_result['population_available'] * df_result['consumption_emissions_excl_lulucf'] / df_result['population']
df_result['consumption_emissions_incl_lulucf_per_capita']  = df_result['population_available'] * df_result['consumption_emissions_incl_lulucf'] / df_result['population']


###############
df_result['attribution_factor_scope1_excl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_excl_lulucf']) / df_result['gdp_ppp']
df_result['attribution_factor_scope1_incl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_incl_lulucf']) / df_result['gdp_ppp']

df_result['attribution_factor_excl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
df_result['attribution_factor_incl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
##########
df_result['attribution_factor_excl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])
df_result['attribution_factor_incl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])



#df_result['ghg_total_without_lulucf']=df_result['ghg_total_without_lulucf'].replace(0, np.nan)
#df_result['ghg_total_with_lulucf']=df_result['ghg_total_with_lulucf'].replace(0, np.nan)
#df_result['gdp']=df_result['gdp'].replace(0, np.nan)
#df_result['gdp_ppp']=df_result['gdp_ppp'].replace(0, np.nan)



#df_result

columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_excl_source','scope1_incl_lulucf','scope1_incl_lulucf_units',
                 'scope1_incl_source','scope2_value','scope2_value_units','scope3_value','scope3_value_units',
                 'exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'population','consumption_emissions_excl_lulucf_per_capita','consumption_emissions_incl_lulucf_per_capita',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
#'consumption_emissions_excl_lulucf_units','consumption_emissions_incl_lulucf_units','consumption_emissions_excl_lulucf_per_capita_units','consumption_emissions_incl_lulucf_per_capita_units',
#'attribution_factor_excl_lulucf_units','attribution_factor_excl_lulucf_gdp_units','attribution_factor_incl_lulucf_units','attribution_factor_incl_lulucf_units',
#'attribution_factor_scope1_excl_lulucf_units','attribution_factor_scope1_incl_lulucf_units',

df_result = df_result.reindex(columns=columns_order)
df_result.info(verbose=True)
df_result= dequantify_df(df_result)
df_result.info(verbose=True)
# remove invalid country_iso_codes (regions, ...)

df_result = pd.merge(df_result,df_country,on=['country_iso_code'])  

cols= ['scope1_excl_lulucf','scope1_incl_lulucf','scope2_value','scope3_value',
                 'exported_emissions','gdp_ppp','gdp',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'population','consumption_emissions_excl_lulucf_per_capita','consumption_emissions_incl_lulucf_per_capita',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
df_result[cols] = df_result[cols].replace({0:np.nan})

df_result.info(verbose=True)

df_result


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15433 entries, 0 to 15432
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   country_iso_code    15433 non-null  string         
 1   validity_date       15433 non-null  Int64          
 2   scope2_value        2016 non-null   pint[CO2eq * t]
 3   scope3_value        2016 non-null   pint[CO2eq * t]
 4   exported_emissions  2016 non-null   pint[CO2eq * t]
 5   scope1_excl_lulucf  15025 non-null  pint[CO2eq * t]
 6   scope1_excl_source  10633 non-null  string         
 7   scope1_incl_lulucf  15025 non-null  pint[CO2eq * t]
 8   scope1_incl_source  10634 non-null  string         
 9   gdp                 15025 non-null  pint[USD]      
 10  gdp_ppp             15025 non-null  pint[USD]      
dtypes: Int64(1), pint[CO2eq * t](5), pint[USD](2), string(3)
memory usage: 1.3 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15536 entries, 0 

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,...,consumption_emissions_incl_lulucf,population,consumption_emissions_excl_lulucf_per_capita,consumption_emissions_incl_lulucf_per_capita,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp
0,ARG,1995,251000000.0,CO2eq * t,PRIMAP,3.194317e+08,CO2eq * t,PRIMAP,21000.0,CO2eq * t,...,3.091237e+08,,,,0.000724,0.000921,0.000751,0.001009,0.000948,0.001274
1,ARG,1996,264000000.0,CO2eq * t,PRIMAP,3.514958e+08,CO2eq * t,PRIMAP,28000.0,CO2eq * t,...,3.405038e+08,,,,0.000708,0.000943,0.000739,0.001012,0.000973,0.001333
2,ARG,1997,265000000.0,CO2eq * t,PRIMAP,2.419562e+08,CO2eq * t,UNFCCC,26000.0,CO2eq * t,...,2.300672e+08,,,,0.000647,0.000590,0.000683,0.000956,0.000627,0.000877
3,ARG,1998,271000000.0,CO2eq * t,PRIMAP,3.722612e+08,CO2eq * t,PRIMAP,43000.0,CO2eq * t,...,3.567572e+08,,,,0.000630,0.000865,0.000661,0.000951,0.000896,0.001290
4,ARG,1999,277000000.0,CO2eq * t,PRIMAP,3.636761e+08,CO2eq * t,PRIMAP,61000.0,CO2eq * t,...,3.500521e+08,,,,0.000657,0.000862,0.000686,0.001020,0.000892,0.001326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12316,GIB,2013,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33694,,,,,,,,
12317,GIB,2020,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33691,,,,,,,,
12318,GIB,2012,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33653,,,,,,,,
12319,GIB,2016,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,33738,,,,,,,,


In [21]:
#df_result['scope1_value_units'] = 'Mt CO2e'
#df_result['scope1_value'] = df_result['scope1_value'].round(decimals = 3)
#pd.options.display.float_format = '{:.3f}'.format
#df_result= df_result.convert_dtypes()
#df_result.info(verbose=True)
df_result[df_result['country_iso_code']=='CAN']
#df_result

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,...,consumption_emissions_incl_lulucf,population,consumption_emissions_excl_lulucf_per_capita,consumption_emissions_incl_lulucf_per_capita,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp
414,CAN,1995,6.454553e+08,CO2eq * t,UNFCCC,6.058811e+08,CO2eq * t,UNFCCC,5129000.0,CO2eq * t,...,6.534731e+08,,,,,,,,,
415,CAN,1996,6.667019e+08,CO2eq * t,UNFCCC,6.212992e+08,CO2eq * t,UNFCCC,5269000.0,CO2eq * t,...,6.750222e+08,,,,,,,,,
416,CAN,1997,6.822990e+08,CO2eq * t,UNFCCC,6.329551e+08,CO2eq * t,UNFCCC,6362000.0,CO2eq * t,...,6.815551e+08,,,,0.000903,0.000838,0.001180,0.001361,0.001115,0.001286
417,CAN,1998,6.886139e+08,CO2eq * t,UNFCCC,6.335611e+08,CO2eq * t,UNFCCC,7993000.0,CO2eq * t,...,6.890391e+08,,,,0.000868,0.000798,0.001147,0.001436,0.001078,0.001350
418,CAN,1999,7.017587e+08,CO2eq * t,UNFCCC,6.540323e+08,CO2eq * t,UNFCCC,9100000.0,CO2eq * t,...,7.144953e+08,,,,0.000829,0.000773,0.001103,0.001376,0.001047,0.001306
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,CAN,1978,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,,,,,,,,,
472,CAN,1972,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,,,,,,,,,
473,CAN,1963,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,,,,,,,,,
474,CAN,1961,,CO2eq * t,,,CO2eq * t,,,CO2eq * t,...,,,,,,,,,,


Calculate Attribution factor = (Scope1+Scope2+Scope3)/GDP


In [22]:


#df_result= requantify_df(df_result)
format_mapper =  {'gdp_ppp': '{0:,.0f}',
           'gdp': '{0:,.0f}',       
           'attribution_factor_excl_lulucf': '{0:.8f}',
           'attribution_factor_incl_lulucf': '{0:.8f}',
           'attribution_factor_excl_lulucf_gdp': '{0:.8f}',
           'attribution_factor_incl_lulucf_gdp': '{0:.8f}',       
           'scope1_excl_lulucf': '{0:,.0f}',       
           'scope1_incl_lulucf': '{0:,.0f}',              
           'scope2_value': '{0:,.0f}',              
           'scope3_value': '{0:,.0f}'
                 }

df_result[df_result['country_iso_code']=='CAN'].style.format(format_mapper)


Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,scope3_value,scope3_value_units,exported_emissions,exported_emissions_units,gdp_ppp,gdp_ppp_units,gdp,gdp_units,consumption_emissions_excl_lulucf,consumption_emissions_incl_lulucf,population,consumption_emissions_excl_lulucf_per_capita,consumption_emissions_incl_lulucf_per_capita,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp
414,CAN,1995,645455297.0,CO2eq * t,UNFCCC,605881079.0,CO2eq * t,UNFCCC,5129000.0,CO2eq * t,179111000.0,CO2eq * t,136648000.0,CO2eq * t,,USD,604031623433,USD,693047296.776335,653473079.122202,,,,,,,,,
415,CAN,1996,666701910.0,CO2eq * t,UNFCCC,621299195.0,CO2eq * t,UNFCCC,5269000.0,CO2eq * t,188198000.0,CO2eq * t,139744000.0,CO2eq * t,,USD,628546387972,USD,720424910.230366,675022195.292942,,,,,,,,,
416,CAN,1997,682298998.0,CO2eq * t,UNFCCC,632955107.0,CO2eq * t,UNFCCC,6362000.0,CO2eq * t,202972000.0,CO2eq * t,160734000.0,CO2eq * t,755390170461.0,USD,654986999856,USD,730898998.432811,681555106.804654,,,,0.000903,0.000838,0.00118036,0.0013613,0.00111504,0.00128596
417,CAN,1998,688613931.0,CO2eq * t,UNFCCC,633561120.0,CO2eq * t,UNFCCC,7993000.0,CO2eq * t,214108000.0,CO2eq * t,166623000.0,CO2eq * t,793674327704.0,USD,634000000000,USD,744091931.024858,689039119.917268,,,,0.000868,0.000798,0.00114747,0.00143646,0.0010781,0.00134962
418,CAN,1999,701758712.0,CO2eq * t,UNFCCC,654032317.0,CO2eq * t,UNFCCC,9100000.0,CO2eq * t,222868000.0,CO2eq * t,171505000.0,CO2eq * t,846412945810.0,USD,678412196271,USD,762221711.728973,714495316.8326,,,,0.000829,0.000773,0.00110316,0.00137634,0.00104677,0.00130599
419,CAN,2000,726987283.0,CO2eq * t,UNFCCC,690491255.0,CO2eq * t,UNFCCC,18191000.0,CO2eq * t,236036000.0,CO2eq * t,178858000.0,CO2eq * t,900996986801.0,USD,744773415932,USD,802356282.993296,765860254.730039,30685730.0,26.147538,24.958189,0.000807,0.000766,0.00108903,0.00131747,0.00104853,0.00126846
420,CAN,2001,718320043.0,CO2eq * t,UNFCCC,673786320.0,CO2eq * t,UNFCCC,19531000.0,CO2eq * t,228887000.0,CO2eq * t,175811000.0,CO2eq * t,937786776177.0,USD,738981792355,USD,790927043.152663,746393319.668803,,,,0.000766,0.000718,0.00103087,0.0013082,0.00098338,0.00124794
421,CAN,2002,724281130.0,CO2eq * t,UNFCCC,714722119.0,CO2eq * t,UNFCCC,8195000.0,CO2eq * t,233500000.0,CO2eq * t,165582000.0,CO2eq * t,971003788233.0,USD,760649334098,USD,800394129.555186,790835118.836829,,,,0.000746,0.000736,0.00099482,0.00126994,0.00098498,0.00125737
422,CAN,2003,743286854.0,CO2eq * t,UNFCCC,733290060.0,CO2eq * t,UNFCCC,8220000.0,CO2eq * t,232392000.0,CO2eq * t,176420000.0,CO2eq * t,1023682523003.0,USD,895540646635,USD,807478854.384847,797482059.638611,,,,0.000726,0.000716,0.00096114,0.00109866,0.00095137,0.0010875
423,CAN,2004,745194047.0,CO2eq * t,UNFCCC,733509090.0,CO2eq * t,UNFCCC,8411000.0,CO2eq * t,238375000.0,CO2eq * t,191817000.0,CO2eq * t,1083609294356.0,USD,1026690238278,USD,800163047.462783,788478090.09989,,,,0.000688,0.000677,0.00091544,0.00096619,0.00090466,0.00095481


<font size=3>Save the results in Trino</font>

In [23]:
# Add the sources used for the calculations in the result table
#df_result['scope1_excl_source'] = "UNFCCC -> Time_Series_GHG_total_without_LULUCF_in_kt_CO2_equivalent.xlsx"
#df_result['scope1_incl_source'] = "UNFCCC -> Time_Series_GHG_total_with_LULUCF_in_kt_CO2_equivalent.xlsx"
df_result['scope2_source'] = "OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE"
df_result['scope3_source'] = "OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value "          
df_result['gdp_ppp_source'] = "Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv"          


In [24]:
import osc_ingest_trino as osc
#df_result['validity_date']=pd.to_datetime(df_result['validity_date'], unit='D')
df_result=df_result.convert_dtypes()
df_result.info(verbose=True)

df_result['year'] = df_result['validity_date']
#df_result['validity_date'] = pd.to_datetime(df_result.validity_date, format='%Y')
df_result = df_result.convert_dtypes()
#df_result.validity_date = df_result.validity_date.astype('datetime64[ns]')



columnschema = osc.create_table_schema_pairs(df_result,typemap={'datetime64[ns]':'timestamp(6)'}) 
sql = f"""
drop table if exists {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
print(sql)
qres = engine.execute(sql)
#print(qres.fetchall())





<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12321 entries, 0 to 12320
Data columns (total 32 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   country_iso_code                              12321 non-null  string 
 1   validity_date                                 12321 non-null  Int64  
 2   scope1_excl_lulucf                            10633 non-null  Float64
 3   scope1_excl_lulucf_units                      12321 non-null  string 
 4   scope1_excl_source                            10633 non-null  string 
 5   scope1_incl_lulucf                            10634 non-null  Float64
 6   scope1_incl_lulucf_units                      12321 non-null  string 
 7   scope1_incl_source                            10634 non-null  string 
 8   scope2_value                                  1419 non-null   Float64
 9   scope2_value_units                            12321 non-null 

In [25]:
df_result.info("verbose=True")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12321 entries, 0 to 12320
Data columns (total 33 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   country_iso_code                              12321 non-null  string 
 1   validity_date                                 12321 non-null  Int64  
 2   scope1_excl_lulucf                            10633 non-null  Float64
 3   scope1_excl_lulucf_units                      12321 non-null  string 
 4   scope1_excl_source                            10633 non-null  string 
 5   scope1_incl_lulucf                            10634 non-null  Float64
 6   scope1_incl_lulucf_units                      12321 non-null  string 
 7   scope1_incl_source                            10634 non-null  string 
 8   scope2_value                                  1419 non-null   Float64
 9   scope2_value_units                            12321 non-null 

In [26]:
df_result = df_result[df_result['year']>=1990]
df_result.sort_values(by=['country_iso_code','validity_date'], inplace=True)
df_result.to_excel("pcaf_results.xlsx",index=False)
df_result.to_csv("pcaf_results.csv",index=False)


In [27]:
df_result.to_sql(ingest_table,
           con=engine,
           schema=ingest_schema,
           if_exists='append',
           index=False,
           method=osc.TrinoBatchInsert(batch_size = 50 , verbose = True))


constructed fully qualified table name as: "pcaf_sovereign_footprint.sf_total_sovereign_emissions"
inserting 50 records
  ('ABW', 1990, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 1363755808.78007, 'USD', 764804469.273743, 'USD', NULL, NULL, 62152, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE', 'OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value ', 'Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv', 1990)
  ('ABW', 1991, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 1522141309.42665, 'USD', 872067039.106145, 'USD', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 'OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE', 'OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value ', 'Worldbank -> AP

In [28]:
sql=f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table} where country_iso_code = 'BRA' and year > 2017 """
pd.read_sql(sql, engine)

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope1_incl_source,scope2_value,scope2_value_units,...,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_incl_lulucf,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_gdp,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_gdp,scope2_source,scope3_source,gdp_ppp_source,year
0,BRA,2018,1090000000.0,CO2eq * t,PRIMAP,1479250000.0,CO2eq * t,PRIMAP,11000.0,CO2eq * t,...,0.000346,0.00047,0.000383,0.000628,0.000507,0.000831,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2018
1,BRA,2019,1100000000.0,CO2eq * t,PRIMAP,1499492000.0,CO2eq * t,PRIMAP,,CO2eq * t,...,0.000339,0.000463,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2019
2,BRA,2020,1080000000.0,CO2eq * t,PRIMAP,1439934000.0,CO2eq * t,PRIMAP,,CO2eq * t,...,0.000343,0.000457,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2020
3,BRA,2021,1130000000.0,CO2eq * t,PRIMAP,1488956000.0,CO2eq * t,PRIMAP,,CO2eq * t,...,0.000329,0.000433,,,,,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2021
