<font size=6> The total sovereign emissions calculated by adding up scope 1, 2 and 3 emissions  </font>

In [1]:
report_currency = 'USD'

In [2]:
from dotenv import dotenv_values, load_dotenv
import osc_ingest_trino as osc
import os
import pathlib
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

In [3]:
import trino
from sqlalchemy.engine import create_engine

env_var_prefix = 'TRINO'

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ[f'{env_var_prefix}_USER'],
    host = os.environ[f'{env_var_prefix}_HOST'],
    port = os.environ[f'{env_var_prefix}_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

#trino_bucket = osc.attach_s3_bucket("S3_DEV")

In [4]:
import boto3
import pandas as pd

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

In [5]:
# define source and destination tables
# LULUCF (Land Use, Land-Use Change and Forestry)

ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'pcaf_sovereign_footprint'
ingest_table = 'sf_total_sovereign_emissions'


src_table_1 = 'sf_oecd_imgr_fco2'
src_table_2 = 'sf_unfccc_results'
src_table_3 = 'sf_oecd_exgr_dco2'
src_table_4 = 'sf_oecd_exch_rates'
src_table_5 = 'sf_wdi_population'





In [6]:
import pandas as pd
from openscm_units import unit_registry
from pint import set_application_registry, Quantity
from pint_pandas import PintArray, PintType
# First we create the registry.
ureg = unit_registry
Q_ = ureg.Quantity
ureg.default_format = '~'
ureg.define("CO2e = CO2 = CO2eq")
ureg.define("USD = [currency] ")
#ureg.define("EUR = [currency_EUR] ")
ureg.define('Millions=1000000')
set_application_registry(ureg)


In [7]:
def requantify_df(df):
    units_col = None
    columns_reversed = reversed(df.columns)
    for col in columns_reversed:
        if col.endswith("_units"):
            if units_col:
                # We expect _units column to follow a non-units column
                raise ValueError
            units_col = col
            continue
        if units_col:
            if col + '_units' != units_col:
                raise ValueError
            if (df[units_col]==df[units_col][0]).all():
                # Make a PintArray
                new_col = PintArray(df[col], dtype=f"pint[{ureg(df[units_col][0]).u}]")
            else:
                # Make a pd.Series of Quantity in a way that does not throw UnitStrippedWarning
                new_col = pd.Series(data=df[col], name=col) * pd.Series(data=df[units_col].map(lambda x: ureg(x).u), name=col)
            df = df.drop(columns=units_col)
            df[col] = new_col
            units_col = None
    return df


In [8]:
# If DF_COL contains Pint quantities (because it is a PintArray or an array of Pint Quantities),
# return a two-column dataframe of magnitudes and units.
# If DF_COL contains no Pint quanities, return it unchanged.

def dequantify_column(df_col: pd.Series):
    if type(df_col.values)==PintArray:
        return pd.DataFrame({df_col.name: df_col.values.quantity.m,
                             df_col.name + "_units": str(df_col.values.dtype.units)},
                            index=df_col.index)
    elif df_col.size==0:
        return df_col
    elif isinstance(df_col.iloc[0], Quantity):
        values = df_col.map(lambda x: (x.m, x.u))
        return pd.DataFrame({df_col.name: df_col.map(lambda x: x.m),
                             df_col.name + "_units": df_col.map(lambda x: str(x.u))},
                            index=df_col.index)
    else:
        return df_col

# Rewrite dataframe DF so that columns containing Pint quantities are represented by a column for the Magnitude and column for the Units.
# The magnitude column retains the original column name and the units column is renamed with a _units suffix.
def dequantify_df(df):
    return pd.concat([dequantify_column(df[col]) for col in df.columns], axis=1)

In [9]:
# Get the values saved in Trino related with Foreign CO2 emissions embodied in gross imports

import pandas as pd

sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_1} """ 
df1 = pd.read_sql(sql, engine)
df1 = requantify_df(df1)
df1
# convert to CO2 units to t
df1['value'] = df1['value'].pint.to("t CO2e")
df1 = dequantify_df(df1)
df1

Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,BRN,NLD,DTOTAL,2003,Foreign CO2 emissions embodied in gross imports,87000.0,CO2eq * t
1,BRN,NLD,DTOTAL,2004,Foreign CO2 emissions embodied in gross imports,73000.0,CO2eq * t
2,BRN,NLD,DTOTAL,2005,Foreign CO2 emissions embodied in gross imports,97000.0,CO2eq * t
3,BRN,NLD,DTOTAL,2006,Foreign CO2 emissions embodied in gross imports,66000.0,CO2eq * t
4,BRN,NLD,DTOTAL,2007,Foreign CO2 emissions embodied in gross imports,76000.0,CO2eq * t
...,...,...,...,...,...,...,...
334651,TUN,G20,D35,2014,Foreign CO2 emissions embodied in gross imports,33000.0,CO2eq * t
334652,TUN,G20,D35,2015,Foreign CO2 emissions embodied in gross imports,36000.0,CO2eq * t
334653,TUN,G20,D35,2016,Foreign CO2 emissions embodied in gross imports,15000.0,CO2eq * t
334654,TUN,G20,D35,2017,Foreign CO2 emissions embodied in gross imports,19000.0,CO2eq * t


In [10]:
sql=f"""
select country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_3} where industry_code='DTOTAL'""" 
df_exgr = pd.read_sql(sql, engine)
df_exgr = requantify_df(df_exgr)
# convert to CO2 units to t
df_exgr['value'] = df_exgr['value'].pint.to("t CO2e")
df_exgr = dequantify_df(df_exgr)
df_exgr

Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,CRI,VNM,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,2000.0,CO2eq * t
1,CRI,VNM,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t
2,CRI,VNM,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,3000.0,CO2eq * t
3,CRI,VNM,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t
4,CRI,VNM,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,3000.0,CO2eq * t
...,...,...,...,...,...,...,...
167323,KOR,FRA,DTOTAL,2006,Domestic CO2 emissions embodied in gross exports,2153000.0,CO2eq * t
167324,KOR,FRA,DTOTAL,2007,Domestic CO2 emissions embodied in gross exports,1999000.0,CO2eq * t
167325,KOR,FRA,DTOTAL,2008,Domestic CO2 emissions embodied in gross exports,1897000.0,CO2eq * t
167326,KOR,FRA,DTOTAL,2009,Domestic CO2 emissions embodied in gross exports,2427000.0,CO2eq * t


In [11]:
###########################

sql=f"""
select validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_4} where country_iso_code='DEU'""" 
df_exch_eur = pd.read_sql(sql, engine)
df_exch_eur

Unnamed: 0,validity_date,value
0,1950,2.144861
1,1951,2.144861
2,1952,2.144861
3,1953,2.147426
4,1954,2.147426
...,...,...
67,2017,0.885206
68,2018,0.846773
69,2019,0.893276
70,2020,0.875506


In [12]:
import pandas as pd 
sql=f"""
select country_iso_code,validity_date,value from {ingest_catalog}.{ingest_schema}.{src_table_5} """ 
df_population = pd.read_sql(sql, engine)
df_population["value"] = pd.to_numeric(df_population["value"], errors='coerce')
df_population["validity_date"] = pd.to_numeric(df_population["validity_date"], errors='coerce')
df_population=df_population.convert_dtypes()
df_population
#df_population.info(verbose=True)


Unnamed: 0,country_iso_code,validity_date,value
0,AFG,2014,33370804
1,ALB,2014,2889104
2,DZA,2014,38923688
3,ASM,2014,55791
4,AND,2014,79213
...,...,...,...
3187,SSF,2017,1050162810
3188,SSA,2017,1050066967
3189,TSS,2017,1050162810
3190,UMC,2017,2449903406


<font size=3> Remove invalid country codes </font>

In [13]:
import pycountry
import pandas as pd
df_country = pd.DataFrame([country.__dict__['_fields'] for country in pycountry.countries])
df_country = df_country.rename(columns={'alpha_3':'country_iso_code'})
df_country=df_country[['country_iso_code']]
# add Rest of World to the country dataframe
dict_row = {'country_iso_code':'ROW'}
df_country = df_country.append(dict_row, ignore_index = True)
df_country.info(verbose=True)
df1=df1.convert_dtypes()
#df1.info(verbose=True)
df1 = pd.merge(df1,df_country,on=['country_iso_code'])  
df1=df1.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df1

df_exgr = pd.merge(df_exgr,df_country,on=['country_iso_code'])  
df_exgr=df_exgr.convert_dtypes()
#df_result.drop(columns=['country_name_y'],inplace=True)
df_exgr






  df_country = df_country.append(dict_row, ignore_index = True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   country_iso_code  250 non-null    object
dtypes: object(1)
memory usage: 2.1+ KB


Unnamed: 0,country_iso_code,partner_iso_code,industry_code,validity_date,attribute,value,value_units
0,CRI,VNM,DTOTAL,1995,Domestic CO2 emissions embodied in gross exports,2000.0,CO2eq * t
1,CRI,VNM,DTOTAL,1996,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t
2,CRI,VNM,DTOTAL,1997,Domestic CO2 emissions embodied in gross exports,3000.0,CO2eq * t
3,CRI,VNM,DTOTAL,1998,Domestic CO2 emissions embodied in gross exports,4000.0,CO2eq * t
4,CRI,VNM,DTOTAL,1999,Domestic CO2 emissions embodied in gross exports,3000.0,CO2eq * t
...,...,...,...,...,...,...,...
135067,JPN,MMR,DTOTAL,2014,Domestic CO2 emissions embodied in gross exports,384000.0,CO2eq * t
135068,JPN,MMR,DTOTAL,2015,Domestic CO2 emissions embodied in gross exports,413000.0,CO2eq * t
135069,JPN,MMR,DTOTAL,2016,Domestic CO2 emissions embodied in gross exports,404000.0,CO2eq * t
135070,JPN,MMR,DTOTAL,2017,Domestic CO2 emissions embodied in gross exports,317000.0,CO2eq * t


Calcuate Scope 2:
GHG emissions occurring as a consequence of the domestic use of grid-supplied electricity, heat,
steam and/or cooling which is imported from another territory


In [14]:
rename_columns = {'value':'total','partner_iso_code':'country_iso_code','value_units' :'total_units'}
agg_columns = { 'value' : 'sum'}
columns_order = ['attribute','industry_code','country_iso_code','validity_date','total','total_units']
df1 = df1.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df1.convert_dtypes()

df1.info(verbose=True)
df1

######################

df_exgr = df_exgr.groupby(['partner_iso_code','industry_code','attribute','validity_date','value_units'],as_index=False).agg(agg_columns).rename(columns=rename_columns).reindex(columns=columns_order)

df_exgr.convert_dtypes()

df_exgr.info(verbose=True)
df_exgr



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4032 entries, 0 to 4031
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         4032 non-null   string 
 1   industry_code     4032 non-null   string 
 2   country_iso_code  4032 non-null   string 
 3   validity_date     4032 non-null   Int64  
 4   total             4032 non-null   Float64
 5   total_units       4032 non-null   string 
dtypes: Float64(1), Int64(1), string(4)
memory usage: 197.0 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   attribute         2016 non-null   string 
 1   industry_code     2016 non-null   string 
 2   country_iso_code  2016 non-null   string 
 3   validity_date     2016 non-null   Int64  
 4   total             2016 non-null   Float64
 5   total_units    

Unnamed: 0,attribute,industry_code,country_iso_code,validity_date,total,total_units
0,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1995,2008843000.0,CO2eq * t
1,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1996,2055450000.0,CO2eq * t
2,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1997,2151870000.0,CO2eq * t
3,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1998,2177243000.0,CO2eq * t
4,Domestic CO2 emissions embodied in gross exports,DTOTAL,APEC,1999,2300588000.0,CO2eq * t
...,...,...,...,...,...,...
2011,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2014,266081000.0,CO2eq * t
2012,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2015,231751000.0,CO2eq * t
2013,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2016,211686000.0,CO2eq * t
2014,Domestic CO2 emissions embodied in gross exports,DTOTAL,ZSCA,2017,226732000.0,CO2eq * t


In [15]:
# divide the dataframe into two dataframes 
# to calculate Scope2 and Scope3 Values
df_D35 = df1[df1['industry_code']=='D35']
df_D35 = df_D35[['country_iso_code','validity_date','total','total_units']]
df_DTOTAL = df1[df1['industry_code']=='DTOTAL']
df_DTOTAL = df_DTOTAL[['country_iso_code','validity_date','total','total_units']]
df_result = pd.merge(df_D35,df_DTOTAL,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()

#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total_x':'scope2_value','total_y':'scope3_value','total_units_x':'scope2_value_units','total_units_y':'scope3_value_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result['scope3_value'] = df_result['scope3_value'] - df_result['scope2_value'] 
df_result[df_result['country_iso_code']=='CAN']

df_result = pd.merge(df_result,df_exgr,on=['country_iso_code','validity_date'])  
df_result=df_result.convert_dtypes()
#df_result.drop(columns=['attribute_y','total_units_y'],inplace=True)
rename_columns = {'total':'exported_emissions','total_units':'exported_emissions_units'}
df_result.rename(columns=rename_columns,inplace=True)
df_result.drop(columns=['attribute','industry_code'],inplace=True)
df_result.info(verbose=True)



<class 'pandas.core.frame.DataFrame'>
Int64Index: 2016 entries, 0 to 2015
Data columns (total 8 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   country_iso_code          2016 non-null   string 
 1   validity_date             2016 non-null   Int64  
 2   scope2_value              2016 non-null   Float64
 3   scope2_value_units        2016 non-null   string 
 4   scope3_value              2016 non-null   Float64
 5   scope3_value_units        2016 non-null   string 
 6   exported_emissions        2016 non-null   Float64
 7   exported_emissions_units  2016 non-null   string 
dtypes: Float64(3), Int64(1), string(4)
memory usage: 149.6 KB


In [16]:
# Get the values saved in Trino containing  GHG emissions provided by UNFCCC & GDP PPP values from Worldbank

import pandas as pd


sql=f"""
select country_iso_code,validity_date,ghg_total_without_lulucf,ghg_total_without_lulucf_units,scope1_excl_source,ghg_total_with_lulucf,ghg_total_with_lulucf_units
,gdp,gdp_units,gdp_ppp,gdp_ppp_units
from {ingest_catalog}.{ingest_schema}.{src_table_2} """ 
df_unfccc = pd.read_sql(sql, engine)

if (report_currency == 'EUR'):
    df_unfccc = pd.merge(df_unfccc,df_exch_eur,on=['validity_date'],how='inner')   
    df_unfccc['gdp'] = df_unfccc['gdp'] * df_unfccc['value']
    df_unfccc['gdp_ppp'] = df_unfccc['gdp_ppp'] * df_unfccc['value']
    df_unfccc['gdp_units'] = 'EUR'
    df_unfccc['gdp_ppp_units'] = 'EUR'
    df_unfccc.drop(columns=['value'],inplace=True)
    


df_unfccc[df_unfccc['country_iso_code']=='ARG']
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)
#df1["units"] = "kt"
#df1 = requantify_df(df1).convert_dtypes()
#df1.info(verbose=True)
#df1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14824 entries, 0 to 14823
Data columns (total 11 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                14824 non-null  string 
 1   validity_date                   14824 non-null  Int64  
 2   ghg_total_without_lulucf        10447 non-null  Float64
 3   ghg_total_without_lulucf_units  14824 non-null  string 
 4   scope1_excl_source              10447 non-null  string 
 5   ghg_total_with_lulucf           2256 non-null   Float64
 6   ghg_total_with_lulucf_units     14824 non-null  string 
 7   gdp                             12854 non-null  Float64
 8   gdp_units                       14824 non-null  string 
 9   gdp_ppp                         7210 non-null   Float64
 10  gdp_ppp_units                   14824 non-null  string 
dtypes: Float64(4), Int64(1), string(6)
memory usage: 1.3 MB


In [17]:
import numpy as np
df_unfccc = df_unfccc.convert_dtypes()
df_unfccc.info(verbose=True)

df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))

df_unfccc = requantify_df(df_unfccc)
df_unfccc.info(verbose=True)
# convert to CO2 units to Mt
df_unfccc['ghg_total_without_lulucf'] = df_unfccc['ghg_total_without_lulucf'].pint.to("t CO2e")
df_unfccc['ghg_total_with_lulucf'] = df_unfccc['ghg_total_with_lulucf'].pint.to("t CO2e")
####

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf==0, np.nan, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf==0, np.nan, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp==0, np.nan, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp==0, np.nan, df_unfccc.gdp_ppp))

df_unfccc.info(verbose=True)


df_unfccc[df_unfccc['country_iso_code']=='ARG']



#df_unfccc['gdp'] = df_unfccc['gdp'].pint.to("Millions USD")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14824 entries, 0 to 14823
Data columns (total 11 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country_iso_code                14824 non-null  string 
 1   validity_date                   14824 non-null  Int64  
 2   ghg_total_without_lulucf        10447 non-null  Float64
 3   ghg_total_without_lulucf_units  14824 non-null  string 
 4   scope1_excl_source              10447 non-null  string 
 5   ghg_total_with_lulucf           2256 non-null   Float64
 6   ghg_total_with_lulucf_units     14824 non-null  string 
 7   gdp                             12854 non-null  Float64
 8   gdp_units                       14824 non-null  string 
 9   gdp_ppp                         7210 non-null   Float64
 10  gdp_ppp_units                   14824 non-null  string 
dtypes: Float64(4), Int64(1), string(6)
memory usage: 1.3 MB
<class 'pandas.core.frame.DataFrame'>

  return np.array(qtys, dtype="object", copy=copy)
  return np.array(qtys, dtype="object", copy=copy)


Unnamed: 0,country_iso_code,validity_date,ghg_total_without_lulucf,scope1_excl_source,ghg_total_with_lulucf,gdp,gdp_ppp
773,ARG,2010,187000000.0,PRIMAP,447642430.0,423627422092.49,736718360687.747
775,ARG,2012,198000000.0,PRIMAP,429478449.93,545982375701.128,819697901873.041
776,ARG,1990,102000000.0,PRIMAP,216291389.487407,141352368714.691,234564125672.49
777,ARG,1994,115000000.0,PRIMAP,223335527.904266,257440000000.0,349832146514.708
778,ARG,1997,133000000.0,PRIMAP,241956194.21024,292859000000.0,410342067258.763
...,...,...,...,...,...,...,...
12052,ARG,2017,198000000.0,PRIMAP,0.0,643628665302.155,1039330591568.54
12053,ARG,2018,196000000.0,PRIMAP,0.0,524819742918.669,1036428172096.8
12054,ARG,2019,188000000.0,PRIMAP,0.0,451932356085.842,1033557797795.68
12055,ARG,2020,178000000.0,PRIMAP,0.0,389288056265.325,942508330081.911


In [18]:
#df_unfccc = dequantify_df(df_unfccc)
#df_unfccc
#df_unfccc.info(verbose=True)
#df_unfccc

df_result= requantify_df(df_result)
#df_result= dequantify_df(df_result)
#df_result


<font size=3><b>Merge UNFCCC , Worldbank and OECD data</b> </font>

In [19]:
#df_result.info(verbose=True)
#df_result= requantify_df(df_result)
#df_result.info(verbose=True)

#df_unfccc = df_unfccc.assign(ghg_total_without_lulucf=np.where(df_unfccc.ghg_total_without_lulucf.isnull(), 0, df_unfccc.ghg_total_without_lulucf))
#df_unfccc = df_unfccc.assign(ghg_total_with_lulucf=np.where(df_unfccc.ghg_total_with_lulucf.isnull(), 0, df_unfccc.ghg_total_with_lulucf))
#df_unfccc = df_unfccc.assign(gdp=np.where(df_unfccc.gdp.isnull(), 0, df_unfccc.gdp))
#df_unfccc = df_unfccc.assign(gdp_ppp=np.where(df_unfccc.gdp_ppp.isnull(), 0, df_unfccc.gdp_ppp))


df_result = pd.merge(df_result,df_unfccc,on=['country_iso_code','validity_date'],how='outer')  


rename_columns = {'ghg_total_without_lulucf':'scope1_excl_lulucf',
                  'ghg_total_with_lulucf':'scope1_incl_lulucf'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)

# enrich with population data

df_result = pd.merge(df_result,df_population,on=['country_iso_code','validity_date'],how='outer')  


rename_columns = {'value':'population'}
#columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_lulucf_units','scope1_incl_lulucf','scope1_incl_lulucf_units','scope2_value','scope2_value_units','scope3_value','scope3_value_units','exported_emissions','exported_emissions_units','gdp_ppp','gdp_ppp_units','gdp','gdp_units']
df_result = df_result.rename(columns=rename_columns)
df_result.info(verbose=True)
df_result["population"] = df_result["population"].fillna(0)



#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope1_incl_lulucf'] = df_result['scope1_incl_lulucf'].fillna(0).astype(float)
#df_result['scope1_excl_lulucf'] = df_result['scope1_excl_lulucf'].fillna(0).astype(float)
#df_result['scope2_value'] = df_result['scope2_value'].fillna(0).astype(float)
#df_result['scope3_value'] = df_result['scope3_value'].fillna(0).astype(float)



df_result['sum_yn'] = 1 * ((df_result['scope1_excl_lulucf'] > 0) & (df_result['scope2_value'] > 0) &  (df_result['scope3_value'] > 0) )
df_result['calc_attr_factor_yn'] = 1 * (df_result['gdp_ppp'] > 0)
df_result['population_available'] = 1 * (df_result['population'] > 0)


df_result['consumption_emissions_excl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])
df_result['consumption_emissions_incl_lulucf'] = df_result['sum_yn'] * ( df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value'] - df_result['exported_emissions'])

df_result['consumption_emissions_excl_lulucf_per_capita']  = df_result['population_available'] * df_result['consumption_emissions_excl_lulucf'] / df_result['population']
df_result['consumption_emissions_incl_lulucf_per_capita']  = df_result['population_available'] * df_result['consumption_emissions_incl_lulucf'] / df_result['population']


###############
df_result['attribution_factor_scope1_excl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_excl_lulucf']) / df_result['gdp_ppp']
df_result['attribution_factor_scope1_incl_lulucf']= df_result['calc_attr_factor_yn'] * (df_result['scope1_incl_lulucf']) / df_result['gdp_ppp']

df_result['attribution_factor_excl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
df_result['attribution_factor_incl_lulucf']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp_ppp'])
##########
df_result['attribution_factor_excl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_excl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])
df_result['attribution_factor_incl_lulucf_gdp']= df_result['calc_attr_factor_yn'] * df_result['sum_yn'] * ( (df_result['scope1_incl_lulucf'] + df_result['scope2_value'] + df_result['scope3_value']) / df_result['gdp'])



#df_result['ghg_total_without_lulucf']=df_result['ghg_total_without_lulucf'].replace(0, np.nan)
#df_result['ghg_total_with_lulucf']=df_result['ghg_total_with_lulucf'].replace(0, np.nan)
#df_result['gdp']=df_result['gdp'].replace(0, np.nan)
#df_result['gdp_ppp']=df_result['gdp_ppp'].replace(0, np.nan)



#df_result

columns_order = ['country_iso_code','validity_date','scope1_excl_lulucf','scope1_excl_source','scope1_incl_lulucf','scope2_value','scope3_value',
                 'exported_emissions','gdp_ppp','gdp',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'population','consumption_emissions_excl_lulucf_per_capita','consumption_emissions_incl_lulucf_per_capita',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
df_result = df_result.reindex(columns=columns_order)
df_result.info(verbose=True)
df_result= dequantify_df(df_result)
df_result.info(verbose=True)
# remove invalid country_iso_codes (regions, ...)

df_result = pd.merge(df_result,df_country,on=['country_iso_code'])  

cols= ['scope1_excl_lulucf','scope1_incl_lulucf','scope2_value','scope3_value',
                 'exported_emissions','gdp_ppp','gdp',
                 'consumption_emissions_excl_lulucf','consumption_emissions_incl_lulucf',
                 'population','consumption_emissions_excl_lulucf_per_capita','consumption_emissions_incl_lulucf_per_capita',
                 'attribution_factor_scope1_excl_lulucf','attribution_factor_scope1_incl_lulucf',
                 'attribution_factor_excl_lulucf','attribution_factor_excl_lulucf_gdp',
                 'attribution_factor_incl_lulucf','attribution_factor_incl_lulucf_gdp']
df_result[cols] = df_result[cols].replace({0:np.nan})


df_result


<class 'pandas.core.frame.DataFrame'>
Int64Index: 15256 entries, 0 to 15255
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   country_iso_code    15256 non-null  string         
 1   validity_date       15256 non-null  Int64          
 2   scope2_value        2016 non-null   pint[CO2eq * t]
 3   scope3_value        2016 non-null   pint[CO2eq * t]
 4   exported_emissions  2016 non-null   pint[CO2eq * t]
 5   scope1_excl_lulucf  14824 non-null  pint[CO2eq * t]
 6   scope1_excl_source  10447 non-null  string         
 7   scope1_incl_lulucf  14824 non-null  pint[CO2eq * t]
 8   gdp                 14824 non-null  pint[USD]      
 9   gdp_ppp             14824 non-null  pint[USD]      
dtypes: Int64(1), pint[CO2eq * t](5), pint[USD](2), string(2)
memory usage: 1.3 MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 15473 entries, 0 to 15472
Data columns (total 11 columns):
 #   Column    

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,...,attribution_factor_scope1_incl_lulucf,attribution_factor_scope1_incl_lulucf_units,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_units,attribution_factor_excl_lulucf_gdp,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units
0,ARG,1995,117000000.0,CO2eq * t,PRIMAP,,CO2eq * t,21000.0,CO2eq * t,9356000.0,...,,CO2eq * t / USD,0.000364,CO2eq * t / USD,0.000490,CO2eq * t / USD,0.000027,CO2eq * t / USD,0.000036,CO2eq * t / USD
1,ARG,1996,132000000.0,CO2eq * t,PRIMAP,,CO2eq * t,28000.0,CO2eq * t,11287000.0,...,,CO2eq * t / USD,0.000384,CO2eq * t / USD,0.000527,CO2eq * t / USD,0.000030,CO2eq * t / USD,0.000042,CO2eq * t / USD
2,ARG,1997,133000000.0,CO2eq * t,PRIMAP,2.419562e+08,CO2eq * t,26000.0,CO2eq * t,14968000.0,...,0.00059,CO2eq * t / USD,0.000361,CO2eq * t / USD,0.000505,CO2eq * t / USD,0.000626,CO2eq * t / USD,0.000877,CO2eq * t / USD
3,ARG,1998,141000000.0,CO2eq * t,PRIMAP,,CO2eq * t,43000.0,CO2eq * t,13397000.0,...,,CO2eq * t / USD,0.000358,CO2eq * t / USD,0.000517,CO2eq * t / USD,0.000031,CO2eq * t / USD,0.000045,CO2eq * t / USD
4,ARG,1999,143000000.0,CO2eq * t,PRIMAP,,CO2eq * t,61000.0,CO2eq * t,12270000.0,...,,CO2eq * t / USD,0.000368,CO2eq * t / USD,0.000548,CO2eq * t / USD,0.000029,CO2eq * t / USD,0.000043,CO2eq * t / USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12267,MAF,2015,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
12268,MAF,2018,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
12269,MAF,2019,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
12270,MAF,2020,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD


In [20]:
#df_result['scope1_value_units'] = 'Mt CO2e'
#df_result['scope1_value'] = df_result['scope1_value'].round(decimals = 3)
#pd.options.display.float_format = '{:.3f}'.format
#df_result= df_result.convert_dtypes()
#df_result.info(verbose=True)
df_result[df_result['country_iso_code']=='CAN']
#df_result

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,...,attribution_factor_scope1_incl_lulucf,attribution_factor_scope1_incl_lulucf_units,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_units,attribution_factor_excl_lulucf_gdp,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units
414,CAN,1995,6.454553e+08,CO2eq * t,UNFCCC,6.058811e+08,CO2eq * t,5129000.0,CO2eq * t,179111000.0,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
415,CAN,1996,6.667019e+08,CO2eq * t,UNFCCC,6.212992e+08,CO2eq * t,5269000.0,CO2eq * t,188198000.0,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
416,CAN,1997,6.822990e+08,CO2eq * t,UNFCCC,6.329551e+08,CO2eq * t,6362000.0,CO2eq * t,202972000.0,...,0.000838,CO2eq * t / USD,0.001180,CO2eq * t / USD,0.001361,CO2eq * t / USD,0.001115,CO2eq * t / USD,0.001286,CO2eq * t / USD
417,CAN,1998,6.886139e+08,CO2eq * t,UNFCCC,6.335611e+08,CO2eq * t,7993000.0,CO2eq * t,214108000.0,...,0.000798,CO2eq * t / USD,0.001147,CO2eq * t / USD,0.001436,CO2eq * t / USD,0.001078,CO2eq * t / USD,0.001350,CO2eq * t / USD
418,CAN,1999,7.017587e+08,CO2eq * t,UNFCCC,6.540323e+08,CO2eq * t,9100000.0,CO2eq * t,222868000.0,...,0.000773,CO2eq * t / USD,0.001103,CO2eq * t / USD,0.001376,CO2eq * t / USD,0.001047,CO2eq * t / USD,0.001306,CO2eq * t / USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,CAN,1977,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
472,CAN,1987,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
473,CAN,1988,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
474,CAN,1982,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD


Calculate Attribution factor = (Scope1+Scope2+Scope3)/GDP


In [21]:


#df_result= requantify_df(df_result)
format_mapper =  {'gdp_ppp': '{0:,.0f}',
           'gdp': '{0:,.0f}',       
           'attribution_factor_excl_lulucf': '{0:.8f}',
           'attribution_factor_incl_lulucf': '{0:.8f}',
           'attribution_factor_excl_lulucf_gdp': '{0:.8f}',
           'attribution_factor_incl_lulucf_gdp': '{0:.8f}',       
           'scope1_excl_lulucf': '{0:,.0f}',       
           'scope1_incl_lulucf': '{0:,.0f}',              
           'scope2_value': '{0:,.0f}',              
           'scope3_value': '{0:,.0f}'
                 }

df_result[df_result['country_iso_code']=='CAN'].style.format(format_mapper)


Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,scope3_value_units,exported_emissions,exported_emissions_units,gdp_ppp,gdp_ppp_units,gdp,gdp_units,consumption_emissions_excl_lulucf,consumption_emissions_excl_lulucf_units,consumption_emissions_incl_lulucf,consumption_emissions_incl_lulucf_units,population,consumption_emissions_excl_lulucf_per_capita,consumption_emissions_excl_lulucf_per_capita_units,consumption_emissions_incl_lulucf_per_capita,consumption_emissions_incl_lulucf_per_capita_units,attribution_factor_scope1_excl_lulucf,attribution_factor_scope1_excl_lulucf_units,attribution_factor_scope1_incl_lulucf,attribution_factor_scope1_incl_lulucf_units,attribution_factor_excl_lulucf,attribution_factor_excl_lulucf_units,attribution_factor_excl_lulucf_gdp,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units
414,CAN,1995,645455297.0,CO2eq * t,UNFCCC,605881079.0,CO2eq * t,5129000.0,CO2eq * t,179111000.0,CO2eq * t,136648000.0,CO2eq * t,,USD,604031623433.0,USD,693047296.776335,CO2eq * t,653473079.122202,CO2eq * t,,,CO2eq * t,,CO2eq * t,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
415,CAN,1996,666701910.0,CO2eq * t,UNFCCC,621299195.0,CO2eq * t,5269000.0,CO2eq * t,188198000.0,CO2eq * t,139744000.0,CO2eq * t,,USD,628546387972.0,USD,720424910.230366,CO2eq * t,675022195.292942,CO2eq * t,,,CO2eq * t,,CO2eq * t,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD
416,CAN,1997,682298998.0,CO2eq * t,UNFCCC,632955107.0,CO2eq * t,6362000.0,CO2eq * t,202972000.0,CO2eq * t,160734000.0,CO2eq * t,755390170461.0,USD,654986999856.0,USD,730898998.432811,CO2eq * t,681555106.804654,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000903,CO2eq * t / USD,0.000838,CO2eq * t / USD,0.00118036,CO2eq * t / USD,0.0013613,CO2eq * t / USD,0.00111504,CO2eq * t / USD,0.00128596,CO2eq * t / USD
417,CAN,1998,688613931.0,CO2eq * t,UNFCCC,633561120.0,CO2eq * t,7993000.0,CO2eq * t,214108000.0,CO2eq * t,166623000.0,CO2eq * t,793674327704.0,USD,634000000000.0,USD,744091931.024858,CO2eq * t,689039119.917268,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000868,CO2eq * t / USD,0.000798,CO2eq * t / USD,0.00114747,CO2eq * t / USD,0.00143646,CO2eq * t / USD,0.0010781,CO2eq * t / USD,0.00134962,CO2eq * t / USD
418,CAN,1999,701758712.0,CO2eq * t,UNFCCC,654032317.0,CO2eq * t,9100000.0,CO2eq * t,222868000.0,CO2eq * t,171505000.0,CO2eq * t,846412945810.0,USD,678412196271.0,USD,762221711.728973,CO2eq * t,714495316.8326,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000829,CO2eq * t / USD,0.000773,CO2eq * t / USD,0.00110316,CO2eq * t / USD,0.00137634,CO2eq * t / USD,0.00104677,CO2eq * t / USD,0.00130599,CO2eq * t / USD
419,CAN,2000,726987283.0,CO2eq * t,UNFCCC,690491255.0,CO2eq * t,18191000.0,CO2eq * t,236036000.0,CO2eq * t,178858000.0,CO2eq * t,900996986801.0,USD,744773415932.0,USD,802356282.993296,CO2eq * t,765860254.730039,CO2eq * t,30685730.0,26.147538,CO2eq * t,24.958189,CO2eq * t,0.000807,CO2eq * t / USD,0.000766,CO2eq * t / USD,0.00108903,CO2eq * t / USD,0.00131747,CO2eq * t / USD,0.00104853,CO2eq * t / USD,0.00126846,CO2eq * t / USD
420,CAN,2001,718320043.0,CO2eq * t,UNFCCC,673786320.0,CO2eq * t,19531000.0,CO2eq * t,228887000.0,CO2eq * t,175811000.0,CO2eq * t,937786776177.0,USD,738981792355.0,USD,790927043.152663,CO2eq * t,746393319.668803,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000766,CO2eq * t / USD,0.000718,CO2eq * t / USD,0.00103087,CO2eq * t / USD,0.0013082,CO2eq * t / USD,0.00098338,CO2eq * t / USD,0.00124794,CO2eq * t / USD
421,CAN,2002,724281130.0,CO2eq * t,UNFCCC,714722119.0,CO2eq * t,8195000.0,CO2eq * t,233500000.0,CO2eq * t,165582000.0,CO2eq * t,971003788233.0,USD,760649334098.0,USD,800394129.555186,CO2eq * t,790835118.836829,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000746,CO2eq * t / USD,0.000736,CO2eq * t / USD,0.00099482,CO2eq * t / USD,0.00126994,CO2eq * t / USD,0.00098498,CO2eq * t / USD,0.00125737,CO2eq * t / USD
422,CAN,2003,743286854.0,CO2eq * t,UNFCCC,733290060.0,CO2eq * t,8220000.0,CO2eq * t,232392000.0,CO2eq * t,176420000.0,CO2eq * t,1023682523003.0,USD,895540646635.0,USD,807478854.384847,CO2eq * t,797482059.638611,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000726,CO2eq * t / USD,0.000716,CO2eq * t / USD,0.00096114,CO2eq * t / USD,0.00109866,CO2eq * t / USD,0.00095137,CO2eq * t / USD,0.0010875,CO2eq * t / USD
423,CAN,2004,745194047.0,CO2eq * t,UNFCCC,733509090.0,CO2eq * t,8411000.0,CO2eq * t,238375000.0,CO2eq * t,191817000.0,CO2eq * t,1083609294356.0,USD,1026690238278.0,USD,800163047.462783,CO2eq * t,788478090.09989,CO2eq * t,,,CO2eq * t,,CO2eq * t,0.000688,CO2eq * t / USD,0.000677,CO2eq * t / USD,0.00091544,CO2eq * t / USD,0.00096619,CO2eq * t / USD,0.00090466,CO2eq * t / USD,0.00095481,CO2eq * t / USD


<font size=3>Save the results in Trino</font>

In [22]:
# Add the sources used for the calculations in the result table
#df_result['scope1_excl_source'] = "UNFCCC -> Time_Series_GHG_total_without_LULUCF_in_kt_CO2_equivalent.xlsx"
df_result['scope1_incl_source'] = "UNFCCC -> Time_Series_GHG_total_with_LULUCF_in_kt_CO2_equivalent.xlsx"
df_result['scope2_source'] = "OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE"
df_result['scope3_source'] = "OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value "          
df_result['gdp_ppp_source'] = "Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv"          


In [23]:
import osc_ingest_trino as osc
#df_result['validity_date']=pd.to_datetime(df_result['validity_date'], unit='D')
df_result=df_result.convert_dtypes()
df_result.info(verbose=True)

df_result['year'] = df_result['validity_date']
#df_result['validity_date'] = pd.to_datetime(df_result.validity_date, format='%Y')
df_result = df_result.convert_dtypes()
#df_result.validity_date = df_result.validity_date.astype('datetime64[ns]')



columnschema = osc.create_table_schema_pairs(df_result,typemap={'datetime64[ns]':'timestamp(6)'}) 
sql = f"""
drop table if exists {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
print(sql)
qres = engine.execute(sql)
#print(qres.fetchall())





<class 'pandas.core.frame.DataFrame'>
Int64Index: 12272 entries, 0 to 12271
Data columns (total 42 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   country_iso_code                                    12272 non-null  string 
 1   validity_date                                       12272 non-null  Int64  
 2   scope1_excl_lulucf                                  10391 non-null  Float64
 3   scope1_excl_lulucf_units                            12272 non-null  string 
 4   scope1_excl_source                                  10447 non-null  string 
 5   scope1_incl_lulucf                                  2256 non-null   Float64
 6   scope1_incl_lulucf_units                            12272 non-null  string 
 7   scope2_value                                        1419 non-null   Float64
 8   scope2_value_units                                  12272 non-null  string 


In [24]:
df_result.info("verbose=True")

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12272 entries, 0 to 12271
Data columns (total 43 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   country_iso_code                                    12272 non-null  string 
 1   validity_date                                       12272 non-null  Int64  
 2   scope1_excl_lulucf                                  10391 non-null  Float64
 3   scope1_excl_lulucf_units                            12272 non-null  string 
 4   scope1_excl_source                                  10447 non-null  string 
 5   scope1_incl_lulucf                                  2256 non-null   Float64
 6   scope1_incl_lulucf_units                            12272 non-null  string 
 7   scope2_value                                        1419 non-null   Float64
 8   scope2_value_units                                  12272 non-null  string 


In [25]:
df_result = df_result[df_result['year']>=1990]
df_result.sort_values(by=['country_iso_code','validity_date'], inplace=True)
df_result.to_excel("pcaf_results.xls",index=False,encoding='utf-8')


  return func(*args, **kwargs)
  df_result.to_excel("pcaf_results.xls",index=False,encoding='utf-8')


In [26]:
df_result.to_sql(ingest_table,
           con=engine,
           schema=ingest_schema,
           if_exists='append',
           index=False,
           method=osc.TrinoBatchInsert(batch_size = 50 , verbose = True))


constructed fully qualified table name as: "pcaf_sovereign_footprint.sf_total_sovereign_emissions"
inserting 50 records
  ('ABW', 1990, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 1447708861.20673, 'USD', 764887117.194486, 'USD', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 62152, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', NULL, 'CO2eq * t / USD', 'UNFCCC -> Time_Series_GHG_total_with_LULUCF_in_kt_CO2_equivalent.xlsx', 'OECD -> IMGR_FCO2.csv -> Industry_Code = D35  -> Aggregation grouped by Partner_ISO_CODE', 'OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL - Scope_2 Value ', 'Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_4019306.csv', 1990)
  ('ABW', 1991, NULL, 'CO2eq * t', NULL, NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', NULL, 'CO2eq * t', 1615844601.64351, 'USD', 872138715.083799, 'USD', NULL, 'CO2eq *

In [29]:
sql=f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table} where country_iso_code = 'CAN' and year > 2017 """
pd.read_sql(sql, engine)

Unnamed: 0,country_iso_code,validity_date,scope1_excl_lulucf,scope1_excl_lulucf_units,scope1_excl_source,scope1_incl_lulucf,scope1_incl_lulucf_units,scope2_value,scope2_value_units,scope3_value,...,attribution_factor_excl_lulucf_gdp_units,attribution_factor_incl_lulucf,attribution_factor_incl_lulucf_units,attribution_factor_incl_lulucf_gdp,attribution_factor_incl_lulucf_gdp_units,scope1_incl_source,scope2_source,scope3_source,gdp_ppp_source,year
0,CAN,2018,740006300.0,CO2eq * t,UNFCCC,731538100.0,CO2eq * t,4953000.0,CO2eq * t,260931000.0,...,CO2eq * t / USD,0.000538,CO2eq * t / USD,0.000578,CO2eq * t / USD,UNFCCC -> Time_Series_GHG_total_with_LULUCF_in...,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2018
1,CAN,2019,738283400.0,CO2eq * t,UNFCCC,722348300.0,CO2eq * t,,CO2eq * t,,...,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,UNFCCC -> Time_Series_GHG_total_with_LULUCF_in...,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2019
2,CAN,2020,672354000.0,CO2eq * t,UNFCCC,665593500.0,CO2eq * t,,CO2eq * t,,...,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,UNFCCC -> Time_Series_GHG_total_with_LULUCF_in...,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2020
3,CAN,2021,,CO2eq * t,,,CO2eq * t,,CO2eq * t,,...,CO2eq * t / USD,,CO2eq * t / USD,,CO2eq * t / USD,UNFCCC -> Time_Series_GHG_total_with_LULUCF_in...,OECD -> IMGR_FCO2.csv -> Industry_Code = D35 ...,OECD -> IMGR_FCO2.csv -> Indutry_Code = DTOTAL...,Worldbank -> API_NY.GDP.MKTP.CD_DS2_en_csv_v2_...,2021
