<font size="6">Calculate GHG Intensity per GDP, PPP </font>

In [1]:
from dotenv import dotenv_values, load_dotenv
import osc_ingest_trino as osc
import os
import pathlib

<font size="4">Load Environment Variables</font>

In [2]:
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

In [3]:
import trino
from sqlalchemy.engine import create_engine

env_var_prefix = 'TRINO'

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ[f'{env_var_prefix}_USER'],
    host = os.environ[f'{env_var_prefix}_HOST'],
    port = os.environ[f'{env_var_prefix}_PORT']
)
sqlargs = {
    'auth': trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    'http_scheme': 'https',
    'catalog': 'osc_datacommons_dev'
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

trino_bucket = osc.attach_s3_bucket("S3_DEV")

In [4]:
import boto3

s3_source = boto3.resource(
    service_name="s3",
    endpoint_url=os.environ['S3_LANDING_ENDPOINT'],
    aws_access_key_id=os.environ['S3_LANDING_ACCESS_KEY'],
    aws_secret_access_key=os.environ['S3_LANDING_SECRET_KEY'],
)
source_bucket = s3_source.Bucket(os.environ['S3_LANDING_BUCKET'])

Open a Trino connection using JWT for authentication

In [5]:
# Show available schemas to ensure trino connection is set correctly
ingest_catalog = 'osc_datacommons_dev'
schema_read = engine.execute(f'show schemas in {ingest_catalog}')
for row in schema_read.fetchall():
    print(row)

('aicoe_osc_demo_results',)
('default',)
('demo_dv',)
('dera',)
('essd',)
('iceberg_demo',)
('information_schema',)
('ingest',)
('mdt_sandbox',)
('pcaf_sovereign_footprint',)
('sandbox',)
('wri_gppd',)


In [6]:
# define source and destination tables
# LULUCF (Land Use, Land-Use Change and Forestry)

ingest_schema = 'pcaf_sovereign_footprint'
ingest_table = 'sf_unfccc_results'
src_table_1 = 'sf_unfccc_with_lulucf'
src_table_2 = 'sf_unfccc_without_lulucf'
src_table_3 = 'sf_wdi_gdp'
src_table_4 = 'sf_primap_hist_emissions'
src_country_table = 'sf_unfccc_countries'

In [7]:
########################
essd_schema = 'mdt_sandbox'
essd_src_table = 'gwp100_data'
#essd_src_table = 'ghg_data' 

In [8]:
def requantify_df(df):
    units_col = None
    columns_reversed = reversed(df.columns)
    for col in columns_reversed:
        if col.endswith("_units"):
            if units_col:
                # We expect _units column to follow a non-units column
                raise ValueError
            units_col = col
            continue
        if units_col:
            if col + '_units' != units_col:
                raise ValueError
            if (df[units_col]==df[units_col][0]).all():
                # Make a PintArray
                new_col = PintArray(df[col], dtype=f"pint[{ureg(df[units_col][0]).u}]")
            else:
                # Make a pd.Series of Quantity in a way that does not throw UnitStrippedWarning
                new_col = pd.Series(data=df[col], name=col) * pd.Series(data=df[units_col].map(lambda x: ureg(x).u), name=col)
            df = df.drop(columns=units_col)
            df[col] = new_col
            units_col = None
    return df


In [9]:
from openscm_units import unit_registry
#PintType.ureg = unit_registry
ureg = unit_registry
Q_ = ureg.Quantity
ureg.define("CO2e=CO2=CO2eq=CO2_eq")
ureg.define("USD=[currency]=$")

In [10]:
from pint import UnitRegistry, set_application_registry
set_application_registry(ureg)

In [11]:
import pandas as pd
from functools import reduce
import pandas as pd
import pint
from pint import set_application_registry, Quantity
from pint_pandas import PintArray, PintType
from pint_pandas.pint_array import is_pint_type

In [12]:
### ESSD data is not needed at the moment (maybe in the future)

'''
sql=f"""
select iso as country_iso_code,year(year) as validity_date, co2/1000 as value,t.* from {ingest_catalog}.{essd_schema}.{essd_src_table} t """ 
df_essd = pd.read_sql(sql, engine)
#df1["units"] = "kt"
df_essd = df_essd.convert_dtypes()
df_essd[df_essd['country_iso_code'] == 'ZWE'][df_essd['validity_date'] ==2020]
rename_columns = {'value':'total'}
agg_columns = { 'value' : 'sum'}
columns_order = ['attribute','industry_code','country_iso_code','validity_date','total','total_units']
df_essd = df_essd.groupby(['country_iso_code','validity_date'],as_index=False).agg(agg_columns).rename(columns=rename_columns)
df_essd
'''



#df_essd.info(verbose=True)

'\nsql=f"""\nselect iso as country_iso_code,year(year) as validity_date, co2/1000 as value,t.* from {ingest_catalog}.{essd_schema}.{essd_src_table} t """ \ndf_essd = pd.read_sql(sql, engine)\n#df1["units"] = "kt"\ndf_essd = df_essd.convert_dtypes()\ndf_essd[df_essd[\'country_iso_code\'] == \'ZWE\'][df_essd[\'validity_date\'] ==2020]\nrename_columns = {\'value\':\'total\'}\nagg_columns = { \'value\' : \'sum\'}\ncolumns_order = [\'attribute\',\'industry_code\',\'country_iso_code\',\'validity_date\',\'total\',\'total_units\']\ndf_essd = df_essd.groupby([\'country_iso_code\',\'validity_date\'],as_index=False).agg(agg_columns).rename(columns=rename_columns)\ndf_essd\n'

In [13]:
sql=f"""
select country_iso_code,country_name,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_1} """ 
df1 = pd.read_sql(sql, engine)
#df1["units"] = "kt"
df1 = requantify_df(df1).convert_dtypes()
df1
df1.info(verbose=True)
sql=f"""
select t.country_iso_code,country_name,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_2} t, {ingest_catalog}.{ingest_schema}.{src_country_table} c
where t.country_iso_code= c.country_iso_code and c.annex1_flag='Y'"""
df2 = pd.read_sql(sql, engine)
#df2 = requantify_df(df2).convert_dtypes()
#df2
# gdp
sql=f"""
select country_iso_code,validity_date,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_3} where attribute='GDP (current US$)'"""
df3 = pd.read_sql(sql, engine)
df3 = requantify_df(df3).convert_dtypes()
# gdp ppp
sql=f"""
select country_iso_code,validity_date,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_3} where attribute='GDP, PPP (current international $)'"""
df4 = pd.read_sql(sql, engine)
df4 = requantify_df(df4).convert_dtypes()





<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2256 entries, 0 to 2255
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype          
---  ------            --------------  -----          
 0   country_iso_code  2256 non-null   string         
 1   country_name      2256 non-null   string         
 2   validity_date     2256 non-null   Int64          
 3   attribute         2256 non-null   string         
 4   value             2256 non-null   pint[CO2e * kt]
dtypes: Int64(1), pint[CO2e * kt](1), string(3)
memory usage: 90.5 KB


In [14]:
# PRIMAP data

sql=f"""
select t.country_iso_code,validity_date,attribute,value,value_units from {ingest_catalog}.{ingest_schema}.{src_table_4} t
, {ingest_catalog}.{ingest_schema}.{src_country_table} c
where t.country_iso_code= c.country_iso_code and c.annex1_flag='N'
and attribute='CO2' and validity_date >=1960 """ 
df_primap = pd.read_sql(sql, engine)
#df_primap['value_units'] = 'kt CO2'
#df_primap = requantify_df(df_primap).convert_dtypes()
# convert to CO2 units to t
#df_primap['value'] = df_primap['value'].pint.to("t CO2e")
df_primap

Unnamed: 0,country_iso_code,validity_date,attribute,value,value_units
0,CMR,1960,CO2,387.0,CO2 * gigagram / a
1,CMR,1961,CO2,400.0,CO2 * gigagram / a
2,CMR,1962,CO2,303.0,CO2 * gigagram / a
3,CMR,1963,CO2,315.0,CO2 * gigagram / a
4,CMR,1964,CO2,354.0,CO2 * gigagram / a
...,...,...,...,...,...
9109,GUY,2017,CO2,2020.0,CO2 * gigagram / a
9110,GUY,2018,CO2,2220.0,CO2 * gigagram / a
9111,GUY,2019,CO2,2210.0,CO2 * gigagram / a
9112,GUY,2020,CO2,2050.0,CO2 * gigagram / a


In [15]:
#df2 = pd.merge(df2,df_essd,on=['country_iso_code','validity_date'],how='outer')  
#df2.rename(columns={"value": "ghg_total_without_lulucf", "total": "ghg_total_without_lulucf_essd"},inplace=True)
df2 = pd.merge(df2,df_primap,on=['country_iso_code','validity_date'],how='outer')  
df2.rename(columns={"value_units_x":"value_units","value_x": "ghg_total_without_lulucf_unfccc","value_y": "ghg_total_without_lulucf_primap"},inplace=True)
df2=df2.convert_dtypes()
df2.info(verbose=True)
df_primap.info(verbose=True)
df2

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10447 entries, 0 to 10446
Data columns (total 9 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   country_iso_code                 10447 non-null  string 
 1   country_name                     1333 non-null   string 
 2   validity_date                    10447 non-null  Int64  
 3   attribute_x                      1333 non-null   string 
 4   ghg_total_without_lulucf_unfccc  1333 non-null   Float64
 5   value_units                      1333 non-null   string 
 6   attribute_y                      9114 non-null   string 
 7   ghg_total_without_lulucf_primap  9114 non-null   Float64
 8   value_units_y                    9114 non-null   string 
dtypes: Float64(2), Int64(1), string(6)
memory usage: 846.8 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9114 entries, 0 to 9113
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dt

Unnamed: 0,country_iso_code,country_name,validity_date,attribute_x,ghg_total_without_lulucf_unfccc,value_units,attribute_y,ghg_total_without_lulucf_primap,value_units_y
0,ISL,Iceland,2013,"Time Series - GHG total without LULUCF, in kt ...",4661.030556,kt CO2e,,,
1,ISL,Iceland,2014,"Time Series - GHG total without LULUCF, in kt ...",4661.461296,kt CO2e,,,
2,ISL,Iceland,2015,"Time Series - GHG total without LULUCF, in kt ...",4746.023521,kt CO2e,,,
3,ISL,Iceland,2016,"Time Series - GHG total without LULUCF, in kt ...",4692.480155,kt CO2e,,,
4,ISL,Iceland,2017,"Time Series - GHG total without LULUCF, in kt ...",4776.967094,kt CO2e,,,
...,...,...,...,...,...,...,...,...,...
10442,GUY,,2017,,,,CO2,2020.0,CO2 * gigagram / a
10443,GUY,,2018,,,,CO2,2220.0,CO2 * gigagram / a
10444,GUY,,2019,,,,CO2,2210.0,CO2 * gigagram / a
10445,GUY,,2020,,,,CO2,2050.0,CO2 * gigagram / a


In [16]:
import numpy as np
df2['scope1_excl_source'] = np.where(df2.ghg_total_without_lulucf_unfccc.notnull(),
         'UNFCCC',
         np.where(df2.ghg_total_without_lulucf_primap.notnull(),
                  'PRIMAP','UNFCCC'))
#df2['scope1_source'] = df2['ghg_total_without_lulucf'].apply(lambda x: 'UNFCCC' if x.isnull() == True else 'PRIMAP')
df2["value"] = df2["ghg_total_without_lulucf_unfccc"].fillna(df2["ghg_total_without_lulucf_primap"])
df2.drop(columns=['attribute_x','attribute_y',"value_units_y","ghg_total_without_lulucf_unfccc","ghg_total_without_lulucf_primap"],inplace=True)

columns_order = ['country_iso_code','country_name','validity_date','value','value_units','scope1_excl_source']
df2 = df2.reindex(columns=columns_order).convert_dtypes()
df2.info(verbose=True)
##df2[df2["scope1_excl_source"] =='PRIMAP']
#df2 = requantify_df(df2)

#df2

#df2.sort_values(by=['country_iso_code','validity_date'], ascending=False).to_csv("tmp_scope1_waterfall.csv")
df2[df2["country_iso_code"] == 'CAN']

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10447 entries, 0 to 10446
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   country_iso_code    10447 non-null  string 
 1   country_name        1333 non-null   string 
 2   validity_date       10447 non-null  Int64  
 3   value               10447 non-null  Float64
 4   value_units         1333 non-null   string 
 5   scope1_excl_source  10447 non-null  string 
dtypes: Float64(1), Int64(1), string(4)
memory usage: 591.7 KB


Unnamed: 0,country_iso_code,country_name,validity_date,value,value_units,scope1_excl_source
8,CAN,Canada,1990,594722.24342,kt CO2e,UNFCCC
9,CAN,Canada,1991,587905.793885,kt CO2e,UNFCCC
10,CAN,Canada,1992,605290.24882,kt CO2e,UNFCCC
11,CAN,Canada,1993,607681.076914,kt CO2e,UNFCCC
12,CAN,Canada,1994,628145.869194,kt CO2e,UNFCCC
13,CAN,Canada,1995,645455.296776,kt CO2e,UNFCCC
14,CAN,Canada,1996,666701.91023,kt CO2e,UNFCCC
15,CAN,Canada,1997,682298.998433,kt CO2e,UNFCCC
16,CAN,Canada,1998,688613.931025,kt CO2e,UNFCCC
17,CAN,Canada,1999,701758.711729,kt CO2e,UNFCCC


Read the source tables into dataframes  

<font size="4">Merge the source dataframes in a single dataframe and calculate ghg intensity values</font>

In [17]:
df2 = df2.assign(value=np.where(df2.value.isnull(), 0, df2.value))
df2 = requantify_df(df2)
df2

  return np.array(qtys, dtype="object", copy=copy)
  return np.array(qtys, dtype="object", copy=copy)


Unnamed: 0,country_iso_code,country_name,validity_date,value,scope1_excl_source
0,ISL,Iceland,2013,4661.030556210798,UNFCCC
1,ISL,Iceland,2014,4661.461295830477,UNFCCC
2,ISL,Iceland,2015,4746.023520688837,UNFCCC
3,ISL,Iceland,2016,4692.480155280686,UNFCCC
4,ISL,Iceland,2017,4776.967094270128,UNFCCC
...,...,...,...,...,...
10442,GUY,,2017,2020.0,PRIMAP
10443,GUY,,2018,2220.0,PRIMAP
10444,GUY,,2019,2210.0,PRIMAP
10445,GUY,,2020,2050.0,PRIMAP


In [18]:

df_result = pd.merge(df1,df2,on=['country_iso_code','validity_date'],how='outer')  
df_result=df_result.convert_dtypes()
df_result.drop(columns=['country_name_y','attribute'],inplace=True)
df_result.rename(columns={"country_name_x":"country_name","value_x": "ghg_total_with_lulucf","value_y": "ghg_total_without_lulucf"},inplace=True)

###

df_result = pd.merge(df_result,df3,on=['country_iso_code','validity_date'],how="outer")  
df_result.rename(columns={"value":"gdp"},inplace=True)
###
df_result = pd.merge(df_result,df4,on=['country_iso_code','validity_date'],how="outer")  
df_result.rename(columns={"value":"gdp_ppp"},inplace=True)



df_result["ghg_intensity_with_lulucf_per_gdp"] = df_result["ghg_total_with_lulucf"]/df_result["gdp_ppp"]*1000000
df_result["ghg_intensity_without_lulucf_per_gdp"] = df_result["ghg_total_without_lulucf"]/df_result["gdp_ppp"]*1000000
df_result.info(verbose=True)



<class 'pandas.core.frame.DataFrame'>
Int64Index: 14824 entries, 0 to 14823
Data columns (total 10 columns):
 #   Column                                Non-Null Count  Dtype                
---  ------                                --------------  -----                
 0   country_iso_code                      14824 non-null  string               
 1   country_name                          2256 non-null   string               
 2   validity_date                         14824 non-null  Int64                
 3   ghg_total_with_lulucf                 2256 non-null   pint[CO2e * kt]      
 4   ghg_total_without_lulucf              10447 non-null  pint[CO2e * kt]      
 5   scope1_excl_source                    10447 non-null  string               
 6   gdp                                   12854 non-null  pint[USD]            
 7   gdp_ppp                               7210 non-null   pint[USD]            
 8   ghg_intensity_with_lulucf_per_gdp     2087 non-null   pint[CO2e * kt / USD]


In [19]:
# If DF_COL contains Pint quantities (because it is a PintArray or an array of Pint Quantities),
# return a two-column dataframe of magnitudes and units.
# If DF_COL contains no Pint quanities, return it unchanged.

def dequantify_column(df_col: pd.Series):
    if type(df_col.values)==PintArray:
        return pd.DataFrame({df_col.name: df_col.values.quantity.m,
                             df_col.name + "_units": str(df_col.values.dtype.units)},
                            index=df_col.index)
    elif df_col.size==0:
        return df_col
    elif isinstance(df_col.iloc[0], Quantity):
        values = df_col.map(lambda x: (x.m, x.u))
        return pd.DataFrame({df_col.name: df_col.map(lambda x: x.m),
                             df_col.name + "_units": df_col.map(lambda x: str(x.u))},
                            index=df_col.index)
    else:
        return df_col

# Rewrite dataframe DF so that columns containing Pint quantities are represented by a column for the Magnitude and column for the Units.
# The magnitude column retains the original column name and the units column is renamed with a _units suffix.
def dequantify_df(df):
    return pd.concat([dequantify_column(df[col]) for col in df.columns], axis=1)

In [20]:
df1 = dequantify_df(df1)
df2 = dequantify_df(df2)
df3 = dequantify_df(df3)
df_result = dequantify_df(df_result)
df_result=df_result.convert_dtypes()
df_result.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14824 entries, 0 to 14823
Data columns (total 16 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   country_iso_code                            14824 non-null  string 
 1   country_name                                2256 non-null   string 
 2   validity_date                               14824 non-null  Int64  
 3   ghg_total_with_lulucf                       2256 non-null   Float64
 4   ghg_total_with_lulucf_units                 14824 non-null  string 
 5   ghg_total_without_lulucf                    10447 non-null  Float64
 6   ghg_total_without_lulucf_units              14824 non-null  string 
 7   scope1_excl_source                          10447 non-null  string 
 8   gdp                                         12854 non-null  Float64
 9   gdp_units                                   14824 non-null  string 
 10  gdp_ppp   

In [21]:

# make sure schema exists, or table creation below will fail in weird ways
sql = f"""
CREATE SCHEMA if not exists {ingest_catalog}.{ingest_schema}
 AUTHORIZATION USER mersin35
 WITH (
     location = 's3a://osc-datacommons-s3-bucket-dev02/data/pcaf_covereign.db'
 )
"""
print(sql)
qres = engine.execute(sql)
#print(qres.fetchall())


CREATE SCHEMA if not exists osc_datacommons_dev.pcaf_sovereign_footprint
 AUTHORIZATION USER mersin35
 WITH (
     location = 's3a://osc-datacommons-s3-bucket-dev02/data/pcaf_covereign.db'
 )



Run these in a notebook cell if you need to install onto your nb env

<font size="5">Save the results in Trino</font>


In [22]:
import osc_ingest_trino as osc
#df_result['validity_date']=pd.to_datetime(df_result['validity_date'], unit='D')
#df_result=df_result.convert_dtypes()
#df_result.info()
columnschema = osc.create_table_schema_pairs(df_result) 
sql = f"""
drop table if exists {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
print(sql)
qres = engine.execute(sql)
#print(qres.fetchall())




drop table if exists osc_datacommons_dev.pcaf_sovereign_footprint.sf_unfccc_results



In [23]:
tabledef = f"""
create table if not exists {ingest_catalog}.{ingest_schema}.{ingest_table}(
{columnschema}
) with (
    format = 'ORC',
    partitioning = array['country_iso_code']
)
"""
print(tabledef)
qres = engine.execute(tabledef)
#print(qres.fetchall())


create table if not exists osc_datacommons_dev.pcaf_sovereign_footprint.sf_unfccc_results(
    country_iso_code varchar,
    country_name varchar,
    validity_date bigint,
    ghg_total_with_lulucf double,
    ghg_total_with_lulucf_units varchar,
    ghg_total_without_lulucf double,
    ghg_total_without_lulucf_units varchar,
    scope1_excl_source varchar,
    gdp double,
    gdp_units varchar,
    gdp_ppp double,
    gdp_ppp_units varchar,
    ghg_intensity_with_lulucf_per_gdp double,
    ghg_intensity_with_lulucf_per_gdp_units varchar,
    ghg_intensity_without_lulucf_per_gdp double,
    ghg_intensity_without_lulucf_per_gdp_units varchar
) with (
    format = 'ORC',
    partitioning = array['country_iso_code']
)



In [24]:
# Delete all data from our db, so we start with empty table
sql=f"""
delete from {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
qres = engine.execute(sql)
#print(qres.fetchall())

In [25]:
sql=f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
pd.read_sql(sql, engine)


Unnamed: 0,country_iso_code,country_name,validity_date,ghg_total_with_lulucf,ghg_total_with_lulucf_units,ghg_total_without_lulucf,ghg_total_without_lulucf_units,scope1_excl_source,gdp,gdp_units,gdp_ppp,gdp_ppp_units,ghg_intensity_with_lulucf_per_gdp,ghg_intensity_with_lulucf_per_gdp_units,ghg_intensity_without_lulucf_per_gdp,ghg_intensity_without_lulucf_per_gdp_units


In [26]:
print(ingest_catalog)
#df=df.drop(df[df.country_name=="cote d'ivoire"].index)
df_result.to_sql(ingest_table,
           con=engine,
           schema=ingest_schema,
           if_exists='append',
           index=False,
           method=osc.TrinoBatchInsert(batch_size = 1000, verbose = True))

osc_datacommons_dev
constructed fully qualified table name as: "pcaf_sovereign_footprint.sf_unfccc_results"
inserting 1000 records
  ('CZE', 'Czechia', 2014, 119978.68902727596, 'CO2e * kt', 126728.25148794397, 'CO2e * kt', 'UNFCCC', 209358834156.329, 'USD', 342099831459.922, 'USD', 0.350712505514145, 'CO2e * kt / USD', 0.3704423090392272, 'CO2e * kt / USD')
  ('CZE', 'Czechia', 2015, 121480.17280251009, 'CO2e * kt', 128158.54173856508, 'CO2e * kt', 'UNFCCC', 188033050459.881, 'USD', 357503876184.69, 'USD', 0.33980099488418486, 'CO2e * kt / USD', 0.35848154460948317, 'CO2e * kt / USD')
  ('CZE', 'Czechia', 2016, 123701.93557176222, 'CO2e * kt', 129495.19282998645, 'CO2e * kt', 'UNFCCC', 196272068576.338, 'USD', 381420338169.846, 'USD', 0.3243191911719136, 'CO2e * kt / USD', 0.33950783393286804, 'CO2e * kt / USD')
  ...
  ('NZL', 'New Zealand', 1992, 44287.75734048566, 'CO2e * kt', 67332.21363465225, 'CO2e * kt', 'UNFCCC', 41649829859.6342, 'USD', 52698533576.7729, 'USD', 0.840398286908

In [29]:
#df_result.to_csv(ingest_table+".csv")
sql=f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table} where country_iso_code='ARG' and validity_date > 2015"""
pd.read_sql(sql, engine)


Unnamed: 0,country_iso_code,country_name,validity_date,ghg_total_with_lulucf,ghg_total_with_lulucf_units,ghg_total_without_lulucf,ghg_total_without_lulucf_units,scope1_excl_source,gdp,gdp_units,gdp_ppp,gdp_ppp_units,ghg_intensity_with_lulucf_per_gdp,ghg_intensity_with_lulucf_per_gdp_units,ghg_intensity_without_lulucf_per_gdp,ghg_intensity_without_lulucf_per_gdp_units
0,ARG,,2016,,CO2e * kt,202000.0,CO2e * kt,PRIMAP,557531400000.0,USD,885227500000.0,USD,,CO2e * kt / USD,0.22819,CO2e * kt / USD
1,ARG,,2017,,CO2e * kt,198000.0,CO2e * kt,PRIMAP,643628700000.0,USD,1039331000000.0,USD,,CO2e * kt / USD,0.190507,CO2e * kt / USD
2,ARG,,2018,,CO2e * kt,196000.0,CO2e * kt,PRIMAP,524819700000.0,USD,1036428000000.0,USD,,CO2e * kt / USD,0.189111,CO2e * kt / USD
3,ARG,,2019,,CO2e * kt,188000.0,CO2e * kt,PRIMAP,451932400000.0,USD,1033558000000.0,USD,,CO2e * kt / USD,0.181896,CO2e * kt / USD
4,ARG,,2020,,CO2e * kt,178000.0,CO2e * kt,PRIMAP,389288100000.0,USD,942508300000.0,USD,,CO2e * kt / USD,0.188858,CO2e * kt / USD
5,ARG,,2021,,CO2e * kt,196000.0,CO2e * kt,PRIMAP,,USD,,USD,,CO2e * kt / USD,,CO2e * kt / USD
