### The PRIMAP-hist national historical emissions time series (1750-2021) v2.4

<font size="3">Earlier release (v2.3.1, 1750-2019): https://zenodo.org/record/5494497#.YujsrTfP2Un</font>

Load Environment Variables

In [1]:
from pcaf_env import *
import csv
import ParseXLS as parser

# Base name of the Trino table this notebook ingests into; the query cell
# further down reads from f"{ingest_table}_source".
ingest_table = 'sf_primap_hist_emissions'
# Table that receives the JSON-serialized dbt model metadata (see the final
# insert statement in this notebook).
models_table = 'pcaf_dbt_models'

using connect string: trino://MichaelTiemannOSC@trino-secure-odh-trino.apps.odh-cl2.apps.os-climate.org:443/osc_datacommons_dev
create schema if not exists osc_datacommons_dev.mdt_sandbox


In [2]:
# Connection sanity check: list every schema visible in the ingest catalog.
schema_read = engine.execute(f'show schemas in {ingest_catalog}')
schema_rows = schema_read.fetchall()
for schema_row in schema_rows:
    print(schema_row)

('aicoe_osc_demo_results',)
('default',)
('demo_dv',)
('dera',)
('essd',)
('iceberg_demo',)
('information_schema',)
('ingest',)
('mdt_sandbox',)
('pcaf_sovereign_footprint',)
('rmi',)
('sandbox',)
('wri_gppd',)


In [3]:
# Pull the PRIMAP-hist v2.4 CSV from the S3 landing bucket into /tmp.
landing_bucket = os.environ['S3_LANDING_BUCKET']
primap_file = s3_source.Object(
    landing_bucket,
    'PCAF-sovereign-footprint/PRIMAP/Guetschow-et-al-2022-PRIMAP-hist_v2.4_11-Oct-2022.csv',
)
primap_file.download_file('/tmp/Guetschow-et-al-2022-PRIMAP-hist_v2.4_11-Oct-2022.csv')

# Parse the download using the PRIMAP.ini configuration.
# NOTE(review): presumably PRIMAP.ini is what points the parser at the /tmp
# file above (the cell output lists that path) -- confirm in the .ini.
df = parser.process('PRIMAP.ini', 'PRIMAP.csv')

# Pin the key column types explicitly, then let pandas choose nullable
# extension dtypes for the remaining columns.
column_types = {
    'validity_date': 'int32',
    'country_iso_code': 'string',
    'attribute': 'string',
}
df = df.astype(column_types).convert_dtypes()
df.info(verbose=True)



PRIMAP.ini
file_list:
['/tmp/Guetschow-et-al-2022-PRIMAP-hist_v2.4_11-Oct-2022.csv']
/tmp/Guetschow-et-al-2022-PRIMAP-hist_v2.4_11-Oct-2022.csv
2
csv
/tmp/Guetschow-et-al-2022-PRIMAP-hist_v2.4_11-Oct-202
<configparser.ConfigParser object at 0x7f1ba7aa2f70>
                 source scenario (PRIMAP-hist) area (ISO3) entity   
0      PRIMAP-hist_v2.4                 HISTCR         ABW    CH4  \
1      PRIMAP-hist_v2.4                 HISTCR         ABW    CH4   
2      PRIMAP-hist_v2.4                 HISTCR         ABW    CH4   
3      PRIMAP-hist_v2.4                 HISTCR         ABW    CH4   
4      PRIMAP-hist_v2.4                 HISTCR         ABW    CH4   
...                 ...                    ...         ...    ...   
29575  PRIMAP-hist_v2.4                 HISTTP         ZWE    N2O   
29576  PRIMAP-hist_v2.4                 HISTTP         ZWE    N2O   
29577  PRIMAP-hist_v2.4                 HISTTP         ZWE    N2O   
29578  PRIMAP-hist_v2.4                 HISTTP       

In [6]:
# dbt models directory for the PCAF transform project (per its path).
dbt_models_dir = "/opt/app-root/src/PCAF-sovereign-footprint/dbt/pcaf_transform/models"

# Make sure the directory exists. makedirs(exist_ok=True) is a no-op when the
# directory is already there, creates missing parent directories instead of
# failing, and still raises if the path exists but is not a directory.
os.makedirs(dbt_models_dir, mode=0o755, exist_ok=True)

# Drop Jupyter checkpoint copies from the models directory; a missing
# checkpoint directory is not an error.
shutil.rmtree(os.path.join(dbt_models_dir, ".ipynb_checkpoints"), ignore_errors=True)

In [7]:
# Dataset-level metadata passed to create_trino_table_and_dbt_metadata.
custom_meta_content = {
    # NOTE(review): this stores a one-column DataFrame rather than a plain
    # string such as "PRIMAP" -- confirm that the metadata helper expects that.
    'data provider': df[['data_provider']],
    'title': 'The PRIMAP-hist national historical emissions time series (1750-2021) v2.4',
    'author': 'Gütschow, J.; Jeffery, L.; Gieseke, R.; Gebel, R.; Stevens, D.; Krapp, M.; Rocha, M.',
    'contact': 'github.com/JGuetschow/PRIMAP-hist',
    # NOTE(review): the citation below is for v2.4.1, while the title above and
    # the ingested file are v2.4 -- confirm which release should be cited.
    'description': json.dumps("""
Gütschow, J.; Pflüger, M. (2023): The PRIMAP-hist national historical emissions time series v2.4.1 (1750-2021). zenodo. doi:10.5281/zenodo.7585420."""),
    # Was '2022-10-17, 2022 00:00:00': the year was duplicated, which left the
    # value unparseable as a timestamp.
    'release_date': '2022-10-17 00:00:00',
    # How should we describe our transformative step here?
}

def description_is(s):
    """Build a column-metadata entry whose 'Description' is `s` JSON-encoded."""
    encoded = json.dumps(s)
    return {'Description': encoded}

# Per-column metadata, keyed by column name. Each entry starts as
# {'Description': <JSON-encoded text>}.
custom_meta_fields = {
    'rec_source': description_is("Guetschow-et-al-2022-PRIMAP-hist_v2.4_11-Oct-2022.csv"),
    'data_provider': description_is("PRIMAP"),
    'country_iso_code': description_is("ISO-3166 Country Code (alpha_3)"),
    'country_name': description_is("ISO-3166 Country Name"),
    'attribute': description_is("KYOTOGHG (AR4GWP100)"),
    # Previously "value of GDP or GDP PPP", a leftover from a GDP notebook;
    # this table holds emissions.
    'value': description_is("Greenhouse gas emissions for the given attribute"),
    # NOTE(review): a Kyoto GHG basket would normally be reported in
    # CO2-equivalent -- confirm whether the unit text should say so.
    'value_units': description_is("kt CO2 / a"),
    'validity_date': description_is("Year of measurement"),
}

# Attach a 'tags' list to each column entry. The first matching rule wins;
# columns that match no rule get no 'tags' key.
for field_name, field_meta in custom_meta_fields.items():
    description = field_meta['Description']
    if 'ISO' in description:
        field_meta['tags'] = ['ISO']
    elif 'IPCC' in description:
        field_meta['tags'] = ['IPCC']
    elif 'WDI' in description:
        field_meta['tags'] = ['WDI']
    # The year column in this table is named 'validity_date'; matching only
    # 'year' (as before) meant the 'annual' tag was never applied here.
    elif field_name in ('year', 'validity_date'):
        field_meta['tags'] = ['annual']

In [6]:
# Create the Trino table for df, partitioned by country_iso_code, and register
# the dataset-level and per-column metadata for dbt.
# NOTE(review): the DDL printed by this call types country_name as bigint,
# which looks wrong for a country name -- check df.country_name upstream.
create_trino_table_and_dbt_metadata(ingest_table, df, ['country_iso_code'], custom_meta_content, custom_meta_fields, verbose=True)

drop table if exists mdt_sandbox.sf_primap_hist_emissions_source
enforcing dataframe partition column order

verifying existence of table osc_datacommons_dev.mdt_sandbox.sf_primap_hist_emissions_source
create table if not exists osc_datacommons_dev.mdt_sandbox.sf_primap_hist_emissions_source (
    rec_source varchar,
    data_provider varchar,
    country_name bigint,
    validity_date integer,
    attribute varchar,
    value double,
    value_units varchar,
    country_iso_code varchar
) with (
    format = 'parquet',
    partitioning = array['country_iso_code']
)

staging dataframe parquet to s3 osc-datacommons-s3-bucket-dev02
/tmp/ingest_temp_a6fc7d73/country_iso_code=ABW/2672c41a83ff4f81a3a9eaa682a5f49a-0.parquet  -->  trino/ingest/ingest_temp_a6fc7d73/country_iso_code=ABW/2672c41a83ff4f81a3a9eaa682a5f49a-0.parquet
/tmp/ingest_temp_a6fc7d73/country_iso_code=AFG/2672c41a83ff4f81a3a9eaa682a5f49a-0.parquet  -->  trino/ingest/ingest_temp_a6fc7d73/country_iso_code=AFG/2672c41a83ff4f81a

In [None]:
# Spot-check the ingested data: Argentina's rows for 2021.
# Columns are listed explicitly (in table order, per the DDL printed at
# creation) so the result shape does not change silently if the table does.
sql = f"""
select
    rec_source,
    data_provider,
    country_name,
    validity_date,
    attribute,
    value,
    value_units,
    country_iso_code
from {ingest_catalog}.{ingest_schema}.{ingest_table}_source
where validity_date = 2021
    and country_iso_code = 'ARG'
"""
pd.read_sql(sql, engine)

In [16]:
# Store the accumulated dbt model metadata as one JSON string in the models table.
# The JSON is embedded in a SQL string literal, so single quotes are doubled
# (the SQL escape for a quote inside a literal); otherwise an apostrophe in
# any description would terminate the literal early and break the statement.
models_json = json.dumps(dbt_dict['models']).replace("'", "''")
osc._do_sql(f"insert into {ingest_schema}.{models_table} values('{models_json}')", engine, verbose=True)

insert into mdt_sandbox.pcaf_dbt_models values('{"sf_wdi_gdp": {"description": "\"\\nThis indicator provides per capita values for gross domestic product (GDP) expressed in current international dollars converted by purchasing power parity (PPP) conversion factor. \\n\\nGDP is the sum of gross value added by all resident producers in the country plus any product taxes and minus any subsidies not included in the value of the products. conversion factor is a spatial price deflator and currency converter that controls for price level differences between countries. Total population is a mid-year population based on the de facto definition of population, which counts all residents regardless of legal status or citizenship.\"", "columns": {"rec_source": {"description": "\"API_NY.GDP.MKTP.CD_DS2_en_csv_v2.csv and API_NY.GDP.MKTP.PP.CD_DS2_en_csv_v2.csv\""}, "data_provider": {"description": "\"WDI\"", "tags": ["WDI"]}, "country_iso_code": {"description": "\"ISO-3166 Country Code (alpha_3)\"", 

[(1,)]