# OECD Exchange Rates into Trino pipeline

Data source: <https://data.oecd.org/conversion/exchange-rates.htm>

Load environment variables from `credentials.env` (if the file exists)

In [None]:
from dotenv import dotenv_values, load_dotenv
import osc_ingest_trino as osc
import os
import pathlib
# Directory holding credentials.env: CREDENTIAL_DOTENV_DIR wins, then PWD,
# then the fallback '/opt/app-root/src'.
dotenv_dir = os.environ.get(
    'CREDENTIAL_DOTENV_DIR',
    os.environ.get('PWD', '/opt/app-root/src'),
)
dotenv_path = pathlib.Path(dotenv_dir).joinpath('credentials.env')

# Load the file only when it is present; override=True is passed so that
# values from the file replace variables that are already set.
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path, override=True)

In [None]:
# Target location of the ingested table. The catalog must be one that is
# configured for Iceberg.
ingest_catalog = "osc_datacommons_dev"
ingest_schema = "pcaf_sovereign_footprint"
ingest_table = "sf_oecd_exch_rates"

In [None]:
import trino
from sqlalchemy.engine import create_engine

# Prefix of the environment variables that carry the Trino settings:
# <prefix>_USER, <prefix>_HOST, <prefix>_PORT and <prefix>_PASSWD.
env_var_prefix = 'TRINO'

sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ[f'{env_var_prefix}_USER'],
    host = os.environ[f'{env_var_prefix}_HOST'],
    port = os.environ[f'{env_var_prefix}_PORT']
)
sqlargs = {
    # The value of <prefix>_PASSWD is handed to JWTAuthentication as the token.
    'auth': trino.auth.JWTAuthentication(os.environ[f'{env_var_prefix}_PASSWD']),
    'http_scheme': 'https',
    # Use the configured ingest catalog rather than repeating the literal, so
    # the session catalog cannot drift from the one named in the SQL below.
    'catalog': ingest_catalog
}
engine = create_engine(sqlstring, connect_args = sqlargs)
connection = engine.connect()

# NOTE(review): presumably attaches the bucket described by the S3_DEV_*
# environment variables -- confirm against osc_ingest_trino.
trino_bucket = osc.attach_s3_bucket("S3_DEV")

In [None]:
import boto3

# S3 resource for the landing bucket. Endpoint and credentials are all read
# from the S3_LANDING_* environment variables (a missing one raises KeyError).
s3_source = boto3.resource(
    "s3",
    **{
        option: os.environ[variable]
        for option, variable in (
            ("endpoint_url", "S3_LANDING_ENDPOINT"),
            ("aws_access_key_id", "S3_LANDING_ACCESS_KEY"),
            ("aws_secret_access_key", "S3_LANDING_SECRET_KEY"),
        )
    },
)
source_bucket = s3_source.Bucket(os.environ["S3_LANDING_BUCKET"])

Using the Trino connection opened above (authenticated with a JWT), make sure the target schema exists

In [None]:
# The schema has to exist before the table is written; otherwise table
# creation below fails in confusing ways.
sql = f"\ncreate schema if not exists {ingest_catalog}.{ingest_schema}\n"
qres = engine.execute(sql)

In [None]:
# Sanity check of the Trino connection: print every schema in the catalog.
schema_read = engine.execute(f"show schemas in {ingest_catalog}")
for schema_row in schema_read.fetchall():
    print(schema_row)

In [None]:
import pandas as pd
import csv


# Name of the OECD export; it is used for both the S3 key and the local copy.
oecd_csv_name = 'DP_LIVE_19072022170858805.csv'
oecd_s3_key = f'PCAF-sovereign-footprint/OECD/{oecd_csv_name}'
oecd_local_path = f'/tmp/{oecd_csv_name}'

# Download the CSV from the landing bucket to local disk.
oecd_file = s3_source.Object(os.environ['S3_LANDING_BUCKET'], oecd_s3_key)
oecd_file.download_file(oecd_local_path)

# Source column -> target column. The keys also select which columns are kept.
# (Named `column_renames` so the builtin `dict` is not shadowed.)
column_renames = {
    'INDICATOR': 'attribute',
    'LOCATION': 'country_iso_code',
    'TIME': 'validity_date',
    'Value': 'value',
}

df = (
    pd.read_csv(oecd_local_path)[list(column_renames)]
    .convert_dtypes()
    .rename(columns=column_renames)
)

# Keep only the rows whose country code is 'DEU'.
df = df[df['country_iso_code'] == 'DEU']

df



In [None]:
#from datetime import datetime
#df["validity_date"]= df["validity_date"].apply(str) + "0101"
#df["validity_date"]= pd.to_datetime(df["validity_date"]).
#df=df.convert_dtypes()
#df
#df.info(verbose=True)

In [None]:
import osc_ingest_trino as osc


# NOTE(review): presumably the column name/type pairs derived from df; the
# result is not read by any cell visible here -- confirm whether it is needed.
columnschema = osc.create_table_schema_pairs(df)

# Drop any earlier copy of the target table, echoing the statement and the
# rows returned for it.
sql = f"\ndrop table if exists {ingest_catalog}.{ingest_schema}.{ingest_table}\n"
print(sql)
qres = engine.execute(sql)
print(qres.fetchall())

In [None]:
# Print a summary of the dataframe (columns, dtypes, non-null counts).
df.info(verbose=True)
        
        

In [None]:
# Append the dataframe to the target table, without the index, using the
# osc TrinoBatchInsert method with batches of 5000 and verbose output.
df.to_sql(
    name=ingest_table,
    con=engine,
    schema=ingest_schema,
    if_exists="append",
    index=False,
    method=osc.TrinoBatchInsert(batch_size=5000, verbose=True),
)

In [None]:
# Spot-check the load: read back the rows whose validity_date is 2020.
# The original string contained a literal `" + "` left over from a string
# concatenation, which made the statement invalid SQL.
sql = f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table}
where validity_date = 2020
"""
pd.read_sql(sql, engine)