Library Dependencies

In [1]:
# pip install python-dotenv
# pip install --upgrade sqlalchemy==1.3 sqlalchemy-trino
# pip install pandas

In [6]:
from dotenv import dotenv_values, load_dotenv
import os
import pathlib
from sqlalchemy.engine import create_engine
import pandas as pd
from trino.auth import JWTAuthentication

In [9]:
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR', os.environ.get('PWD', '/opt/app-root/src'))
dotenv_path = pathlib.Path(dotenv_dir) / 'credentials.env'
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path,override=True)

SQL Alchemy DB Connection through Trino

In [10]:
# Create a SQL engine via Trino,using Trino access token via connect_args
# args for presto request can be found at https://github.com/prestodb/presto-python-client/blob/master/prestodb/client.py
engine = create_engine(
  'trino://' + os.environ['TRINO_USER'] + '@' + os.environ['TRINO_HOST'] + ':' + os.environ['TRINO_PORT'] + '/',
  connect_args={'auth': JWTAuthentication(os.environ['TRINO_PASSWD']),'http_scheme': 'https'},
)
print("connecting with engine " + str(engine))
connection = engine.connect()

connecting with engine Engine(trino://caldeirav@trino-secure-odh-trino.apps.odh-cl1.apps.os-climate.org:443/)


Show available schemas and read from a demo table

In [11]:
schema_read = engine.execute("show schemas in osc_datacommons_dev")
for row in table_read.fetchall():
    print(row)

In [11]:
table_read = engine.execute("select * from osc_datacommons_dev.pudl.y95_al")
print(f"Selected {table_read.rowcount} rows.")
for row in table_read.fetchall():
    print(row)

Selected -1 rows.
(7, '1', '1995-01-01 06:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 07:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 08:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 09:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 10:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 11:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 12:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 13:00:00.000', 0.0, 0.0, None, None, None, None, None, None, None, None, None, 0.0, None, None)
(7, '1', '1995-01-01 14:00:00.000', 0.0, 0.0, 

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



(47, '4', '1995-05-03 18:00:00.000', 1.0, 191.0, None, 2929.418, 'Measured', 0.402, 'Measured', 795.062, 'Calculated', 202.921, 'Measured', 1977.766, None, None)
(47, '4', '1995-05-03 19:00:00.000', 1.0, 188.0, None, 2953.484, 'Measured', 0.403, 'Measured', 801.083, 'Calculated', 203.96, 'Measured', 1987.8, None, None)
(47, '4', '1995-05-03 20:00:00.000', 1.0, 192.0, None, 2987.771, 'Measured', 0.411, 'Measured', 831.782, 'Calculated', 207.641, 'Measured', 2023.8, None, None)
(47, '4', '1995-05-03 21:00:00.000', 1.0, 180.0, None, 2817.624, 'Measured', 0.405, 'Measured', 772.011, 'Calculated', 195.58, 'Measured', 1906.2, None, None)
(47, '4', '1995-05-03 22:00:00.000', 1.0, 186.0, None, 2964.212, 'Measured', 0.408, 'Measured', 809.35, 'Calculated', 203.543, 'Measured', 1983.7, None, None)
(47, '4', '1995-05-03 23:00:00.000', 1.0, 177.0, None, 2810.34, 'Measured', 0.409, 'Measured', 777.427, 'Calculated', 195.02, 'Measured', 1900.8, None, None)
(47, '4', '1995-05-04 00:00:00.000', 1.0, 1

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



(26, '5', '1995-10-09 03:00:00.000', 1.0, 442.0, None, 4971.1, 'Measured', 0.621, 'Measured', 1973.6, 'Calculated', 326.0, 'Measured', 3178.1, 6, 34)
(26, '5', '1995-10-09 04:00:00.000', 1.0, 370.0, None, 4206.0, 'Measured', 0.789, 'Measured', 2171.328, 'Calculated', 282.3, 'Measured', 2752.0, 6, 34)
(26, '5', '1995-10-09 05:00:00.000', 1.0, 352.0, None, 3981.1, 'Measured', 0.962, 'Measured', 2523.711, 'Calculated', 269.1, 'Measured', 2623.4, 6, 34)
(26, '5', '1995-10-09 06:00:00.000', 1.0, 347.0, None, 3839.8, 'Measured', 1.111, 'Measured', 2829.384, 'Calculated', 261.2, 'Measured', 2546.7, 6, 34)
(26, '5', '1995-10-09 07:00:00.000', 1.0, 343.0, None, 3765.2, 'Measured', 0.791, 'Measured', 1984.303, 'Calculated', 257.3, 'Measured', 2508.6, 6, 34)
(26, '5', '1995-10-09 08:00:00.000', 1.0, 344.0, None, 3756.9, 'Measured', 0.82, 'Measured', 2083.374, 'Calculated', 260.6, 'Measured', 2540.7, 6, 34)
(26, '5', '1995-10-09 09:00:00.000', 1.0, 340.0, None, 3793.8, 'Measured', 0.841, 'Measured

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

