Run these in a notebook cell if you need to install the dependencies into your notebook environment:
```
%%capture pipoutput
%pip install boto3 python-dotenv
%pip install trino sqlalchemy sqlalchemy-trino
%pip install pandas pyarrow fastparquet
%pip install anytree
%pip install osc-ingest-tools
```

In [1]:
from dotenv import dotenv_values, load_dotenv
import os
import pathlib

# Locate credentials.env: an explicit CREDENTIAL_DOTENV_DIR wins, then the
# shell's PWD, then the default application root.
dotenv_dir = os.environ.get('CREDENTIAL_DOTENV_DIR')
if dotenv_dir is None:
    dotenv_dir = os.environ.get('PWD', '/opt/app-root/src')
dotenv_path = pathlib.Path(dotenv_dir).joinpath('credentials.env')

# Only load when the file is actually present; values from the file replace
# any variables that are already set (override=True).
if os.path.exists(dotenv_path):
    load_dotenv(override=True, dotenv_path=dotenv_path)

In [2]:
# Show the Trino account name configured for each of the three demo users.
for persona in ('USER1', 'USER2', 'USER3'):
    print(os.environ[f'TRINO_USER_{persona}'])

os-climate-user1
os-climate-user2
os-climate-user3


In [3]:
import trino
from sqlalchemy.engine import create_engine

# Engine for USER1: the URL carries user/host/port from the environment and
# the connection authenticates with that user's JWT over HTTPS.
sqlstring = f"trino://{os.environ['TRINO_USER_USER1']}@{os.environ['TRINO_HOST']}:{os.environ['TRINO_PORT']}/"
sqlargs = dict(
    auth=trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD_USER1']),
    http_scheme='https',
)
engine_dev = create_engine(sqlstring, connect_args=sqlargs)
print(f"connecting with engine {engine_dev!s}")
connection_dev = engine_dev.connect()

connecting with engine Engine(trino://os-climate-user1@trino-secure-odh-trino.apps.odh-cl1.apps.os-climate.org:443/)


In [4]:
import boto3
# S3 resource for the dev object store; endpoint and key pair come from the
# environment so nothing secret is written into the notebook.
s3 = boto3.resource(
    "s3",
    endpoint_url=os.environ["S3_DEV_ENDPOINT"],
    aws_access_key_id=os.environ["S3_DEV_ACCESS_KEY"],
    aws_secret_access_key=os.environ["S3_DEV_SECRET_KEY"],
)

# Bucket handle used by the upload cells below.
dev_bucket_name = os.environ["S3_DEV_BUCKET"]
bucket = s3.Bucket(dev_bucket_name)

In [5]:
import pandas as pd
# Build the small backend frame: one row per company with a single
# numeric column, dev1.
data = [['company1', 1.1], ['company2', 2.2], ['company3', 3.3]]
dfbe = pd.DataFrame(data, columns=['company_name', 'dev1'])

# Move to pandas' nullable extension dtypes (string / Float64 in the
# recorded output).
dfbe = dfbe.convert_dtypes()

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the cell output.
dfbe.info(verbose=True)
dfbe

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   company_name  3 non-null      string 
 1   dev1          3 non-null      Float64
dtypes: Float64(1), string(1)
memory usage: 179.0 bytes
None


Unnamed: 0,company_name,dev1
0,company1,1.1
1,company2,2.2
2,company3,3.3


In [6]:
# Trino coordinates (catalog.schema.table) of the backend demo table.
ingest_catalog = "osc_datacommons_dev"
ingest_schema = "demo"
ingest_table = "demo_dv_backend"

# Object-store key prefix under which the table's parquet file is uploaded.
ingest_prefix = 'trino/{}/{}/'.format(ingest_schema, ingest_table)

In [7]:
# Write the backend frame to a local parquet file, then copy that file into
# the bucket under the table's prefix.
tmp = '/tmp/{}.parquet'.format(ingest_table)
dfbe.to_parquet(tmp, index=False)
bucket.upload_file(
    Filename=tmp,
    Key='{}{}.parquet'.format(ingest_prefix, ingest_table),
)

In [8]:
import osc_ingest_trino as osc
from sqlalchemy import text

# Column definitions for the CREATE TABLE statement, derived from the frame
# (the recorded output shows "company_name varchar, dev1 double").
columnschema = osc.create_table_schema_pairs(dfbe)

# Register the uploaded parquet data as an external table in Trino.
sql = f"""
create table if not exists {ingest_catalog}.{ingest_schema}.{ingest_table}(
{columnschema}
) with (
    format = 'parquet',
    external_location = 's3a://{bucket.name}/{ingest_prefix}'
)
"""
print(sql)

# Engine.execute() is legacy "connectionless" execution: deprecated in
# SQLAlchemy 1.4 and removed in 2.0. Run the DDL on an explicit connection
# instead, wrapped in text() so it is accepted by both 1.x and 2.x, and let
# the context manager close the connection afterwards.
# NOTE: text() treats ":name" as a bind parameter; the only colon written in
# this statement is the "s3a://" scheme, which does not match that pattern.
with engine_dev.connect() as ddl_connection:
    qres = ddl_connection.execute(text(sql))
    # Fetch inside the block: the result is unusable once the connection closes.
    print(qres.fetchall())


create table if not exists osc_datacommons_dev.demo.demo_dv_backend(
    company_name varchar,
    dev1 double
) with (
    format = 'parquet',
    external_location = 's3a://ocp-odh-os-demo-s3/trino/demo/demo_dv_backend/'
)

[(True,)]


In [9]:
# Read the backend table back through Trino using the USER1 engine.
sql = f"\nselect * from {ingest_catalog}.{ingest_schema}.{ingest_table}\n"
dfuf = pd.read_sql(sql=sql, con=engine_dev)
dfuf

Unnamed: 0,company_name,dev1
0,company1,1.1
1,company2,2.2
2,company3,3.3


In [10]:
import math
# Derive the user-facing columns in one chain:
#   quant1 = 2 * dev1
#   user1  = quant1 squared
#   user2  = square root of quant1
# Each assign() callable sees the columns created before it, so user1/user2
# can be computed from quant1. Finish by switching to nullable dtypes.
dfuf = dfuf.assign(
    quant1=lambda frame: frame['dev1'].map(lambda dev: 2 * dev),
    user1=lambda frame: frame['quant1'].map(lambda quant: quant * quant),
    user2=lambda frame: frame['quant1'].map(math.sqrt),
).convert_dtypes()

In [11]:
# Retarget the ingest variables at the user-facing table; catalog and schema
# keep the values assigned earlier in the notebook.
ingest_table = "demo_dv_userfacing"
ingest_prefix = 'trino/{}/{}/'.format(ingest_schema, ingest_table)

In [12]:
# Write the user-facing frame to a local parquet file, then copy that file
# into the bucket under the table's prefix.
tmp = '/tmp/{}.parquet'.format(ingest_table)
dfuf.to_parquet(tmp, index=False)
bucket.upload_file(
    Filename=tmp,
    Key='{}{}.parquet'.format(ingest_prefix, ingest_table),
)

In [13]:
from sqlalchemy import text

# Column definitions for the CREATE TABLE statement, derived from the frame
# (the recorded output lists company_name, dev1, quant1, user1, user2).
columnschema = osc.create_table_schema_pairs(dfuf)

# Register the uploaded parquet data as an external table in Trino.
sql = f"""
create table if not exists {ingest_catalog}.{ingest_schema}.{ingest_table}(
{columnschema}
) with (
    format = 'parquet',
    external_location = 's3a://{bucket.name}/{ingest_prefix}'
)
"""
print(sql)

# Engine.execute() is legacy "connectionless" execution: deprecated in
# SQLAlchemy 1.4 and removed in 2.0. Run the DDL on an explicit connection
# instead, wrapped in text() so it is accepted by both 1.x and 2.x, and let
# the context manager close the connection afterwards.
# NOTE: text() treats ":name" as a bind parameter; the only colon written in
# this statement is the "s3a://" scheme, which does not match that pattern.
with engine_dev.connect() as ddl_connection:
    qres = ddl_connection.execute(text(sql))
    # Fetch inside the block: the result is unusable once the connection closes.
    print(qres.fetchall())


create table if not exists osc_datacommons_dev.demo.demo_dv_userfacing(
    company_name varchar,
    dev1 double,
    quant1 double,
    user1 double,
    user2 double
) with (
    format = 'parquet',
    external_location = 's3a://ocp-odh-os-demo-s3/trino/demo/demo_dv_userfacing/'
)

[(True,)]


In [14]:
# Fully qualified catalog.schema.table name reused by the access checks below.
userfacing_table = '{}.{}.{}'.format(ingest_catalog, ingest_schema, ingest_table)

In [15]:
# USER1 engine: select every column of the user-facing table.
sql = f"\nselect * from {userfacing_table}\n"
df = pd.read_sql(sql=sql, con=engine_dev)
df

Unnamed: 0,company_name,dev1,quant1,user1,user2
0,company1,1.1,2.2,4.84,1.48324
1,company2,2.2,4.4,19.36,2.097618
2,company3,3.3,6.6,43.56,2.569047


In [16]:
# Engine for USER2: same host/port as before, but the user name and JWT are
# the ones configured for USER2.
sqlstring = f"trino://{os.environ['TRINO_USER_USER2']}@{os.environ['TRINO_HOST']}:{os.environ['TRINO_PORT']}/"
sqlargs = dict(
    auth=trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD_USER2']),
    http_scheme='https',
)
engine_quant = create_engine(sqlstring, connect_args=sqlargs)
print(f"connecting with engine {engine_quant!s}")
connection_quant = engine_quant.connect()

connecting with engine Engine(trino://os-climate-user2@trino-secure-odh-trino.apps.odh-cl1.apps.os-climate.org:443/)


In [17]:
# Engine for USER3: same host/port as before, but the user name and JWT are
# the ones configured for USER3.
sqlstring = f"trino://{os.environ['TRINO_USER_USER3']}@{os.environ['TRINO_HOST']}:{os.environ['TRINO_PORT']}/"
sqlargs = dict(
    auth=trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD_USER3']),
    http_scheme='https',
)
engine_user = create_engine(sqlstring, connect_args=sqlargs)
print(f"connecting with engine {engine_user!s}")
connection_user = engine_user.connect()

connecting with engine Engine(trino://os-climate-user3@trino-secure-odh-trino.apps.odh-cl1.apps.os-climate.org:443/)


In [18]:
# USER2 engine: select everything except dev1 (the recorded run returns rows).
sql = f"\nselect company_name, quant1, user1, user2 from {userfacing_table}\n"
df = pd.read_sql(sql=sql, con=engine_quant)
df

Unnamed: 0,company_name,quant1,user1,user2
0,company1,2.2,4.84,1.48324
1,company2,4.4,19.36,2.097618
2,company3,6.6,43.56,2.569047


In [19]:
# USER2 engine asking for dev1. The recorded run shows Trino answering with
# PERMISSION_DENIED, so the error is caught and printed as the cell's result.
try:
    sql = f"\n        select dev1 from {userfacing_table}\n    "
    df = pd.read_sql(sql=sql, con=engine_quant)
except Exception as err:
    print(err)

TrinoUserError(type=USER_ERROR, name=PERMISSION_DENIED, message="Access Denied: Cannot select from table osc_datacommons_dev.demo.demo_dv_userfacing", query_id=20211130_001810_00050_8n8pw)


In [20]:
# USER3 engine: select only company_name, user1 and user2 (the recorded run
# returns rows).
sql = f"\nselect company_name, user1, user2 from {userfacing_table}\n"
df = pd.read_sql(sql=sql, con=engine_user)
df

Unnamed: 0,company_name,user1,user2
0,company1,4.84,1.48324
1,company2,19.36,2.097618
2,company3,43.56,2.569047


In [21]:
# USER3 engine asking for dev1. The recorded run shows Trino answering with
# PERMISSION_DENIED, so the error is caught and printed as the cell's result.
try:
    sql = f"\n        select dev1 from {userfacing_table}\n    "
    df = pd.read_sql(sql=sql, con=engine_user)
except Exception as err:
    print(err)

TrinoUserError(type=USER_ERROR, name=PERMISSION_DENIED, message="Access Denied: Cannot select from table osc_datacommons_dev.demo.demo_dv_userfacing", query_id=20211130_001810_00052_8n8pw)


In [22]:
# USER3 engine asking for quant1. The recorded run shows Trino answering with
# PERMISSION_DENIED, so the error is caught and printed as the cell's result.
try:
    sql = f"\n        select  quant1 from {userfacing_table}\n    "
    df = pd.read_sql(sql=sql, con=engine_user)
except Exception as err:
    print(err)

TrinoUserError(type=USER_ERROR, name=PERMISSION_DENIED, message="Access Denied: Cannot select from table osc_datacommons_dev.demo.demo_dv_userfacing", query_id=20211130_001810_00053_8n8pw)
