Run the following in a notebook cell if you need to install these packages into your notebook environment:
```
%%capture pipoutput
%pip install boto3 python-dotenv
%pip install trino sqlalchemy sqlalchemy-trino
%pip install pandas pyarrow fastparquet
%pip install anytree
%pip install osc-ingest-tools
```

In [1]:
# osc_ingest_trino supplies the credential, S3 and Trino helpers used throughout this notebook.
import osc_ingest_trino as osc
# Presumably loads credentials from a dotenv file into os.environ — confirm against the osc-ingest-tools docs.
osc.load_credentials_dotenv()
# S3 bucket handle; it is passed to the drop/ingest/tabledef helpers in later cells.
bucket = osc.attach_s3_bucket('S3_DEV')
# One Trino engine per demo identity. The prefixes match the TRINO_OSCU{1,2,3}_USER
# environment variables printed in the next cell (os-climate-user1/2/3 in the recorded run).
engine_dev = osc.attach_trino_engine('TRINO_OSCU1')
engine_quant = osc.attach_trino_engine('TRINO_OSCU2')
engine_user = osc.attach_trino_engine('TRINO_OSCU3')

In [2]:
import os

# Show which Trino account each credential prefix resolves to, one per line.
for credential_prefix in ('TRINO_OSCU1', 'TRINO_OSCU2', 'TRINO_OSCU3'):
    print(os.environ[credential_prefix + '_USER'])

os-climate-user1
os-climate-user2
os-climate-user3


In [3]:
import pandas as pd

# Toy "backend" dataset: one row per company with a single numeric column, dev1.
data = [['company1', 1.1], ['company2', 2.2], ['company3', 3.3]]
dfbe = pd.DataFrame(data, columns=['company_name', 'dev1'])
# Switch to pandas' nullable extension dtypes (string / Float64 for this data).
dfbe = dfbe.convert_dtypes()
# DataFrame.info() writes its report to stdout itself and returns None, so call it
# directly; wrapping it in print() appends a stray "None" line to the cell output.
dfbe.info(verbose=True)
dfbe

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   company_name  3 non-null      string 
 1   dev1          3 non-null      Float64
dtypes: Float64(1), string(1)
memory usage: 179.0 bytes
None


Unnamed: 0,company_name,dev1
0,company1,1.1
1,company2,2.2
2,company3,3.3


In [4]:
# Trino location (catalog.schema.table) of the backend demo table.
ingest_catalog = "osc_datacommons_dev"
ingest_schema = "demo"
ingest_table = "demo_dv_backend"

In [5]:
# Start clean: drop any table left over from a previous run
# (with verbose=True the recorded output lists the S3 keys that were deleted).
osc.drop_unmanaged_table(ingest_catalog, ingest_schema, ingest_table, engine_dev, bucket, verbose=True)
# Upload the dataframe as a parquet file under the table's S3 prefix
# (the recorded output shows /tmp/<id>.parquet --> trino/<schema>/<table>/<id>.parquet).
osc.ingest_unmanaged_parquet(dfbe, ingest_schema, ingest_table, bucket,
                             verbose=True)
# Build the matching `create table if not exists ... with (format = 'parquet',
# external_location = ...)` statement; verbose=True echoes the generated SQL.
sql = osc.unmanaged_parquet_tabledef(dfbe, ingest_catalog, ingest_schema, ingest_table, bucket,
                                    verbose = True)
# Run the DDL as the dev identity; the recorded run prints [(True,)].
qres = engine_dev.execute(sql)
print(qres.fetchall())

[{'ResponseMetadata': {'RequestId': 'DJ6KVQVBY5M72DQH', 'HostId': 'a++lyCY0mSCsvf33bst8a0zh/wgiuNNyA1xB9hgK7eHQOFVePxFqLh7SC1okFXzqD1nG4p7Q+iU=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'a++lyCY0mSCsvf33bst8a0zh/wgiuNNyA1xB9hgK7eHQOFVePxFqLh7SC1okFXzqD1nG4p7Q+iU=', 'x-amz-request-id': 'DJ6KVQVBY5M72DQH', 'date': 'Wed, 08 Dec 2021 23:23:58 GMT', 'content-type': 'application/xml', 'transfer-encoding': 'chunked', 'server': 'AmazonS3', 'connection': 'close'}, 'RetryAttempts': 0}, 'Deleted': [{'Key': 'trino/demo/demo_dv_backend/c9682d24c7ab44e5acfdd9c4522f3d9f.parquet'}]}]
/tmp/a42dd7191cfd4714aa9c1e867c1cf036.parquet  -->  trino/demo/demo_dv_backend/a42dd7191cfd4714aa9c1e867c1cf036.parquet
create table if not exists osc_datacommons_dev.demo.demo_dv_backend (
    company_name varchar,
    dev1 double
) with (
    format = 'parquet',
    external_location = 's3a://ocp-odh-os-demo-s3/trino/demo/demo_dv_backend/'
)
[(True,)]


In [6]:
# Read the freshly created backend table back through Trino as the dev identity.
sql = f"""
select * from {ingest_catalog}.{ingest_schema}.{ingest_table}
"""
dfuf = pd.read_sql(sql=sql, con=engine_dev)
dfuf

Unnamed: 0,company_name,dev1
0,company1,1.1
1,company2,2.2
2,company3,3.3


In [7]:
# Split the rows into three access tiers: roughly 10% public and 10% eval
# (at least one row each), with every remaining row licensed.
nrows = len(dfuf)
npublic = max(1, int(nrows * 0.1))
neval = max(1, int(nrows * 0.1))
nlicensed = nrows - (npublic + neval)
# There must be at least one licensed row, i.e. the frame needs three or more rows.
assert nlicensed > 0
# One label per row, in row order: public first, then licensed, then eval.
accesscol = (
    ['public'] * npublic
    + ['demo_dv_licensed'] * nlicensed
    + ['demo_dv_eval'] * neval
)
accesscol

['public', 'demo_dv_licensed', 'demo_dv_eval']

In [8]:
import math

# Derive the quant- and user-level columns from dev1, attach the per-row access
# label, then normalise to pandas' nullable dtypes. Each callable sees the frame
# as built so far, so user1/user2 can read the quant1 column defined just above.
dfuf = dfuf.assign(
    quant1=lambda frame: frame['dev1'].map(lambda value: 2 * value),
    user1=lambda frame: frame['quant1'].map(lambda value: value * value),
    user2=lambda frame: frame['quant1'].map(math.sqrt),
    access=accesscol,
).convert_dtypes()
dfuf

Unnamed: 0,company_name,dev1,quant1,user1,user2,access
0,company1,1.1,2.2,4.84,1.48324,public
1,company2,2.2,4.4,19.36,2.097618,demo_dv_licensed
2,company3,3.3,6.6,43.56,2.569047,demo_dv_eval


In [9]:
# Point the ingest target at the user-facing table for the cells that follow.
ingest_table = "demo_dv_userfacing"

In [10]:
# Same drop / upload / create-table sequence as for the backend table, now for
# the user-facing frame (dfuf), which carries the derived columns and the access label.
# Drop whatever a previous run left behind for this table.
osc.drop_unmanaged_table(ingest_catalog, ingest_schema, ingest_table, engine_dev, bucket, verbose=True)
# Upload dfuf as a parquet file under the table's S3 prefix.
osc.ingest_unmanaged_parquet(dfuf, ingest_schema, ingest_table, bucket,
                             verbose=True)
# Build the `create table if not exists ...` statement for dfuf's columns;
# verbose=True echoes the generated SQL.
sql = osc.unmanaged_parquet_tabledef(dfuf, ingest_catalog, ingest_schema, ingest_table, bucket,
                                    verbose = True)
# Run the DDL as the dev identity and print the result rows.
qres = engine_dev.execute(sql)
print(qres.fetchall())

[{'ResponseMetadata': {'RequestId': 'F7YM2SE2V1V0ADCM', 'HostId': '228gynI0yNv7nvEFt4xjQhlNNRIl43LavijISbQW5BUmu5SDmj9YVLuupyBM6EsjbT15UwWxK6Y=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': '228gynI0yNv7nvEFt4xjQhlNNRIl43LavijISbQW5BUmu5SDmj9YVLuupyBM6EsjbT15UwWxK6Y=', 'x-amz-request-id': 'F7YM2SE2V1V0ADCM', 'date': 'Wed, 08 Dec 2021 23:24:00 GMT', 'content-type': 'application/xml', 'transfer-encoding': 'chunked', 'server': 'AmazonS3', 'connection': 'close'}, 'RetryAttempts': 0}, 'Deleted': [{'Key': 'trino/demo/demo_dv_userfacing/a9ac28b40ab446ea978e4e8423c0750a.parquet'}]}]
/tmp/9f0c0b8ee8c847dcb6b10b02ee38a961.parquet  -->  trino/demo/demo_dv_userfacing/9f0c0b8ee8c847dcb6b10b02ee38a961.parquet
create table if not exists osc_datacommons_dev.demo.demo_dv_userfacing (
    company_name varchar,
    dev1 double,
    quant1 double,
    user1 double,
    user2 double,
    access varchar
) with (
    format = 'parquet',
    external_location = 's3a://ocp-odh-os-demo-s3/trino/demo/de

In [11]:
# Fully-qualified catalog.schema.table name reused by the query cells below.
userfacing_table = '.'.join([ingest_catalog, ingest_schema, ingest_table])

In [12]:
# Dev identity: select every column; the recorded run returns all three rows.
sql = f"""
select * from {userfacing_table}
"""
df = pd.read_sql(sql=sql, con=engine_dev)
df

Unnamed: 0,company_name,dev1,quant1,user1,user2,access
0,company1,1.1,2.2,4.84,1.48324,public
1,company2,2.2,4.4,19.36,2.097618,demo_dv_licensed
2,company3,3.3,6.6,43.56,2.569047,demo_dv_eval


In [13]:
# Quant identity: select everything except dev1. In the recorded run only the
# 'public' and 'demo_dv_eval' rows come back.
sql = f"""
select access, company_name, quant1, user1, user2 from {userfacing_table}
"""
df = pd.read_sql(sql=sql, con=engine_quant)
df

Unnamed: 0,access,company_name,quant1,user1,user2
0,public,company1,2.2,4.84,1.48324
1,demo_dv_eval,company3,6.6,43.56,2.569047


In [14]:
# Negative check: the quant identity asks for dev1. The recorded run shows
# Trino answering with PERMISSION_DENIED, which is printed rather than raised.
try:
    sql = f"""
        select dev1 from {userfacing_table}
    """
    df = pd.read_sql(sql=sql, con=engine_quant)
except Exception as access_error:
    print(access_error)

TrinoUserError(type=USER_ERROR, name=PERMISSION_DENIED, message="Access Denied: Cannot select from table osc_datacommons_dev.demo.demo_dv_userfacing", query_id=20211208_232400_00545_kx99a)


In [15]:
# User identity: select only the user-level columns. In the recorded run only
# the 'public' row comes back.
sql = f"""
select access, company_name, user1, user2 from {userfacing_table}
"""
df = pd.read_sql(sql=sql, con=engine_user)
df

Unnamed: 0,access,company_name,user1,user2
0,public,company1,4.84,1.48324


In [16]:
# Negative check: the user identity asks for dev1. The recorded run shows
# Trino answering with PERMISSION_DENIED, which is printed rather than raised.
try:
    sql = f"""
        select dev1 from {userfacing_table}
    """
    df = pd.read_sql(sql=sql, con=engine_user)
except Exception as access_error:
    print(access_error)

TrinoUserError(type=USER_ERROR, name=PERMISSION_DENIED, message="Access Denied: Cannot select from table osc_datacommons_dev.demo.demo_dv_userfacing", query_id=20211208_232400_00547_kx99a)


In [17]:
# Negative check: the user identity asks for quant1. The recorded run shows
# Trino answering with PERMISSION_DENIED, which is printed rather than raised.
try:
    sql = f"""
        select  quant1 from {userfacing_table}
    """
    df = pd.read_sql(sql=sql, con=engine_user)
except Exception as access_error:
    print(access_error)

TrinoUserError(type=USER_ERROR, name=PERMISSION_DENIED, message="Access Denied: Cannot select from table osc_datacommons_dev.demo.demo_dv_userfacing", query_id=20211208_232400_00548_kx99a)


In [18]:
# Drop both demo tables at the end of the run as well, which should increase
# the odds of others being able to run the demo afterwards.
# Table names are spelled out here because ingest_table was reassigned earlier
# and now holds only 'demo_dv_userfacing'.
osc.drop_unmanaged_table(ingest_catalog, ingest_schema, 'demo_dv_backend', engine_dev, bucket, verbose=True)
osc.drop_unmanaged_table(ingest_catalog, ingest_schema, 'demo_dv_userfacing', engine_dev, bucket, verbose=True)

[{'ResponseMetadata': {'RequestId': 'XSGG7D9FTZD8JG4W', 'HostId': 'Sfa1NrrYiNVTQcdQ9MWUWA6h9K43qGqZgO+41qaVA83HAEnyQBIDC0o18YRdv/YBSH9Jdtik8ss=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'Sfa1NrrYiNVTQcdQ9MWUWA6h9K43qGqZgO+41qaVA83HAEnyQBIDC0o18YRdv/YBSH9Jdtik8ss=', 'x-amz-request-id': 'XSGG7D9FTZD8JG4W', 'date': 'Wed, 08 Dec 2021 23:24:02 GMT', 'content-type': 'application/xml', 'transfer-encoding': 'chunked', 'server': 'AmazonS3', 'connection': 'close'}, 'RetryAttempts': 0}, 'Deleted': [{'Key': 'trino/demo/demo_dv_backend/a42dd7191cfd4714aa9c1e867c1cf036.parquet'}]}]
[{'ResponseMetadata': {'RequestId': 'XSGQ0CR5342G8WZG', 'HostId': '1h3Qq2gEGkbNBoqlrUrb7SgnVBmlCXCehCN1qM9RD07/yJoFo6Upu7SOVb2SnhaY4zh+dHtUrUE=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': '1h3Qq2gEGkbNBoqlrUrb7SgnVBmlCXCehCN1qM9RD07/yJoFo6Upu7SOVb2SnhaY4zh+dHtUrUE=', 'x-amz-request-id': 'XSGQ0CR5342G8WZG', 'date': 'Wed, 08 Dec 2021 23:24:02 GMT', 'content-type': 'application/xml', 'transfer-encodin

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f812c8b0160>