# Data Vault Demo (Cleanup)

Clean up the tables created by the Data Vault Demo.

Only the user that created a table can drop it, so we need to instantiate an engine for each user that created tables.

In [1]:
import os
import pathlib
from dotenv import load_dotenv

# Optionally pull standard environment variables from a `credentials.env` file.
# A missing file is not an error: in that case the variables are expected to
# have been supplied through some other mechanism.
dotenv_dir = os.environ.get(
    'CREDENTIAL_DOTENV_DIR',
    # Fall back to the current working directory, then to the default app root.
    os.environ.get('PWD', '/opt/app-root/src'),
)
dotenv_path = pathlib.Path(dotenv_dir).joinpath('credentials.env')
if os.path.exists(dotenv_path):
    load_dotenv(dotenv_path=dotenv_path, override=True)

import trino
import osc_ingest_trino as osc
from sqlalchemy import text
from sqlalchemy.engine import create_engine

The ITR Data Pipeline creates these tables.  We should not delete them unless we created them ourselves, as the user named in `TRINO_USER_USER1`, as part of the construction of the vault.

In [2]:
print("Cleaning up Dev tables")

# Build the connection URL for the first (Dev) user from the environment.
sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ['TRINO_USER_USER1'],
    host = os.environ['TRINO_HOST'],
    port = os.environ['TRINO_PORT']
)

ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'demo_dv'

sqlargs = {
    # The value of TRINO_PASSWD_USER1 is handed to Trino as a JWT token.
    'auth': trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD_USER1']),
    'http_scheme': 'https',
    'catalog': ingest_catalog,
    'schema': ingest_schema,
}

engine_dev = create_engine(sqlstring, connect_args = sqlargs)
print("connecting with engine " + str(engine_dev))
connection_dev = engine_dev.connect()
# Run the query on the connection opened above, wrapped in text():
# Engine.execute() and raw-string statements are legacy in SQLAlchemy 1.4
# and removed in 2.0.  The last expression is displayed as the cell output.
connection_dev.execute(text(f"show tables in {ingest_schema}")).fetchall()

Cleaning up Dev tables
connecting with engine Engine(trino://os-climate-user1@trino-secure-odh-trino.apps.odh-cl2.apps.os-climate.org:443/)


  res = connection.execute(sql.text(query)).scalar()


[('company_data',),
 ('emissions_data',),
 ('intensity_data',),
 ('isic_to_sector',),
 ('oecm_cumprod',),
 ('production_data',),
 ('target_data',),
 ('trajectory_data',)]

In [3]:
# Drop the tables owned by the Dev user.  `if exists` makes the cell safe to
# re-run after some (or all) of the tables are already gone.
for table in ['benchmark_ei', 'benchmark_prod',
              'cumulative_budget_1', 'cumulative_emissions']:
    print(f"Dropping Dev table {table}")
    # Execute on the explicit connection with text(): Engine.execute() and
    # raw-string statements are removed in SQLAlchemy 2.0.
    # fetchall() consumes the result -- presumably needed so the Trino client
    # drives the statement to completion; TODO confirm.
    connection_dev.execute(text(f"drop table if exists {table}")).fetchall()

Dropping Dev table benchmark_ei
Dropping Dev table benchmark_prod
Dropping Dev table cumulative_budget_1
Dropping Dev table cumulative_emissions


In [4]:
print("Cleaning up Quant tables")

# Build the connection URL for the second (Quant) user from the environment.
sqlstring = 'trino://{user}@{host}:{port}/'.format(
    user = os.environ['TRINO_USER_USER2'],
    host = os.environ['TRINO_HOST'],
    port = os.environ['TRINO_PORT']
)

ingest_catalog = 'osc_datacommons_dev'
ingest_schema = 'demo_dv'

sqlargs = {
    # The value of TRINO_PASSWD_USER2 is handed to Trino as a JWT token.
    'auth': trino.auth.JWTAuthentication(os.environ['TRINO_PASSWD_USER2']),
    'http_scheme': 'https',
    'catalog': ingest_catalog,
    'schema': ingest_schema,
}

engine_quant = create_engine(sqlstring, connect_args = sqlargs)
print("connecting with engine " + str(engine_quant))
connection_quant = engine_quant.connect()
# Run the query on the connection opened above, wrapped in text():
# Engine.execute() and raw-string statements are legacy in SQLAlchemy 1.4
# and removed in 2.0.  The last expression is displayed as the cell output.
connection_quant.execute(text(f"show tables in {ingest_schema}")).fetchall()

Cleaning up Quant tables
connecting with engine Engine(trino://os-climate-user2@trino-secure-odh-trino.apps.odh-cl2.apps.os-climate.org:443/)


[('company_data',),
 ('emissions_data',),
 ('intensity_data',),
 ('isic_to_sector',),
 ('oecm_cumprod',),
 ('production_data',),
 ('target_data',),
 ('trajectory_data',)]

In [5]:
# Drop the tables owned by the Quant user.  `if exists` makes the cell safe to
# re-run after some (or all) of the tables are already gone.
for table in ['overshoot_ratios',
              'temperature_scores']:
    print(f"Dropping Quant table {table}")
    # Execute on the explicit connection with text(): Engine.execute() and
    # raw-string statements are removed in SQLAlchemy 2.0.
    # fetchall() consumes the result -- presumably needed so the Trino client
    # drives the statement to completion; TODO confirm.
    connection_quant.execute(text(f"drop table if exists {table}")).fetchall()

Dropping Quant table overshoot_ratios
Dropping Quant table temperature_scores
