# How to...create Matching and No-Matching Tables in Trino

In [1]:
# Add the directory three levels up to the module search path so the API can be imported.
# NOTE(review): the relative path is resolved against the kernel's working directory,
# which is presumably this notebook's folder - confirm before running from elsewhere.
import sys
sys.path.append('../../../')

In [2]:
# Import the module for connection to a Trino database
from esg_matching.engine.connectors.trino import TrinoConnector

In [3]:
# Import the modules for file management
from esg_matching.file_reader.file import File
from esg_matching.file_reader.csv_reader import FileReaderCsv

In [4]:
# Import the modules for the etl processing: reading, transformation and loading data to a database
from esg_matching.processing.etl import EtlProcessing

## 1. Database setup

In [5]:
# Import the module for connection to a Trino database
from esg_matching.engine.connectors.trino import TrinoConnector

In [6]:
# Read the Trino connection settings from environment variables, so that no
# credentials are written in the notebook. A missing variable raises KeyError.
import os
user_trino = os.environ['TRINO_USER']
pwd_trino = os.environ['TRINO_PASSWD']
host_trino = os.environ['TRINO_HOST']
# Environment values are strings; the port is converted to an integer here.
port_trino = int(os.environ['TRINO_PORT'])

In [7]:
# The database connector is represented by the class TrinoConnector
db_conn = TrinoConnector()

In [8]:
# The connect() method of the TrinoConnector is used to establish a connection with the database.
# The properties username, user_password, host_url and port_number receive the values read from
# the environment variables, and the property catalog receives the name of the catalog.
# The property show_sql_statement indicates if the SQL statements are echoed (or printed) in the default output channel.
db_conn.username = user_trino
db_conn.user_password = pwd_trino
db_conn.host_url = host_trino
db_conn.port_number = port_trino
db_conn.catalog = 'osc_datacommons_iceberg_dev'
db_conn.show_sql_statement = True
db_conn.connect()

2022-07-21 15:39:59,701 INFO sqlalchemy.engine.Engine SELECT version()
2022-07-21 15:39:59,705 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00425s] ()


In [9]:
# Check if the connection was established
db_conn.is_connected()

True

## 2. Create ETL object

In [10]:
# Create an ETL process object, passing it the database connector
etl_proc_obj = EtlProcessing(db_conn)

## 3. Create Matching from file settings

In [11]:
# Path of the JSON settings file for the Matching table.
# The bare name on the last line echoes the path as the cell output.
match_settings = '../../../tests/data/howto/trino/test_matching_trino.json'
match_settings

'../../../tests/data/howto/trino/test_matching_trino.json'

In [12]:
# Create a File object from the path of the Matching settings file
file_match = File(match_settings)

In [13]:
# Call the create_data_source() method of the ETL object by passing the File.
# In the recorded run, the echoed SQL shows a CREATE TABLE for esg_matching.esg_matching.
db_matching = etl_proc_obj.create_data_source(file_match)

2022-07-21 15:40:05,850 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
2022-07-21 15:40:05,852 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00178s] ('esg_matching',)
2022-07-21 15:40:07,923 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-07-21 15:40:07,925 INFO sqlalchemy.engine.Engine 
CREATE TABLE esg_matching.esg_matching (
	timestamp TIMESTAMP(6), 
	matching_id BIGINT, 
	ref_name VARCHAR, 
	tgt_name VARCHAR, 
	matching_type VARCHAR, 
	matching_scope VARCHAR, 
	matching_rule VARCHAR, 
	ref_id VARCHAR, 
	ref_company VARCHAR, 
	ref_country VARCHAR, 
	tgt_id VARCHAR, 
	tgt_company VARCHAR, 
	tgt_country VARCHAR, 
	isin VARCHAR, 
	lei VARCHAR, 
	sedol VARCHAR
)


2022-07-21 15:40:07,926 INFO sqlalchemy.engine.Engine [no key 0.00142s] ()
2022-07-21 15:40:08,978 INFO sqlalchemy.engine.Engine COMMIT


In [14]:
# Retrieve the attribute (column) names of the Matching table
db_matching.get_attribute_names()

['timestamp',
 'matching_id',
 'ref_name',
 'tgt_name',
 'matching_type',
 'matching_scope',
 'matching_rule',
 'ref_id',
 'ref_company',
 'ref_country',
 'tgt_id',
 'tgt_company',
 'tgt_country',
 'isin',
 'lei',
 'sedol']

## 4. Create No-Matching from file settings

In [15]:
# Path of the JSON settings file for the No-Matching table.
# The bare name on the last line echoes the path as the cell output.
no_match_settings = '../../../tests/data/howto/trino/test_no_matching_trino.json'
no_match_settings

'../../../tests/data/howto/trino/test_no_matching_trino.json'

In [16]:
# Create a File object from the path of the No-Matching settings file
file_no_match = File(no_match_settings)

In [17]:
# Call the create_data_source() method of the ETL object by passing the File.
# In the recorded run, the echoed SQL shows a DROP TABLE of esg_matching.esg_no_matching
# followed by a CREATE TABLE for it.
# NOTE(review): this suggests an existing table is replaced - confirm before running on real data.
db_no_matching = etl_proc_obj.create_data_source(file_no_match)

2022-07-21 15:40:21,943 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
2022-07-21 15:40:21,945 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00157s] ('esg_matching',)
2022-07-21 15:40:24,131 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
2022-07-21 15:40:24,132 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00115s] ('esg_matching',)
2022-07-21 15:40:26,531 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:26,532 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00091s] ('esg_matching', 'esg_match_tgt2')
2022-07-21 15:40:27,783 INFO sqlalchemy.engine.Engine SELECT
    "column_name",
    "data_type",
    "column_default",
    UPPER("is_nullable") AS "is_nullable"
FROM "informatio

  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)


2022-07-21 15:40:33,587 INFO sqlalchemy.engine.Engine ROLLBACK
2022-07-21 15:40:33,588 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:33,590 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00136s] ('esg_matching', 'esg_match_ref')
2022-07-21 15:40:34,988 INFO sqlalchemy.engine.Engine SELECT
    "column_name",
    "data_type",
    "column_default",
    UPPER("is_nullable") AS "is_nullable"
FROM "information_schema"."columns"
WHERE "table_schema" = ?
  AND "table_name" = ?
ORDER BY "ordinal_position" ASC
2022-07-21 15:40:34,989 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00084s] ('esg_matching', 'esg_match_ref')
2022-07-21 15:40:36,444 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:36,445 INFO sqlalchemy.engine.Engine [diale

  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)


2022-07-21 15:40:40,441 INFO sqlalchemy.engine.Engine ROLLBACK
2022-07-21 15:40:40,442 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:40,443 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00118s] ('esg_matching', 'esg_match_tgt1')
2022-07-21 15:40:41,920 INFO sqlalchemy.engine.Engine SELECT
    "column_name",
    "data_type",
    "column_default",
    UPPER("is_nullable") AS "is_nullable"
FROM "information_schema"."columns"
WHERE "table_schema" = ?
  AND "table_name" = ?
ORDER BY "ordinal_position" ASC
2022-07-21 15:40:41,920 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00081s] ('esg_matching', 'esg_match_tgt1')
2022-07-21 15:40:43,924 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:43,925 INFO sqlalchemy.engine.Engine [dia

  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)


2022-07-21 15:40:47,975 INFO sqlalchemy.engine.Engine ROLLBACK
2022-07-21 15:40:47,976 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:47,978 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00156s] ('esg_matching', 'esg_matching')
2022-07-21 15:40:49,341 INFO sqlalchemy.engine.Engine SELECT
    "column_name",
    "data_type",
    "column_default",
    UPPER("is_nullable") AS "is_nullable"
FROM "information_schema"."columns"
WHERE "table_schema" = ?
  AND "table_name" = ?
ORDER BY "ordinal_position" ASC
2022-07-21 15:40:49,342 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00133s] ('esg_matching', 'esg_matching')
2022-07-21 15:40:50,931 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:50,932 INFO sqlalchemy.engine.Engine [dialect

  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)


2022-07-21 15:40:54,956 INFO sqlalchemy.engine.Engine ROLLBACK
2022-07-21 15:40:54,957 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:54,958 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00139s] ('esg_matching', 'esg_no_matching')
2022-07-21 15:40:56,813 INFO sqlalchemy.engine.Engine SELECT
    "column_name",
    "data_type",
    "column_default",
    UPPER("is_nullable") AS "is_nullable"
FROM "information_schema"."columns"
WHERE "table_schema" = ?
  AND "table_name" = ?
ORDER BY "ordinal_position" ASC
2022-07-21 15:40:56,814 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00100s] ('esg_matching', 'esg_no_matching')
2022-07-21 15:40:58,629 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:40:58,630 INFO sqlalchemy.engine.Engine [d

  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)


2022-07-21 15:41:03,275 INFO sqlalchemy.engine.Engine ROLLBACK
2022-07-21 15:41:03,276 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:41:03,278 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00160s] ('esg_matching', 'matching')
2022-07-21 15:41:04,717 INFO sqlalchemy.engine.Engine SELECT
    "column_name",
    "data_type",
    "column_default",
    UPPER("is_nullable") AS "is_nullable"
FROM "information_schema"."columns"
WHERE "table_schema" = ?
  AND "table_name" = ?
ORDER BY "ordinal_position" ASC
2022-07-21 15:41:04,718 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00081s] ('esg_matching', 'matching')
2022-07-21 15:41:06,339 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
  AND "table_name" = ?
2022-07-21 15:41:06,340 INFO sqlalchemy.engine.Engine [dialect trino+r

  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)
  metadata.reflect(self._engine)


2022-07-21 15:41:10,775 INFO sqlalchemy.engine.Engine ROLLBACK
2022-07-21 15:41:10,776 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-07-21 15:41:10,779 INFO sqlalchemy.engine.Engine 
DROP TABLE esg_matching.esg_no_matching
2022-07-21 15:41:10,780 INFO sqlalchemy.engine.Engine [no key 0.00178s] ()
2022-07-21 15:41:12,130 INFO sqlalchemy.engine.Engine COMMIT
2022-07-21 15:41:12,133 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-07-21 15:41:12,134 INFO sqlalchemy.engine.Engine 
CREATE TABLE esg_matching.esg_no_matching (
	timestamp TIMESTAMP(6), 
	matching_id BIGINT, 
	tgt_name VARCHAR, 
	tgt_id VARCHAR, 
	tgt_company VARCHAR, 
	tgt_country VARCHAR, 
	isin VARCHAR, 
	lei VARCHAR, 
	sedol VARCHAR
)


2022-07-21 15:41:12,136 INFO sqlalchemy.engine.Engine [no key 0.00131s] ()
2022-07-21 15:41:13,086 INFO sqlalchemy.engine.Engine COMMIT


In [19]:
# Retrieve the attribute (column) names of the No-Matching table
db_no_matching.get_attribute_names()

['timestamp',
 'matching_id',
 'tgt_name',
 'tgt_id',
 'tgt_company',
 'tgt_country',
 'isin',
 'lei',
 'sedol']

## 5. Disconnect

In [20]:
# Close the connection to the database
db_conn.disconnect()

In [21]:
# Check the connection state again; False is expected after disconnect()
db_conn.is_connected()

False