# How to...create Matching and No-Matching Tables in Trino

In [1]:
# Make the esg_matching package importable by adding the directory three levels
# up from the working directory to the module search path.
# NOTE: the path is relative, so it resolves against the current working
# directory - presumably this notebook's folder when run from Jupyter.
import sys

api_root = '../../../'
# Guard the append so that re-running this cell does not add duplicate entries.
if api_root not in sys.path:
    sys.path.append(api_root)

In [2]:
# Import the module for connection to a Trino database
from esg_matching.engine.connectors.trino import TrinoConnector

In [3]:
# Import the modules for file management
from esg_matching.file_reader.file import File
from esg_matching.file_reader.csv_reader import FileReaderCsv

In [4]:
# Import the modules for the etl processing: reading, transformation and loading data to a database
from esg_matching.processing.etl import EtlProcessing

## 1. Database setup

In [5]:
# Import the module for connection to a Trino database
from esg_matching.engine.connectors.trino import TrinoConnector

In [6]:
import os

# The Trino connection settings are read from environment variables, so no
# credentials are stored in the notebook. A missing variable raises KeyError.
trino_env = os.environ
user_trino = trino_env['TRINO_USER']
pwd_trino = trino_env['TRINO_PASSWD']
host_trino = trino_env['TRINO_HOST']
# Environment values are strings; the port is converted to an integer.
port_trino = int(trino_env['TRINO_PORT'])

In [7]:
# The database connector is represented by the class TrinoConnector
db_conn = TrinoConnector()

In [8]:
# Configure the TrinoConnector and open the connection.
# The user name, password, host and port come from the environment variables read above;
# the catalog property selects the Trino catalog to work in.
# The property show_sql_statement indicates if the SQL statements are echoed (or printed) in the default output channel.
# The connect() method is then used to establish the connection with the Trino server.
db_conn.username = user_trino
db_conn.user_password = pwd_trino
db_conn.host_url = host_trino
db_conn.port_number = port_trino
db_conn.catalog = 'osc_datacommons_iceberg_dev'
db_conn.show_sql_statement = True
db_conn.connect()

2022-07-17 18:45:33,665 INFO sqlalchemy.engine.Engine SELECT version()
2022-07-17 18:45:33,668 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00320s] ()


In [9]:
# Check if the connection was established
db_conn.is_connected()

True

## 2. Create ETL object

In [10]:
# Create an ETL process object that works through the database connection
etl_proc_obj = EtlProcessing(db_conn)

## 3. Create the Matching table from file settings

In [11]:
# Settings for the Matching table: path to the JSON settings file
# (the bare name on the last line echoes the path as the cell output)
match_settings = '../../../tests/data/howto/trino/test_matching_trino.json'
match_settings

'../../../tests/data/howto/trino/test_matching_trino.json'

In [12]:
# Create a file object for the Matching settings file
file_match = File(match_settings)

In [13]:
# Call the create_data_source() method by passing the File object.
# With SQL echo enabled, the statements it issues are printed below; in the
# recorded run it created the table esg_matching.esg_matching.
db_matching = etl_proc_obj.create_data_source(file_match)

2022-07-17 18:45:40,786 INFO sqlalchemy.engine.Engine SELECT "table_name"
FROM "information_schema"."tables"
WHERE "table_schema" = ?
2022-07-17 18:45:40,787 INFO sqlalchemy.engine.Engine [dialect trino+rest does not support caching 0.00132s] ('esg_matching',)
2022-07-17 18:45:42,150 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2022-07-17 18:45:42,151 INFO sqlalchemy.engine.Engine 
CREATE TABLE esg_matching.esg_matching (
	matching_id INTEGER, 
	ref_name VARCHAR, 
	tgt_name VARCHAR, 
	matching_type VARCHAR, 
	matching_scope VARCHAR, 
	matching_rule VARCHAR, 
	ref_id VARCHAR, 
	ref_company VARCHAR, 
	ref_country VARCHAR, 
	tgt_id VARCHAR, 
	tgt_company VARCHAR, 
	tgt_country VARCHAR, 
	isin VARCHAR, 
	lei VARCHAR, 
	sedol VARCHAR
)


2022-07-17 18:45:42,152 INFO sqlalchemy.engine.Engine [no key 0.00068s] ()
2022-07-17 18:45:43,122 INFO sqlalchemy.engine.Engine COMMIT


In [14]:
# Retrieve the attribute (column) names of the Matching database table
db_matching.get_attribute_names()

['matching_id',
 'ref_name',
 'tgt_name',
 'matching_type',
 'matching_scope',
 'matching_rule',
 'ref_id',
 'ref_company',
 'ref_country',
 'tgt_id',
 'tgt_company',
 'tgt_country',
 'isin',
 'lei',
 'sedol']

## 4. Create the No-Matching table from file settings

In [None]:
# Settings for the No-Matching table: path to the JSON settings file
# (the bare name on the last line echoes the path as the cell output)
no_match_settings = '../../../tests/data/howto/trino/test_no_matching_trino.json'
no_match_settings

In [None]:
# Create a file object for the No-Matching settings file
file_no_match = File(no_match_settings)

In [None]:
# Call the create_data_source() method by passing the File object
db_no_matching = etl_proc_obj.create_data_source(file_no_match)

In [None]:
# Retrieve the attribute names of the No-Matching database table
db_no_matching.get_attribute_names()

## 5. Disconnect

In [None]:
# Close the connection to the database
db_conn.disconnect()

In [None]:
# Check the connection state after disconnecting
# (presumably False now - there is no recorded output to confirm)
db_conn.is_connected()