# How to... perform exact matching on an Oracle database

In [1]:
# Make the api importable from this notebook: append the directory three levels up
# to the module search path (a relative entry, so it depends on the working directory)
import sys
sys.path.append('../../../')

In [2]:
# Import the module for connection to an Oracle database
from esgmatching.db_engine.engines.connector_oracle import OracleConnector

In [3]:
# Import the modules for file management
from esgmatching.file_reader.file import File
from esgmatching.file_reader.file_reader_csv import FileReaderCsv

In [4]:
# Import the modules for the etl processing: reading, transformation and loading data to a database
from esgmatching.processing.etl_processing import EtlProcessing

## 1. Database setup

In [5]:
# The database connector is represented by the class OracleConnector.
# The instance is created without arguments; its properties are assigned afterwards.
db_conn = OracleConnector()

In [6]:
# Set up the connection properties.
#
# The password is deliberately NOT stored in the notebook: it is read from the
# ESG_ORACLE_PASSWORD environment variable, falling back to an interactive
# prompt when the variable is unset or empty.
import os
from getpass import getpass

# Raw string keeps the Windows backslashes literal instead of relying on the
# invalid escape sequences '\o' and '\i' (a warning on recent Python versions).
db_conn.client_driver_dir = r'C:\oracle\instantclient_21_3'
db_conn.username = 'admin'
db_conn.user_password = os.environ.get('ESG_ORACLE_PASSWORD') or getpass('Oracle password: ')
db_conn.host_url = 'esgmatching.ctqjxnfdw57h.eu-central-1.rds.amazonaws.com'
db_conn.port_number = '1521'
db_conn.service_name = 'DATABASE'
# Flag on the connector; False here (presumably suppresses echoing of SQL statements - confirm)
db_conn.show_sql_statement = False

In [7]:
# The connect() method of the OracleConnector is used to establish a connection with the database.
db_conn.connect()

In [8]:
# Check if the connection was established (the cell displays the value returned by is_connected())
db_conn.is_connected()

True

## 2. Load the data into the database from csv files

In [9]:
# Create an ETL process object, passing it the database connector
etl_proc_obj = EtlProcessing(db_conn)

In [10]:
# Create a file reader object for csv files
csv_reader_obj = FileReaderCsv()  

In [11]:
# Referential data source: path to its JSON settings file, wrapped in a File object
ref_settings = '../../../tests/data/oracle/test_referential1_oracle.json'
ref_obj = File(ref_settings)

In [12]:
# Load data from the REFERENTIAL file into the database using the csv reader.
# The returned object (db_ref) is reused below for mapping checks and matching policies.
db_ref = etl_proc_obj.load_file_to_db(ref_obj, csv_reader_obj)

../../../tests/data/test_referential1.csv
{'UNIQUE_ID': '1', 'ISIN': 'SK1120005824', 'COMPANY': 'CENTRAL PERK', 'COUNTRY': 'SK'}
{'UNIQUE_ID': '2', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'COMPANY': 'HONEYDUKES', 'COUNTRY': 'UNITED STATES OF AMERICA'}
{'UNIQUE_ID': '3', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'COMPANY': 'STARCOURT MALL', 'COUNTRY': 'AUSTRIA'}
{'UNIQUE_ID': '4', 'ISIN': 'GB00B1YW4409', 'COMPANY': 'STERLING COOPER', 'COUNTRY': 'GBR'}
{'UNIQUE_ID': '5', 'ISIN': 'CH0012221716', 'COMPANY': 'Bluth company', 'COUNTRY': 'CHE'}
{'UNIQUE_ID': '6', 'ISIN': 'US0200021014', 'COMPANY': 'InGen', 'COUNTRY': 'usa'}
{'UNIQUE_ID': '7', 'ISIN': 'US0231351067', 'COMPANY': 'Stark Industries', 'COUNTRY': 'us'}
{'UNIQUE_ID': '8', 'ISIN': 'US0126531013', 'COMPANY': 'SPECTRE', 'COUNTRY': 'USA'}
{'UNIQUE_ID': '9', 'ISIN': 'US0126531013', 'COMPANY': 'SPECTRE 33 SUBSIDIARY', 'COUNTRY': 'USA'}


In [13]:
# Target 1 data source: path to its JSON settings file, wrapped in a File object
tgt1_settings = '../../../tests/data/oracle/test_target1_oracle.json'
tgt1_obj = File(tgt1_settings)

In [14]:
# Load data from the TARGET 1 file into the database using the csv reader
db_tgt1 = etl_proc_obj.load_file_to_db(tgt1_obj, csv_reader_obj)

../../../tests/data/test_target1.csv
{'UNIQUE_ID': '1', 'ISIN': 'SK1120005824', 'LEI': '097900BHK10000084115', 'COMPANY': 'CENTRAL PERK AND SONS', 'COUNTRY': 'DE'}
{'UNIQUE_ID': '2', 'ISIN': 'DE0005545503', 'LEI': '5299003VKVDCUPSS5X23', 'COMPANY': 'DUNDER MIFFLINS [12345]', 'COUNTRY': 'de'}
{'UNIQUE_ID': '3', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'LEI': '254900B1P3S786KDAW57', 'COMPANY': 'HONEYDUKES (adm@honeydukes.com)', 'COUNTRY': 'UNITED STATES OF AMERICA'}
{'UNIQUE_ID': '4', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'LEI': '529900MVZ2YHFZV3K546', 'COMPANY': 'STARCOURT MALL', 'COUNTRY': 'AUSTRIA'}
{'UNIQUE_ID': '5', 'ISIN': 'GB00B1YW4409', 'LEI': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'COMPANY': 'Krusty Krab', 'COUNTRY': 'GB'}
{'UNIQUE_ID': '6', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'LEI': '3003007MR0P683GYR674', 'COMPANY': 'RR DINER', 'COUNTRY': 'CHINA'}
{'UNIQUE_ID': '7'

In [15]:
# Target 2 data source: path to its JSON settings file, wrapped in a File object
tgt2_settings = '../../../tests/data/oracle/test_target2_oracle.json'
tgt2_obj = File(tgt2_settings)

In [16]:
# Load data from the TARGET 2 file into the database using the csv reader
db_tgt2 = etl_proc_obj.load_file_to_db(tgt2_obj, csv_reader_obj)

../../../tests/data/test_target2.csv
{'UNIQUE_ID': '1', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'LEI': '097900BHK10000084115', 'SEDOL': '7108899', 'COMPANY': 'HONEYDUKES', 'COUNTRY': 'UNITED STATES'}
{'UNIQUE_ID': '2', 'ISIN': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'LEI': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'SEDOL': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'COMPANY': 'STARCOURT MALL', 'COUNTRY': 'AUSTRIA'}
{'UNIQUE_ID': '3', 'ISIN': 'CH0012221716', 'LEI': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'SEDOL': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'COMPANY': 'Bluth company', 'COUNTRY': 'CHE'}
{'UNIQUE_ID': '4', 'ISIN': 'US0200021014', 'LEI': <sqlalchemy.sql.elements.Null object at 0x000002307FE31CA0>, 'SEDOL': '2019952', 'COMPANY': 'InGen', 'COUNTRY': 'usa'}
{'UNIQUE_ID': '5', 'ISIN': 'US0126531013', 'LEI': <sqlalchemy.sql.elements.Null object at 0x000002

## 3. Creating matching/no-matching tables

In [17]:
# Paths to the JSON settings files for the matching and no-matching tables
match_settings = '../../../tests/data/oracle/test_matching_oracle.json'
no_match_settings = '../../../tests/data/oracle/test_no_matching_oracle.json'

In [18]:
# Wrap each settings path in a File object
file_match = File(match_settings)
file_no_match = File(no_match_settings)

In [19]:
# Call the create_data_source() method, passing the File object of each table.
# The returned objects are later given to the matching policies as the
# matching and no-matching sources.
db_matching = etl_proc_obj.create_data_source(file_match)
db_no_matching = etl_proc_obj.create_data_source(file_no_match)

## 4. Checking attribute mapping alias

In [20]:
# Display the alias mapping of the referential source
db_ref.get_mapping_to_alias()

{'ISIN': Column('ISIN', VARCHAR2(length=12), table=<ESG_MATCH_REF>),
 'COMPANY': Column('COMPANY', VARCHAR2(length=255), table=<ESG_MATCH_REF>),
 'COUNTRY': Column('COUNTRY', VARCHAR2(length=255), table=<ESG_MATCH_REF>)}

In [21]:
# Display the alias mapping of target 1
db_tgt1.get_mapping_to_alias()

{'ISIN': Column('ISIN', VARCHAR2(length=12), table=<ESG_MATCH_TGT1>),
 'LEI': Column('LEI', VARCHAR2(length=20), table=<ESG_MATCH_TGT1>),
 'COMPANY': Column('COMPANY', VARCHAR2(length=255), table=<ESG_MATCH_TGT1>),
 'COUNTRY': Column('COUNTRY', VARCHAR2(length=255), table=<ESG_MATCH_TGT1>)}

In [22]:
# Display the alias mapping of target 2
db_tgt2.get_mapping_to_alias()

{'ISIN': Column('ISIN', VARCHAR2(length=12), table=<ESG_MATCH_TGT2>),
 'LEI': Column('LEI', VARCHAR2(length=20), table=<ESG_MATCH_TGT2>),
 'SEDOL': Column('SEDOL', VARCHAR2(length=20), table=<ESG_MATCH_TGT2>),
 'COMPANY': Column('COMPANY', VARCHAR2(length=255), table=<ESG_MATCH_TGT2>),
 'COUNTRY': Column('COUNTRY', VARCHAR2(length=255), table=<ESG_MATCH_TGT2>)}

## 5. Checking attribute mapping between sources and matching tables

In [23]:
# Display the mapping between the referential source and the matching table attributes
db_ref.get_mapping_to_matching()

{'REF_ID': Column('UNIQUE_ID', NUMBER(asdecimal=False), table=<ESG_MATCH_REF>, primary_key=True, nullable=False),
 'REF_COMPANY': Column('COMPANY', VARCHAR2(length=255), table=<ESG_MATCH_REF>),
 'REF_COUNTRY': Column('COUNTRY', VARCHAR2(length=255), table=<ESG_MATCH_REF>)}

In [24]:
# Display the mapping between target 1 and the matching table attributes
db_tgt1.get_mapping_to_matching()

{'TGT_ID': Column('UNIQUE_ID', NUMBER(asdecimal=False), table=<ESG_MATCH_TGT1>, primary_key=True, nullable=False),
 'ISIN': Column('ISIN', VARCHAR2(length=12), table=<ESG_MATCH_TGT1>),
 'LEI': Column('LEI', VARCHAR2(length=20), table=<ESG_MATCH_TGT1>),
 'TGT_COMPANY': Column('COMPANY', VARCHAR2(length=255), table=<ESG_MATCH_TGT1>),
 'TGT_COUNTRY': Column('COUNTRY', VARCHAR2(length=255), table=<ESG_MATCH_TGT1>)}

In [25]:
# Display the mapping between target 2 and the matching table attributes
db_tgt2.get_mapping_to_matching()

{'TGT_ID': Column('UNIQUE_ID', NUMBER(asdecimal=False), table=<ESG_MATCH_TGT2>, primary_key=True, nullable=False),
 'ISIN': Column('ISIN', VARCHAR2(length=12), table=<ESG_MATCH_TGT2>),
 'LEI': Column('LEI', VARCHAR2(length=20), table=<ESG_MATCH_TGT2>),
 'SEDOL': Column('SEDOL', VARCHAR2(length=20), table=<ESG_MATCH_TGT2>),
 'TGT_COMPANY': Column('COMPANY', VARCHAR2(length=255), table=<ESG_MATCH_TGT2>),
 'TGT_COUNTRY': Column('COUNTRY', VARCHAR2(length=255), table=<ESG_MATCH_TGT2>)}

## 6. Create policies for matching each target data source with the referential

In [26]:
# Import policy module
from esgmatching.db_matcher.matching_policy import MatchingPolicy

In [27]:
# Create matching policy object for target1
policy_match_tgt1 = MatchingPolicy(db_tgt1, 'matching_with_ref1')

In [28]:
# Set the referential and matching/no-matching sources for the target 1 policy
policy_match_tgt1.set_referential_source(db_ref)
policy_match_tgt1.set_matching_source(db_matching)
policy_match_tgt1.set_no_matching_source(db_no_matching)

In [29]:
# Create matching policy object for target2
# NOTE(review): the policy name 'matching_with_ref1' is the same one used for target 1 - confirm this is intended
policy_match_tgt2 = MatchingPolicy(db_tgt2, 'matching_with_ref1')

In [30]:
# Set the referential and matching/no-matching sources for the target 2 policy
# (the same referential and matching/no-matching objects as for target 1)
policy_match_tgt2.set_referential_source(db_ref)
policy_match_tgt2.set_matching_source(db_matching)
policy_match_tgt2.set_no_matching_source(db_no_matching)

## 7. Perform direct full matching (DFM) for each data source

In [31]:
# Import DFM module
from esgmatching.db_matcher.db_matcher_dfm import DbMatcherDfm

In [32]:
# Create a matcher object for DFM, passing it the database connector
dfm_matcher_obj = DbMatcherDfm(db_conn)

In [33]:
# Perform DFM on target 1: select its policy on the matcher, then run the matching
dfm_matcher_obj.set_policy(policy_match_tgt1)
dfm_matcher_obj.execute_matching()

In [34]:
# Perform DFM on target 2: the same matcher object is reused with the target 2 policy
dfm_matcher_obj.set_policy(policy_match_tgt2)
dfm_matcher_obj.execute_matching()

## 8. Close database connection

In [35]:
# Close the database connection
db_conn.disconnect()

In [36]:
# Check that the connection is closed (is_connected() is expected to display False)
db_conn.is_connected()

False