# Demo - Exact Matching

In [None]:
# Sets up the location of the api relative to this notebook, so that the
# esg_matching package imported in the following cells can be found.
# NOTE: a relative sys.path entry is resolved against the current working directory,
# so this assumes the notebook is executed from the folder it lives in.
import sys
sys.path.append('../../../')

In [None]:
# Import the module for connection to a SQLite database
from esg_matching.engine.connectors.sql_lite import SqlLiteConnector

In [None]:
# Import the modules for file management
from esg_matching.file_reader.file import File
from esg_matching.file_reader.csv_reader import FileReaderCsv

In [None]:
# Import the modules for the etl processing: reading, transformation and loading data to a database
from esg_matching.processing.etl_processing import EtlProcessing

## 1. Database setup

In [None]:
# Location of the database file to be created, relative to this jupyter notebook.
# The database will be created in the tests/data/notebook/demo folder, under the project main folder.
path_db = '../../../tests/data/notebook/demo/test_esg_matching.db'

In [None]:
# The database connector is represented by the class SqlLiteConnector.
# It is created without arguments here; its properties are set before connect() is called.
db_conn = SqlLiteConnector()

In [None]:
# The connect() method of the SqlLiteConnector is used to establish a connection with the database if it exists,
# or to create a new one. The property path_db defines the location and name of the database.
# The property show_sql_statement indicates if the SQL statements are echoed (or printed) in the default output channel.
# Both properties are assigned before connect() is called.
db_conn.path_db = path_db
db_conn.show_sql_statement = True
db_conn.connect()

In [None]:
# Check if the connection was established
db_conn.is_connected()

## 2. Load the data into database from csv files

In [None]:
# Create an ETL process object, passing it the database connector (db_conn)
etl_proc_obj = EtlProcessing(db_conn)

In [None]:
# Create a file reader object for csv files (reused for every file loaded below)
csv_reader_obj = FileReaderCsv()  

In [None]:
# Referential Data Source: path to its JSON settings file, wrapped in a File object
ref_settings = '../../../tests/data/notebook/demo/test_ref_sqlite.json'
ref_obj = File(ref_settings)

In [None]:
# Load data from REFERENTIAL into the database.
# The returned object (db_ref) is used in the following sections as the referential source.
db_ref = etl_proc_obj.load_file_to_db(ref_obj, csv_reader_obj)

In [None]:
# Target Data Source 1: path to its JSON settings file, wrapped in a File object
tgt1_settings = '../../../tests/data/notebook/demo/test_ds1_sqlite.json'
tgt1_obj = File(tgt1_settings)

In [None]:
# Load data from TARGET 1 into the database; db_tgt1 is used later to build its matching policy
db_tgt1 = etl_proc_obj.load_file_to_db(tgt1_obj, csv_reader_obj)

In [None]:
# Target Data Source 2: path to its JSON settings file, wrapped in a File object
tgt2_settings = '../../../tests/data/notebook/demo/test_ds2_sqlite.json'
tgt2_obj = File(tgt2_settings)

In [None]:
# Load data from TARGET 2 into the database; db_tgt2 is used later to build its matching policy
db_tgt2 = etl_proc_obj.load_file_to_db(tgt2_obj, csv_reader_obj)

## 3. Creating matching/no-matching tables

In [None]:
# Paths to the JSON settings files of the matching and no-matching tables
match_settings = '../../../tests/data/notebook/demo/test_matching_sqlite.json'
no_match_settings = '../../../tests/data/notebook/demo/test_no_matching_sqlite.json'

In [None]:
# Wrap each settings path in a File object
file_match = File(match_settings)
file_no_match = File(no_match_settings)

In [None]:
# Call the create_data_source() method by passing the File with the matching table settings
db_matching = etl_proc_obj.create_data_source(file_match)

In [None]:
# Call the create_data_source() method by passing the File with the no-matching table settings
db_no_matching = etl_proc_obj.create_data_source(file_no_match)

## 4. Checking matching policy and aliases

In [None]:
# Show the policy definition returned for target 1
db_tgt1.get_policy_definition()

In [None]:
# Show the policy definition returned for target 2
db_tgt2.get_policy_definition()

In [None]:
# Show the result of get_mapping_to_alias() for the referential source
db_ref.get_mapping_to_alias()

In [None]:
# Show the result of get_mapping_to_alias() for target 1
db_tgt1.get_mapping_to_alias()

In [None]:
# Show the result of get_mapping_to_alias() for target 2
db_tgt2.get_mapping_to_alias()

## 5. Checking attribute mapping between sources and matching tables

In [None]:
# Show the result of get_mapping_to_matching() for the referential source
db_ref.get_mapping_to_matching()

In [None]:
# Show the result of get_mapping_to_matching() for target 1
db_tgt1.get_mapping_to_matching()

In [None]:
# Show the result of get_mapping_to_matching() for target 2
db_tgt2.get_mapping_to_matching()

## 6. Create policies for matching each target data source with the referential

In [None]:
# Import policy module
from esg_matching.matcher.policy import MatchingPolicy

In [None]:
# Create matching policy object for target1
# (the second argument is presumably the name of a policy defined in the target's settings - TODO confirm)
policy_match_tgt1 = MatchingPolicy(db_tgt1, 'matching_with_ref1')

In [None]:
# Set the referential and matching/no-matching sources used by the target 1 policy
policy_match_tgt1.set_referential_source(db_ref)
policy_match_tgt1.set_matching_source(db_matching)
policy_match_tgt1.set_no_matching_source(db_no_matching)

In [None]:
# Create matching policy object for target2
# NOTE(review): the policy name 'matching_with_ref1' is the same one used for target1 - confirm this is intended
policy_match_tgt2 = MatchingPolicy(db_tgt2, 'matching_with_ref1')

In [None]:
# Set the referential and matching/no-matching sources used by the target 2 policy
# (the same referential, matching and no-matching sources as for target 1)
policy_match_tgt2.set_referential_source(db_ref)
policy_match_tgt2.set_matching_source(db_matching)
policy_match_tgt2.set_no_matching_source(db_no_matching)

## 7. Perform direct full matching (DFM) for each data source

In [None]:
# Import DFM module
from esg_matching.matcher.dfm import DbMatcherDfm

In [None]:
# Create a matcher object for DFM, passing it the database connector (db_conn)
dfm_matcher_obj = DbMatcherDfm(db_conn)

In [None]:
# Perform DFM on target 1: set the target 1 policy on the matcher, then execute the matching
dfm_matcher_obj.set_policy(policy_match_tgt1)
dfm_matcher_obj.execute_matching()

In [None]:
# Perform DFM on target 2: the same matcher object is reused with the target 2 policy
dfm_matcher_obj.set_policy(policy_match_tgt2)
dfm_matcher_obj.execute_matching()

## 8. Perform direct residual matching (DRM) for each data source

In [None]:
# Import DRM module
from esg_matching.matcher.drm import DbMatcherDrm

In [None]:
# Create a matcher object for DRM, passing it the database connector (db_conn)
drm_matcher_obj = DbMatcherDrm(db_conn)

In [None]:
# Perform DRM on target 1: set the target 1 policy on the matcher, then execute the matching
drm_matcher_obj.set_policy(policy_match_tgt1)
drm_matcher_obj.execute_matching()

In [None]:
# Perform DRM on target 2: the same matcher object is reused with the target 2 policy
drm_matcher_obj.set_policy(policy_match_tgt2)
drm_matcher_obj.execute_matching()

## 9. Perform indirect residual matching (IRM) for each data source

In [None]:
# Import IRM module
from esg_matching.matcher.irm import DbMatcherIrm

In [None]:
# Create a matcher object for IRM (DbMatcherIrm), passing it the database connector (db_conn)
irm_matcher_obj = DbMatcherIrm(db_conn)

In [None]:
# Perform IRM on target 1: set the target 1 policy on the matcher, then execute the matching
irm_matcher_obj.set_policy(policy_match_tgt1)
irm_matcher_obj.execute_matching()

In [None]:
# Perform IRM on target 2: the same matcher object is reused with the target 2 policy
irm_matcher_obj.set_policy(policy_match_tgt2)
irm_matcher_obj.execute_matching()

## 10. Saving the matching table as a .csv file

In [None]:
import pandas as pd

In [None]:
# Retrieve the content of the matching source as a DataFrame
# (presumably a pandas DataFrame, given the to_csv() call below - TODO confirm)
df_matching = db_matching.get_data_as_df()

In [None]:
# Display the matching data in the notebook output
df_matching

In [None]:
# Write the matching data to 'my_matching.csv' (relative path, so it lands in the current
# working directory); index=False leaves the DataFrame index out of the file.
df_matching.to_csv('my_matching.csv', index=False)

## 11. Close database connection

In [None]:
# Close the connection with the database
db_conn.disconnect()

In [None]:
# Check the connection status again after disconnecting
db_conn.is_connected()