# How to...perform exact matching on trustable attributes

In [None]:
# Make the esg-matching API importable from this notebook by appending
# its absolute directory to the module search path (sys.path).
import sys
sys.path.append('/opt/app-root/src/esg-matching/')

In [None]:
# Import the module for accessing a database
from esgmatching.dbmanager.SqlEngine import SqlEngine

In [None]:
# Import the module for reading csv files
from esgmatching.reader.FileReaderCsvToDB import FileReaderCsvToDB

In [None]:
# Import the module for performing exact matching in the database
from esgmatching.matcher.ExactMatcherDB import ExactMatcherDB

## 1. Settings

In [None]:
# Location of the folder where the database will be created.
# The path points to the data/database folder under the esg-matching project folder.
path_db = '/opt/app-root/src/esg-matching/data/database/'

In [None]:
# Connection string for sqlite; other databases might require different information.
# It is assembled as [sqlite prefix] + [database path] + [database file name].
str_connection = f'sqlite:///{path_db}entitymatching.db'
# Bare expression so the notebook displays the resulting connection string.
str_connection

In [None]:
# Create the database engine object from the connection string
sqlengine_obj = SqlEngine(str_connection)

In [None]:
# The connect() method of the SqlEngine is used to establish a connection with the database if it exists,
# or to create a new one otherwise. The parameter show_echo is False by default and indicates whether the SQL statements
# are echoed (printed) to the default output channel. Here show_echo=True is set so the SQL statements are visible.
sqlengine_obj.connect(show_echo=True)

In [None]:
# Check whether the connection was established
sqlengine_obj.is_connected()

## 2. Read the csv files into the database

In [None]:
# Location of the test csv files and their mapping (json) files
path_test = '/opt/app-root/src/esg-matching/data/test/'

In [None]:
# Full path of the csv file for data source 1
file1_path = f'{path_test}test_data_source1.csv'
# Bare expression so the notebook displays the path.
file1_path

In [None]:
# Full path of the json data-mapping file for data source 1
file1_map = f'{path_test}test_data_source1.json'
# Bare expression so the notebook displays the path.
file1_map

In [None]:
# Initialize the FileReader used to read the csv files into the database
csvreader_obj = FileReaderCsvToDB()

In [None]:
# Set the database engine into the FileReader
# NOTE(review): use_session=True presumably makes the reader work through a database session - confirm against FileReaderCsvToDB
csvreader_obj.set_database_engine(sqlengine_obj, use_session=True)

In [None]:
# Read 'test_data_source1.csv' using its json mapping file and a comma delimiter.
# The returned object is later passed to ExactMatcherDB when the matcher is created.
# NOTE(review): chunk_size=10 presumably sets how many rows are processed per batch - confirm against FileReaderCsvToDB
ref_data_source = csvreader_obj.read_file(file1_path, file1_map, delimiter=',', chunk_size=10)

In [None]:
# Full path of the csv file for data source 2
file2_path = f'{path_test}test_data_source2.csv'
# Bare expression so the notebook displays the path.
file2_path

In [None]:
# Full path of the json data-mapping file for data source 2
file2_map = f'{path_test}test_data_source2.json'
# Bare expression so the notebook displays the path.
file2_map

In [None]:
# Read 'test_data_source2.csv' using its json mapping file and a comma delimiter.
# The returned object is later added to the matcher as a target data source.
# NOTE(review): chunk_size=10 presumably sets how many rows are processed per batch - confirm against FileReaderCsvToDB
target_data_source = csvreader_obj.read_file(file2_path, file2_map, delimiter=',', chunk_size=10)

In [None]:
# Full path of the csv file for data source 3
file3_path = f'{path_test}test_data_source3.csv'
# Bare expression so the notebook displays the path.
file3_path

In [None]:
# Full path of the json data-mapping file for data source 3
file3_map = f'{path_test}test_data_source3.json'
# Bare expression so the notebook displays the path.
file3_map

In [None]:
# Read 'test_data_source3.csv' using its json mapping file and a comma delimiter.
# The returned object is later added to the matcher as a second target data source.
# NOTE(review): chunk_size=10 presumably sets how many rows are processed per batch - confirm against FileReaderCsvToDB
target_data_source2 = csvreader_obj.read_file(file3_path, file3_map, delimiter=',', chunk_size=10)

## 3. Perform matching

In [None]:
# Create the exact matcher from the string 'Matching_DS1_DS2' and data source 1.
# NOTE(review): the string is presumably the matching name and data source 1 the reference data source - confirm against ExactMatcherDB
matcher_obj = ExactMatcherDB('Matching_DS1_DS2',ref_data_source)

In [None]:
# Add data source 2 as a target data source of the matcher
matcher_obj.add_target_data_sources(target_data_source)

In [None]:
# Add data source 3 as a second target data source of the matcher
matcher_obj.add_target_data_sources(target_data_source2)

In [None]:
# Set the database engine into the matcher
matcher_obj.set_database_engine(sqlengine_obj)

In [None]:
# Execute the matching; the returned object is used further below to print the report.
# NOTE(review): 'Matching_Table' is presumably the name of the table that stores the matches - confirm against ExactMatcherDB
matching_report_obj = matcher_obj.execute_matching('Matching_Table')

In [None]:
# Run the matcher's indirect matching step (its exact semantics are defined in ExactMatcherDB)
matcher_obj.execute_indirect_matching()

In [None]:
# Print the report object returned by execute_matching()
matching_report_obj.print_report()

## 5. Drop Tables

In [None]:
# Drop the table created for data source 1
sqlengine_obj.drop_table(ref_data_source.table_obj)

In [None]:
# Drop the table created for data source 2
sqlengine_obj.drop_table(target_data_source.table_obj)

In [None]:
# Drop the table created for data source 3
sqlengine_obj.drop_table(target_data_source2.table_obj)

## 6. Close database connection

In [None]:
# Close the connection with the database
sqlengine_obj.disconnect()