# How to...validate banking IDs

This notebook shows how to use the financial-entity-cleaner library to validate banking IDs, such as ISIN, LEI and SEDOL.

In [1]:
# Make the library importable from this notebook by adding the directory
# two levels up (relative to the working directory) to the module search path
import sys
sys.path.append('../../')

In [2]:
# Import the module for normalizing banking id information
from financial_entity_cleaner.id_cleaner import banking_id

In [3]:
# Create a BankingIdCleaner object
id_cleaner_obj = banking_id.BankingIdCleaner()

In [4]:
# List the ID types that this cleaner is able to validate
id_cleaner_obj.get_types_id_validations()

['lei', 'isin', 'sedol']

## 1. Basic usage

By default, the API assumes that the value passed as parameter is an ISIN code.
The API returns:
- None if the value is not a string or has no characters in it.
- True if the value is a valid ID of the specified type
- False if the value is not a valid ID of the specified type

In [5]:
# Show the ID type the cleaner is currently set to validate
id_cleaner_obj.id_type

'isin'

In [6]:
# Validate a well-formed ISIN code (the cleaner is set to 'isin')
id_cleaner_obj.is_valid_id('GB00B1YW4409')

True

In [7]:
# Validate a malformed ISIN code: the validation fails and False is returned
id_cleaner_obj.is_valid_id('tttt0B1YW4409')

False

## 2. Working with other ID types (LEI and SEDOL)

In [8]:
# Switch the cleaner to LEI validation
id_cleaner_obj.id_type = 'lei'

In [9]:
# Validate a well-formed LEI code (the cleaner is now set to 'lei')
id_cleaner_obj.is_valid_id('969500DPKGC9JE9F0820')

True

In [10]:
# Switch the cleaner to SEDOL validation
id_cleaner_obj.id_type = 'sedol'

In [11]:
# Validate a well-formed SEDOL code (the cleaner is now set to 'sedol')
id_cleaner_obj.is_valid_id('2595708')

True

The library throws an exception if the type is not supported.

In [12]:
# Setting an unsupported ID type raises an exception. The error is caught and
# printed so that "Restart & Run All" does not stop at this cell and the
# remaining sections of the notebook still execute.
try:
    id_cleaner_obj.id_type = 'test'
except Exception as error:  # expected here: TypeOfBankingIdNotSupported
    print(f'{type(error).__name__}: {error}')

TypeOfBankingIdNotSupported: The type of banking id informed is not supported.

## 3. Cleaning and Validating

The library can also be used for cleaning and validation at the same time. In this case, get_clean_id() returns a dictionary where 'id_cleaned' holds the cleaned ID and 'id_validated' indicates whether the ID is valid.

In [13]:
# Use LEI validation for the cleaning examples below
id_cleaner_obj.id_type = 'lei'

In [14]:
# Clean and validate a valid LEI code in a single call, then display the result
clean_lei = id_cleaner_obj.get_clean_id('969500DPKGC9JE9F0820')
clean_lei

{'id_cleaned': '969500DPKGC9JE9F0820', 'id_validated': True}

By default, if the ID is invalid, get_clean_id() still returns the ID in 'id_cleaned' and sets 'id_validated' to False.

In [15]:
# Clean and validate an invalid LEI code, then display the result
clean_lei = id_cleaner_obj.get_clean_id('96XX00DPKGC9JE9F0820')
clean_lei

{'id_cleaned': '96XX00DPKGC9JE9F0820', 'id_validated': False}

If this behaviour is not required, set the attribute set_null_for_invalid_ids=True so that invalid IDs are returned as null (NaN).

In [16]:
# Ask the cleaner to return a null value instead of the ID when it is invalid
id_cleaner_obj.set_null_for_invalid_ids = True

In [17]:
# Clean the same invalid LEI code again: 'id_cleaned' now comes back as null
clean_lei = id_cleaner_obj.get_clean_id('96XX00DPKGC9JE9F0820')
clean_lei

{'id_cleaned': nan, 'id_validated': False}

## 4. Cleaning a dataframe

In [18]:
import pandas as pd

In [19]:
# CSV file with the sample IDs used in this section
input_filename = '../../tests/data/test_cleaner_ids.csv'

In [20]:
# Load the sample IDs into a dataframe
df_original = pd.read_csv(input_filename, sep=',', encoding='utf-8')

In [21]:
# Display the input data: the file mixes ISIN and LEI codes (see ID_TYPE)
df_original

Unnamed: 0,ID,ID_TYPE,RESULT_VALIDATION
0,SK1120005824,isin,True
1,097900BHK10000084115,lei,True
2,DE0005545503,isin,True
3,5299003VKVDCUPSS5X23,lei,True
4,254900B1P3S786KDAW57,lei,True
5,529900MVZ2YHFZV3K546,lei,True


In [22]:
# Request upper-case output from the cleaner
id_cleaner_obj.output_lettercase = 'upper'

In [23]:
# Treat the IDs in the dataframe as LEI codes
id_cleaner_obj.id_type = 'lei'

In [24]:
# Clean the 'ID' column; the returned dataframe carries the results in the
# additional columns 'ID_clean' and 'ID_valid'
df_cleaner = id_cleaner_obj.apply_cleaner_to_df(df_original, 'ID', 'clean', 'valid')

In [25]:
# Display the result: the ISIN rows fail the LEI validation, so ID_valid is
# False and ID_clean is empty (set_null_for_invalid_ids is still True)
df_cleaner

Unnamed: 0,ID,ID_TYPE,RESULT_VALIDATION,ID_clean,ID_valid
0,SK1120005824,isin,True,,False
1,097900BHK10000084115,lei,True,097900BHK10000084115,True
2,DE0005545503,isin,True,,False
3,5299003VKVDCUPSS5X23,lei,True,5299003VKVDCUPSS5X23,True
4,254900B1P3S786KDAW57,lei,True,254900B1P3S786KDAW57,True
5,529900MVZ2YHFZV3K546,lei,True,529900MVZ2YHFZV3K546,True


In [26]:
# Stop replacing invalid ids with null values
id_cleaner_obj.set_null_for_invalid_ids = False

In [27]:
# Run the cleaner on the same dataframe again with the new setting
df_cleaner = id_cleaner_obj.apply_cleaner_to_df(df_original, 'ID', 'clean', 'valid')

In [28]:
# Display the result: invalid rows now keep their value in ID_clean while
# ID_valid remains False
df_cleaner

Unnamed: 0,ID,ID_TYPE,RESULT_VALIDATION,ID_clean,ID_valid
0,SK1120005824,isin,True,SK1120005824,False
1,097900BHK10000084115,lei,True,097900BHK10000084115,True
2,DE0005545503,isin,True,DE0005545503,False
3,5299003VKVDCUPSS5X23,lei,True,5299003VKVDCUPSS5X23,True
4,254900B1P3S786KDAW57,lei,True,254900B1P3S786KDAW57,True
5,529900MVZ2YHFZV3K546,lei,True,529900MVZ2YHFZV3K546,True


## 5. Cleaning a csv file with AutoCleaner

In [29]:
# Import the module for cleaning files automatically (AutoCleaner)
from financial_entity_cleaner.auto_cleaner import auto_cleaner

In [30]:
# Instantiate the AutoCleaner
auto_cleaner_obj = auto_cleaner.AutoCleaner()

C:\Users\Patrycia\OneDrive\Projects\GitHub\os_climate\financial-entity-cleaner\financial_entity_cleaner\auto_cleaner\logs


In [31]:
# CSV file to be cleaned
input_filename = '../../tests/data/test_cleaner_ids.csv'

In [32]:
# JSON file passed to the AutoCleaner as the cleaning set-up
# (presumably describes which columns to clean and how -- TODO confirm)
setup_cleaning_filename = '../../tests/data/test_cleaner_ids.json'

In [33]:
# CSV file passed to the AutoCleaner as the output location
output_filename = '../../tests/data/test_cleaner_ids_result.csv'

In [35]:
# Clean the input CSV file using the set-up file and the output file name
# defined above; the call returned True in this run
auto_cleaner_obj.clean_csv_file(input_filename, setup_cleaning_filename, output_filename)

True