## Data from File 5.1 Suppliers Payees

### Step 1 - Load required packages

In [2]:
import pandas as pd
import pandasql as ps
from pathlib import Path

### Step 2 - Get sample from source file

In [3]:
# Locations of the raw supplier-payee extract and of the working folder.
sourceFolder = '/content/drive/Shareddrives/Esperanto/Supports/Suppliers/5_Suppliers_Payees_BankAccounts'
workFolder = '/content/drive/Shareddrives/Esperanto/Supports/Suppliers/Work/5_Bank_Accounts'
fileName = '5.1. SUPPLIER_PAYEES_COPPEL.csv'

# Print the first lines of the raw file to inspect its delimiter and quoting
# before parsing it. 11 lines are shown, matching the original preview size.
previewLineCount = 11
# The context manager closes the file even if reading or printing fails.
with open(Path(sourceFolder, fileName), 'r', encoding='latin-1') as inputTextFile:
    # Iterating the file stops at end-of-file, so a file shorter than the
    # preview size no longer prints empty strings for the missing lines.
    for numTextLines, textLine in enumerate(inputTextFile, start=1):
        print(textLine)
        if numTextLines >= previewLineCount:
            break

'SISTEMA_LEGADO';'NUMERO_PROVEEDOR';'FEEDER_IMPORT_BATCH_ID';'TEMP_EXT_PAYEE_ID';'BUSINESS_UNIT';'VENDOR_NUM';'VENDOR_SITE_CODE';'EXCLUSIVE_PAYMENT_FLAG';'DEFAULT_PAYMENT_METHOD_CODE';'DELIVERY_CHANNEL_CODE';'SETTLEMENT_PRIORITY';'REMIT_ADVICE_DELIVERY_METHOD';'REMIT_ADVICE_EMAIL';'REMIT_ADVICE_FAX';'BANK_INSTRUCTION1_CODE';'BANK_INSTRUCTION2_CODE';'BANK_INSTRUCTION_DETAILS';'PAYMENT_REASON_CODE';'PAYMENT_REASON_COMMENTS';'PAYMENT_TEXT_MESSAGE1';'PAYMENT_TEXT_MESSAGE2';'PAYMENT_TEXT_MESSAGE3';'BANK_CHARGE_BEARER'

'SAJ';'3';'100';'7';'BU_CSACV';'';'SJ3MXN004';'N';'Transferencia';'';'';'';'';'';'';'';'';'';'';'';'';'';''

'OBRAS';'GAVE860528F18';'100';'9';'BU_CSACV';'0 VERGARA EFRAIN';'SOGAVE860528F18';'N';'Transferencia';'';'';'';'';'';'';'';'';'';'';'';'';'';''

'OBRAS';'CST050404CH3';'100';'11';'BU_CSACV';'04 ST SC';'SOCST050404CH3';'N';'Transferencia';'';'';'';'';'';'';'';'';'';'';'';'';'';''

'GASTOS';'BIO111108JC6';'100';'12';'BU_CSACV';'100% BIORESPONSABLES SA DE CV';'GTBIO111108

### Step 3 - Load supplier payee data

In [4]:
# Parse the extract: fields are separated by ';' and wrapped in single quotes.
df = pd.read_csv(
    Path(sourceFolder, fileName),
    sep=';',
    quotechar="'",
    encoding='latin-1',
)
# Show the dtype pandas inferred for each column.
df.dtypes

SISTEMA_LEGADO                   object
NUMERO_PROVEEDOR                 object
FEEDER_IMPORT_BATCH_ID            int64
TEMP_EXT_PAYEE_ID                 int64
BUSINESS_UNIT                    object
VENDOR_NUM                       object
VENDOR_SITE_CODE                 object
EXCLUSIVE_PAYMENT_FLAG           object
DEFAULT_PAYMENT_METHOD_CODE      object
DELIVERY_CHANNEL_CODE           float64
SETTLEMENT_PRIORITY             float64
REMIT_ADVICE_DELIVERY_METHOD    float64
REMIT_ADVICE_EMAIL              float64
REMIT_ADVICE_FAX                float64
BANK_INSTRUCTION1_CODE          float64
BANK_INSTRUCTION2_CODE          float64
BANK_INSTRUCTION_DETAILS        float64
PAYMENT_REASON_CODE             float64
PAYMENT_REASON_COMMENTS         float64
PAYMENT_TEXT_MESSAGE1           float64
PAYMENT_TEXT_MESSAGE2           float64
PAYMENT_TEXT_MESSAGE3           float64
BANK_CHARGE_BEARER              float64
dtype: object

In [5]:
# Plain-text preview of the first 10 parsed rows.
print(df.head(n=10))

  SISTEMA_LEGADO NUMERO_PROVEEDOR  FEEDER_IMPORT_BATCH_ID  TEMP_EXT_PAYEE_ID  \
0            SAJ                3                     100                  7   
1          OBRAS    GAVE860528F18                     100                  9   
2          OBRAS     CST050404CH3                     100                 11   
3         GASTOS     BIO111108JC6                     100                 12   
4          OBRAS     BIO111108JC6                     100                 13   
5          OBRAS     CDC150325SZ0                     100                 16   
6         GASTOS     DSE090511EW5                     100                 18   
7          OBRAS     DSE090511EW5                     100                 19   
8         GASTOS     TVC8607234U2                     100                 24   
9          OBRAS     TVC8607234U2                     100                 23   

  BUSINESS_UNIT                        VENDOR_NUM VENDOR_SITE_CODE  \
0      BU_CSACV                               NaN

### Step 4 - Validate Tax ID by "NUMERO_PROVEEDOR"

In [6]:
# Flag each row whose NUMERO_PROVEEDOR value is 12 or 13 characters long.
# NOTE: modDf is the same object as df (no copy is made), so the new
# TAXID_VALIDATION column is also visible through df.
modDf = df
# Vectorized length check replaces the per-row iterrows()/.loc loop.
# astype(str) keeps the check from raising on non-string values (for example a
# missing value); such values simply fail the length test.
taxIdLengths = modDf['NUMERO_PROVEEDOR'].astype(str).str.len()
modDf['TAXID_VALIDATION'] = taxIdLengths.isin([12, 13])
print(modDf.head(10))

  SISTEMA_LEGADO NUMERO_PROVEEDOR  FEEDER_IMPORT_BATCH_ID  TEMP_EXT_PAYEE_ID  \
0            SAJ                3                     100                  7   
1          OBRAS    GAVE860528F18                     100                  9   
2          OBRAS     CST050404CH3                     100                 11   
3         GASTOS     BIO111108JC6                     100                 12   
4          OBRAS     BIO111108JC6                     100                 13   
5          OBRAS     CDC150325SZ0                     100                 16   
6         GASTOS     DSE090511EW5                     100                 18   
7          OBRAS     DSE090511EW5                     100                 19   
8         GASTOS     TVC8607234U2                     100                 24   
9          OBRAS     TVC8607234U2                     100                 23   

  BUSINESS_UNIT                        VENDOR_NUM VENDOR_SITE_CODE  \
0      BU_CSACV                               NaN

### Step 5 - Create a subset with elements whose tax ID and bank account are valid

In [7]:
# Keep only the rows flagged True in TAXID_VALIDATION.
hasValidTaxId = modDf['TAXID_VALIDATION'] == True
subSetDf = modDf[hasValidTaxId]

# Report how many rows survive the filter.
originalCount = len(modDf.index)
validCount = len(subSetDf.index)
print(originalCount, ' records in original dataset')
print(validCount, ' records with valid taxt id')

186261  records in original dataset
178796  records with valid taxt id


### Step 6 - Get valid tax IDs from the 5.2 dataset

In [16]:
# Read the 5.2 bank-accounts working file and list its columns.
tmpTaxIdDf = pd.read_csv(
    Path(workFolder, '5_2_Bank_Accounts.csv'),
    encoding='utf-8',
)
print(tmpTaxIdDf.columns.tolist())

# Build a one-column frame holding each distinct NUMERO_PROVEEDOR value once.
tmpValidTaxIds = tmpTaxIdDf['NUMERO_PROVEEDOR'].unique()
tmpDict = dict(NUMERO_PROVEEDOR=tmpValidTaxIds)
validTaxIdsDf = pd.DataFrame(tmpDict)
print(len(validTaxIdsDf.index), ' valid tax ids loaded.')

['Unnamed: 0', 'SISTEMA_LEGADO', 'NUMERO_PROVEEDOR', 'FEEDER_IMPORT_BATCH_ID', 'TEMP_EXT_PAYEE_ID', 'TEMP_EXT_BANK_ACCT_ID', 'BANK_NAME', 'BRANCH_NAME', 'COUNTRY_CODE', 'BANK_ACCOUNT_NUM', 'CURRENCY_CODE', 'FOREING_PAYMENT_USE_FLAG', 'ATTRIBUTE1']
74641  valid tax ids loaded.
