# Synthetic ATM Electronic journal generator

* Generated with AI support
* Luis Gerardo Baeza, Aug 2024

In [35]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from google.cloud import bigquery

project_id = ""
dataset_id = f"bank_retail"
table_id = "atm_journal"


In [37]:
num_rows = 100000 #100K
locations = {
    'Santiago': (-33.4569400, -70.6482700),
    'Concepción': (-36.8269, -73.0501),
    'Valparaíso': (-33.0472, -71.6127),
    'Antofagasta': (-23.6500, -70.4000),
    'Viña del Mar': (-33.0245, -71.5546)
}
currencies = ['CLP', 'USD', 'EUR', 'JPY', 'GBP'] 
currency_weight = [0.7, 0.1, 0.1, 0.05, 0.05]
languages = ['Spanish', 'English']
banks = ['Santander', 'Banco de Chile', 'BCI', 'Scotiabank', 'Itaú']
banks_weight = [0.7, 0.1, 0.1, 0.05, 0.05]
countries = ['Chile', 'USA', 'Argentina', 'Peru', 'Brazil']
countries_weight = [0.7, 0.05, 0.10, 0.05, .10]
card_types = ['Debit', 'Credit']
card_brands = ['Visa', 'Mastercard', 'Redcompra']
transaction_types = ['Withdrawal', 'Deposit', 'Balance Inquiry', 'Transfer']
response_codes = ['00', '01', '05', '12', '51']
response_codes_weight = [0.60, 0.10, 0.10, 0.10, 0.10]

In [30]:
def insert_into_bigquery(project_id, dataset_id, table_id, df):
    client = bigquery.Client(project=project_id)

    # Get the table reference
    table_ref = client.dataset(dataset_id).table(table_id)

    # Configure the job
    job_config = bigquery.LoadJobConfig(
        # Autodetect the schema from the DataFrame
        autodetect=True,
        # Specify the write disposition (append to existing table, replace, etc.)
        write_disposition="WRITE_APPEND",  # Or WRITE_TRUNCATE, WRITE_EMPTY
        # Source format is CSV (you can change this if needed)
        source_format=bigquery.SourceFormat.CSV
    )

    # Load the data into BigQuery
    job = client.load_table_from_dataframe(
        df, table_ref, job_config=job_config
    )

    job.result()

    return len(df)

In [31]:
def generate_random_location(city):
    city = str(city) 
    center_lat, center_lon = locations[city]
    lat = np.random.uniform(center_lat - 0.1, center_lat + 0.1)
    lon = np.random.uniform(center_lon - 0.1, center_lon + 0.1)
    return f"{lat:.6f},{lon:.6f}"

In [32]:
def generate_atm_ejournal(num_rows):
    columns = [
        'Transaction_Datetime', 'Terminal_ID', 'Transaction_Type', 'Transaction_Amount',
        'Currency_Code', 'Card_Number_Masked', 'Account_Number_Masked', 'Authorization Code',
        'Response_Code', 'Transaction_Status', 'Customer_Language', 'ATM_Location', 'ATM_City', 'ATM_Country',
        'Card Issuer Country', 'Transaction Sequence Number', 'Journal_Sequence_Number',
        'Hardware Error Code', 'Software Error Code', 'Card Type', 'Card Brand', 'PIN Entry Attempts',
        'Cash Dispensed', 'Cash Deposited', 'Balance Before Transaction', 'Balance After Transaction',
        'Surcharge Amount', 'Fee Amount', 'Currency Conversion Rate', 'Network Fee', 'Bank Issuer'
    ]
    
    # Create a DataFrame with the specified number of rows and 31 columns (added Bank Issuer)
    ej_data = pd.DataFrame(np.nan, index=range(num_rows), columns=range(len(columns)))

    # Rename columns to meaningful names
    ej_data.columns = columns

    # Populate the DataFrame with sample data
    np.random.seed(0)  # For reproducibility

    # Transaction_Datetime
    now = datetime.now()
    ej_data['Transaction_Datetime'] = [
        (now - timedelta(minutes=i)).strftime('%Y-%m-%d %H:%M:%S') for i in range(num_rows)
    ]

    # Terminal_ID
    ej_data['Terminal_ID'] = ['ATM-' + str(i).zfill(5) for i in range(1, num_rows + 1)]

    # Transaction_Type
    ej_data['Transaction_Type'] = np.random.choice(transaction_types, num_rows)

    # Transaction_Amount, Currency_Code
    ej_data['Currency_Code'] = np.random.choice(currencies, num_rows, p=currency_weight)
    ej_data['Transaction_Amount'] = np.random.uniform(1000, 500000, num_rows).round(2) 

    # Card_Number_Masked, Account_Number_Masked
    ej_data['Card_Number_Masked'] = ['**** **** **** ' + str(i).zfill(4) for i in range(1000, 1000 + num_rows)]
    ej_data['Account_Number_Masked'] = ['**** **** **** **** ' + str(i).zfill(4) for i in range(2000, 2000 + num_rows)]

    # Authorization Code, Response_Code
    ej_data['Authorization Code'] = [str(i).zfill(6) for i in np.random.randint(100000, 999999, num_rows)]
    ej_data['Response_Code'] = np.random.choice(response_codes, num_rows, response_codes_weight)

    # Transaction_Status
    ej_data['Transaction_Status'] = np.where(ej_data['Response_Code'] == '00', 'Approved', 'Declined')

    # Customer_Language
    ej_data['Customer_Language'] = np.random.choice(languages, num_rows, p=[0.9, 0.1])

    # ATM_Location, ATM_Country
    cities = list(locations.keys())
    ej_data['ATM_City'] = np.random.choice(cities, num_rows)
    ej_data['ATM_Location'] = ej_data['ATM_City'].apply(generate_random_location)
    ej_data['ATM_Country'] = ['Chile'] * num_rows

    # Card Issuer Country
    ej_data['Card Issuer Country'] = np.random.choice(countries, num_rows, p=countries_weight)

    # Transaction Sequence Number, Journal_Sequence_Number
    ej_data['Transaction Sequence Number'] = range(1, num_rows + 1)
    ej_data['Journal_Sequence_Number'] = range(1001, 1001 + num_rows)

    # Hardware Error Code, Software Error Code
    error_codes = ['E001', 'E002', 'E003', 'None']
    ej_data['Hardware Error Code'] = np.random.choice(error_codes, num_rows)
    ej_data['Software Error Code'] = np.random.choice(error_codes, num_rows)

    # Card Type, Card Brand
    ej_data['Card Type'] = np.random.choice(card_types, num_rows)
    ej_data['Card Brand'] = np.random.choice(card_brands, num_rows)

    # PIN Entry Attempts
    ej_data['PIN Entry Attempts'] = np.random.randint(1, 4, num_rows)

    # Cash Dispensed, Cash Deposited
    ej_data['Cash Dispensed'] = np.where(
        ej_data['Transaction_Type'] == 'Withdrawal',
        ej_data['Transaction_Amount'],
        0
    )
    ej_data['Cash Deposited'] = np.where(
        ej_data['Transaction_Type'] == 'Deposit',
        ej_data['Transaction_Amount'],
        0
    )

    # Balance Before Transaction, Balance After Transaction
    initial_balance = 500000
    ej_data['Balance Before Transaction'] = [
        initial_balance - ej_data['Cash Dispensed'].iloc[:i].sum() + ej_data['Cash Deposited'].iloc[:i].sum()
        for i in range(num_rows)
    ]
    ej_data['Balance After Transaction'] = ej_data['Balance Before Transaction'] - ej_data['Cash Dispensed'] + ej_data['Cash Deposited']

    # Surcharge Amount, Fee Amount, Currency Conversion Rate, Network Fee
    ej_data['Surcharge Amount'] = 0 
    ej_data['Currency Conversion Rate'] = np.where(
        ej_data['Currency_Code'] != 'CLP',
        np.random.uniform(0.002, 0.003, num_rows).round(5),
        1
    )

    # Bank Issuer
    ej_data['Bank Issuer'] = np.random.choice(banks, num_rows, p=banks_weight)

    # Network Fee (updated logic)
    ej_data['Network Fee'] = np.where(
        (ej_data['Bank Issuer'] != 'Santander') & (ej_data['Currency_Code'] != 'CLP'),
        1000,  # Network fee when issuer is not Santander and currency is not CLP
        0
    )

    ej_data['Fee Amount'] = np.where(
        ej_data['Card Brand'] != 'Redcompra',
        500, 
        0
    )

    return ej_data

# Main

In [39]:
ej_data = generate_atm_ejournal(num_rows)

In [34]:
pd.set_option('display.max_columns', None)
ej_data.head(5)

Unnamed: 0,Transaction_Datetime,Terminal_ID,Transaction_Type,Transaction_Amount,Currency_Code,Card_Number_Masked,Account_Number_Masked,Authorization Code,Response_Code,Transaction_Status,Customer_Language,ATM_Location,ATM_City,ATM_Country,Card Issuer Country,Transaction Sequence Number,Journal_Sequence_Number,Hardware Error Code,Software Error Code,Card Type,Card Brand,PIN Entry Attempts,Cash Dispensed,Cash Deposited,Balance Before Transaction,Balance After Transaction,Surcharge Amount,Fee Amount,Currency Conversion Rate,Network Fee,Bank Issuer
0,2024-08-21 15:37:14,ATM-00001,Withdrawal,44477.52,CLP,**** **** **** 1000,**** **** **** **** 2000,222144,12,Declined,English,"-23.550940,-70.383630",Antofagasta,Chile,Chile,1,1001,E003,E001,Debit,Redcompra,1,44477.52,0.0,500000.0,455522.48,0,0,1.0,0,Santander
1,2024-08-21 15:36:14,ATM-00002,Transfer,11088.98,CLP,**** **** **** 1001,**** **** **** **** 2001,283561,5,Declined,Spanish,"-33.041626,-71.559660",Viña del Mar,Chile,Brazil,2,1002,,,Debit,Redcompra,1,0.0,0.0,455522.48,455522.48,0,0,1.0,0,Santander
2,2024-08-21 15:35:14,ATM-00003,Deposit,416477.3,EUR,**** **** **** 1002,**** **** **** **** 2002,184665,51,Declined,Spanish,"-36.802198,-73.082498",Concepción,Chile,Chile,3,1003,E003,E003,Credit,Redcompra,1,0.0,416477.3,455522.48,871999.78,0,0,0.00288,0,Santander
3,2024-08-21 15:34:14,ATM-00004,Withdrawal,389300.22,GBP,**** **** **** 1003,**** **** **** **** 2003,481567,5,Declined,Spanish,"-33.012250,-71.649260",Valparaíso,Chile,Chile,4,1004,E002,,Debit,Mastercard,2,389300.22,0.0,871999.78,482699.56,0,500,0.00258,0,Santander
4,2024-08-21 15:33:14,ATM-00005,Transfer,435136.06,CLP,**** **** **** 1004,**** **** **** **** 2004,403648,0,Approved,Spanish,"-32.968831,-71.464686",Viña del Mar,Chile,Chile,5,1005,E003,E001,Debit,Visa,3,0.0,0.0,482699.56,482699.56,0,500,1.0,0,Santander


In [40]:
insert_into_bigquery(project_id, dataset_id, table_id, ej_data)

100000