# Synthetic ATM Electronic journal generator

* Generated with AI support
* Luis Gerardo Baeza, Aug 2024

In [35]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from google.cloud import bigquery

project_id = ""
dataset_id = f"bank_retail"
table_id = "atm_journal"


In [37]:
num_rows = 100000 #100K
locations = {
    'Santiago': (-33.4569400, -70.6482700),
    'Concepción': (-36.8269, -73.0501),
    'Valparaíso': (-33.0472, -71.6127),
    'Antofagasta': (-23.6500, -70.4000),
    'Viña del Mar': (-33.0245, -71.5546)
}
currencies = ['CLP', 'USD', 'EUR', 'JPY', 'GBP'] 
currency_weight = [0.7, 0.1, 0.1, 0.05, 0.05]
languages = ['Spanish', 'English']
banks = ['Santander', 'Banco de Chile', 'BCI', 'Scotiabank', 'Itaú']
banks_weight = [0.7, 0.1, 0.1, 0.05, 0.05]
countries = ['Chile', 'USA', 'Argentina', 'Peru', 'Brazil']
countries_weight = [0.7, 0.05, 0.10, 0.05, .10]
card_types = ['Debit', 'Credit']
card_brands = ['Visa', 'Mastercard', 'Redcompra']
transaction_types = ['Withdrawal', 'Deposit', 'Balance Inquiry', 'Transfer']
response_codes = ['00', '01', '05', '12', '51']
response_codes_weight = [0.60, 0.10, 0.10, 0.10, 0.10]

In [30]:
def insert_into_bigquery(project_id, dataset_id, table_id, df):
    client = bigquery.Client(project=project_id)

    # Get the table reference
    table_ref = client.dataset(dataset_id).table(table_id)

    # Configure the job
    job_config = bigquery.LoadJobConfig(
        # Autodetect the schema from the DataFrame
        autodetect=True,
        # Specify the write disposition (append to existing table, replace, etc.)
        write_disposition="WRITE_APPEND",  # Or WRITE_TRUNCATE, WRITE_EMPTY
        # Source format is CSV (you can change this if needed)
        source_format=bigquery.SourceFormat.CSV
    )

    # Load the data into BigQuery
    job = client.load_table_from_dataframe(
        df, table_ref, job_config=job_config
    )

    job.result()

    return len(df)

In [31]:
def generate_random_location(city):
    city = str(city) 
    center_lat, center_lon = locations[city]
    lat = np.random.uniform(center_lat - 0.1, center_lat + 0.1)
    lon = np.random.uniform(center_lon - 0.1, center_lon + 0.1)
    return f"{lat:.6f},{lon:.6f}"

In [41]:
def generate_atm_ejournal(num_rows):
    columns = [
        'Transaction_Datetime', 'Transaction_Duration', 'User_Clicks', 'Terminal_ID', 'Transaction_Type', 'Transaction_Amount',
        'Currency_Code', 'Card_Number_Masked', 'Account_Number_Masked', 'Authorization_Code',
        'Response_Code', 'Transaction_Status', 'Customer_Language', 'ATM_Location', 'ATM_City', 'ATM_Country',
        'Card_Issuer_Country', 'Transaction_Sequence_Number', 'Journal_Sequence_Number',
        'Hardware_Error_Code', 'Software_Error_Code', 'Card_Type', 'Card_Brand', 'PIN_Entry_Attempts',
        'Cash_Dispensed', 'Cash_Deposited', 'Balance_Before_Transaction', 'Balance_After_Transaction',
        'Surcharge_Amount', 'Fee_Amount', 'Currency_Conversion_Rate', 'Network_Fee', 'Bank_Issuer'
    ]
    
    # Create a DataFrame with the specified number of rows and 31 columns (added Bank_Issuer)
    ej_data = pd.DataFrame(np.nan, index=range(num_rows), columns=range(len(columns)))

    # Rename columns to meaningful names
    ej_data.columns = columns

    # Populate the DataFrame with sample data
    np.random.seed(0)  # For reproducibility

    # Transaction_Datetime
    now = datetime.now()
    ej_data['Transaction_Datetime'] = [
        (now - timedelta(minutes=i)).strftime('%Y-%m-%d %H:%M:%S') for i in range(num_rows)
    ]
    ej_data['Transaction_Duration'] = np.random.randint(10, 400, num_rows)
    ej_data['User_Clicks'] = np.random.randint(4, 30, num_rows)

    # Terminal_ID
    ej_data['Terminal_ID'] = ['ATM-' + str(i).zfill(5) for i in range(1, num_rows + 1)]

    # Transaction_Type
    ej_data['Transaction_Type'] = np.random.choice(transaction_types, num_rows)

    # Transaction_Amount, Currency_Code
    ej_data['Currency_Code'] = np.random.choice(currencies, num_rows, p=currency_weight)
    ej_data['Transaction_Amount'] = np.random.uniform(1000, 500000, num_rows).round(2) 

    # Card_Number_Masked, Account_Number_Masked
    ej_data['Card_Number_Masked'] = ['**** **** **** ' + str(i).zfill(4) for i in range(1000, 1000 + num_rows)]
    ej_data['Account_Number_Masked'] = ['**** **** **** **** ' + str(i).zfill(4) for i in range(2000, 2000 + num_rows)]

    # Authorization_Code, Response_Code
    ej_data['Authorization_Code'] = [str(i).zfill(6) for i in np.random.randint(100000, 999999, num_rows)]
    ej_data['Response_Code'] = np.random.choice(response_codes, num_rows, response_codes_weight)

    # Transaction_Status
    ej_data['Transaction_Status'] = np.where(ej_data['Response_Code'] == '00', 'Approved', 'Declined')

    # Customer_Language
    ej_data['Customer_Language'] = np.random.choice(languages, num_rows, p=[0.9, 0.1])

    # ATM_Location, ATM_Country
    cities = list(locations.keys())
    ej_data['ATM_City'] = np.random.choice(cities, num_rows)
    ej_data['ATM_Location'] = ej_data['ATM_City'].apply(generate_random_location)
    ej_data['ATM_Country'] = ['Chile'] * num_rows

    # Card_Issuer_Country
    ej_data['Card_Issuer_Country'] = np.random.choice(countries, num_rows, p=countries_weight)

    # Transaction_Sequence_Number, Journal_Sequence_Number
    ej_data['Transaction_Sequence_Number'] = range(1, num_rows + 1)
    ej_data['Journal_Sequence_Number'] = range(1001, 1001 + num_rows)

    # Hardware_Error_Code, Software_Error_Code
    error_codes = ['E001', 'E002', 'E003', 'None']
    ej_data['Hardware_Error_Code'] = np.random.choice(error_codes, num_rows)
    ej_data['Software_Error_Code'] = np.random.choice(error_codes, num_rows)

    # Card_Type, Card_Brand
    ej_data['Card_Type'] = np.random.choice(card_types, num_rows)
    ej_data['Card_Brand'] = np.random.choice(card_brands, num_rows)

    # PIN_Entry_Attempts
    ej_data['PIN_Entry_Attempts'] = np.random.randint(1, 4, num_rows)

    # Cash_Dispensed, Cash_Deposited
    ej_data['Cash_Dispensed'] = np.where(
        ej_data['Transaction_Type'] == 'Withdrawal',
        ej_data['Transaction_Amount'],
        0
    )
    ej_data['Cash_Deposited'] = np.where(
        ej_data['Transaction_Type'] == 'Deposit',
        ej_data['Transaction_Amount'],
        0
    )

    # Balance_Before_Transaction, Balance_After_Transaction
    initial_balance = 500000
    ej_data['Balance_Before_Transaction'] = [
        initial_balance - ej_data['Cash_Dispensed'].iloc[:i].sum() + ej_data['Cash_Deposited'].iloc[:i].sum()
        for i in range(num_rows)
    ]
    ej_data['Balance_After_Transaction'] = ej_data['Balance_Before_Transaction'] - ej_data['Cash_Dispensed'] + ej_data['Cash_Deposited']

    # Surcharge_Amount, Fee_Amount, Currency_Conversion_Rate, Network_Fee
    ej_data['Surcharge_Amount'] = 0 
    ej_data['Currency_Conversion_Rate'] = np.where(
        ej_data['Currency_Code'] != 'CLP',
        np.random.uniform(0.002, 0.003, num_rows).round(5),
        1
    )

    # Bank_Issuer
    ej_data['Bank_Issuer'] = np.random.choice(banks, num_rows, p=banks_weight)

    # Network_Fee (updated logic)
    ej_data['Network_Fee'] = np.where(
        (ej_data['Bank_Issuer'] != 'Santander') & (ej_data['Currency_Code'] != 'CLP'),
        1000,  # Network_Fee when issuer is not Santander and currency is not CLP
        0
    )

    ej_data['Fee_Amount'] = np.where(
        ej_data['Card_Brand'] != 'Redcompra',
        500, 
        0
    )

    return ej_data

# Main

In [42]:
ej_data = generate_atm_ejournal(num_rows)

In [43]:
pd.set_option('display.max_columns', None)
ej_data.head(5)

Unnamed: 0,Transaction_Datetime,Transaction_Duration,User_Clicks,Terminal_ID,Transaction_Type,Transaction_Amount,Currency_Code,Card_Number_Masked,Account_Number_Masked,Authorization_Code,Response_Code,Transaction_Status,Customer_Language,ATM_Location,ATM_City,ATM_Country,Card_Issuer_Country,Transaction_Sequence_Number,Journal_Sequence_Number,Hardware_Error_Code,Software_Error_Code,Card_Type,Card_Brand,PIN_Entry_Attempts,Cash_Dispensed,Cash_Deposited,Balance_Before_Transaction,Balance_After_Transaction,Surcharge_Amount,Fee_Amount,Currency_Conversion_Rate,Network_Fee,Bank_Issuer
0,2024-08-21 16:33:59,182,24,ATM-00001,Transfer,129998.53,JPY,**** **** **** 1000,**** **** **** **** 2000,645920,0,Approved,Spanish,"-23.664802,-70.476519",Antofagasta,Chile,Chile,1,1001,E001,E001,Debit,Visa,3,0.0,0.0,500000.0,500000.0,0,500,0.00201,0,Santander
1,2024-08-21 16:32:59,57,6,ATM-00002,Balance Inquiry,131905.42,CLP,**** **** **** 1001,**** **** **** **** 2001,486857,12,Declined,Spanish,"-33.014884,-71.587057",Viña del Mar,Chile,Chile,2,1002,,,Debit,Mastercard,1,0.0,0.0,500000.0,500000.0,0,500,1.0,0,BCI
2,2024-08-21 16:31:59,127,27,ATM-00003,Transfer,322998.3,USD,**** **** **** 1002,**** **** **** **** 2002,538959,51,Declined,Spanish,"-33.119519,-71.606093",Viña del Mar,Chile,Chile,3,1003,E001,E002,Debit,Visa,2,0.0,0.0,500000.0,500000.0,0,500,0.00229,0,Santander
3,2024-08-21 16:30:59,202,20,ATM-00004,Transfer,452255.03,CLP,**** **** **** 1003,**** **** **** **** 2003,629807,1,Declined,Spanish,"-33.425582,-70.713543",Santiago,Chile,Chile,4,1004,E003,E003,Credit,Mastercard,1,0.0,0.0,500000.0,500000.0,0,500,1.0,0,Santander
4,2024-08-21 16:29:59,333,7,ATM-00005,Deposit,443620.68,CLP,**** **** **** 1004,**** **** **** **** 2004,402120,51,Declined,Spanish,"-33.095329,-71.663935",Valparaíso,Chile,Chile,5,1005,,,Credit,Mastercard,1,0.0,443620.68,500000.0,943620.68,0,500,1.0,0,Santander


In [None]:
insert_into_bigquery(project_id, dataset_id, table_id, ej_data)