In [None]:
import os
import datetime

from simulator.generator import generate_dataset, add_frauds


In [None]:
# Simulation size: number of customers, number of terminals, and number of
# days handed to the generator.
N_CUSTOMERS = 5_000
N_TERMINALS = 10_000
N_DAYS = 183

# First simulated day, written as an ISO date (YYYY-MM-DD).
START_DATE = "2020-04-01"


In [None]:
DIR_OUTPUT = "./data/simulated/"

# Sub-directories the export cells write into (relative to DIR_OUTPUT).
OUTPUT_SUBDIRS = ('pkl/', 'csv/', 'fraud/')


def ensure_output_dirs(base_dir, subdirs=OUTPUT_SUBDIRS):
    """Create ``base_dir`` and every entry of ``subdirs`` beneath it.

    Each directory is created independently with ``exist_ok=True``, so the
    call is safe to repeat and also repairs a base directory that already
    exists but is missing some of its sub-directories.

    Parameters
    ----------
    base_dir : str
        Root output directory.
    subdirs : iterable of str
        Directory names to create inside ``base_dir``.
    """
    os.makedirs(base_dir, exist_ok=True)
    for subdir in subdirs:
        os.makedirs(os.path.join(base_dir, subdir), exist_ok=True)


# make sure the dirs exist (re-running this cell is harmless)
ensure_output_dirs(DIR_OUTPUT)


In [None]:
# Run the simulator: it returns the customer profiles, the terminal profiles
# and the transactions table, in that order.
# NOTE(review): the meaning of r=5 is defined by generate_dataset — confirm there.
customer_profiles_table, terminal_profiles_table, transactions_df = generate_dataset(
    n_customers=N_CUSTOMERS,
    n_terminals=N_TERMINALS,
    nb_days=N_DAYS,
    start_date=START_DATE,
    r=5,
)


In [None]:
# Inject the fraud scenarios into the transactions table.
# The result replaces transactions_df, so run this cell only once per
# freshly generated dataset.
transactions_df = add_frauds(
    customer_profiles_table,
    terminal_profiles_table,
    transactions_df,
)


In [None]:
# Export the simulated transactions, one set of files per day:
#   pkl/<date>.pkl          every column
#   csv/<date>.csv          transactions without time-helper and fraud columns
#   fraud/<date>_fraud.csv  fraudulent transactions only
start_date = datetime.datetime.strptime(START_DATE, "%Y-%m-%d")
n_days_to_export = transactions_df.TX_TIME_DAYS.max() + 1  # day indices start at 0

for day in range(n_days_to_export):
    date = start_date + datetime.timedelta(days=day)
    filename_output = date.strftime("%Y-%m-%d")

    # Rows of the current day ordered by TX_TIME_SECONDS; the fraud subset is
    # ordered by TRANSACTION_ID.
    is_current_day = transactions_df.TX_TIME_DAYS == day
    transactions_day = transactions_df[is_current_day].sort_values('TX_TIME_SECONDS')
    is_fraud = transactions_day.TX_FRAUD == 1
    fraud_day = transactions_day[is_fraud].sort_values('TRANSACTION_ID')

    # The pickle is written before any column is dropped, so it keeps all of them.
    pkl_path = DIR_OUTPUT + "pkl/" + filename_output + '.pkl'
    transactions_day.to_pickle(pkl_path, protocol=4)

    # The CSV exports omit the time-helper columns; the transactions CSV
    # additionally omits the fraud label and scenario.
    transactions_day = transactions_day.drop(
        columns=['TX_TIME_SECONDS', 'TX_TIME_DAYS', 'TX_FRAUD', 'TX_FRAUD_SCENARIO'])
    fraud_day = fraud_day.drop(columns=['TX_TIME_SECONDS', 'TX_TIME_DAYS'])

    csv_path = DIR_OUTPUT + "csv/" + filename_output + '.csv'
    fraud_path = DIR_OUTPUT + "fraud/" + filename_output + '_fraud.csv'
    transactions_day.to_csv(csv_path, index=False)
    fraud_day.to_csv(fraud_path, index=False)


In [None]:
# Persist the terminal and customer profile tables next to the transaction
# exports, as protocol-4 pickles.
for profiles_table, pickle_name in (
    (terminal_profiles_table, 'terminal_profiles_table.pkl'),
    (customer_profiles_table, 'customer_profiles_table.pkl'),
):
    profiles_table.to_pickle(DIR_OUTPUT + pickle_name, protocol=4)
