In [1]:
#Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os
import copy

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import metrics
from tensorflow.keras.metrics import Metric
import keras_tuner as kt

from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

In [2]:
#Custom Imports
# Add '../src' to the module search path so the two project-local modules
# below can be imported. The entry is relative, so it resolves against the
# kernel's current working directory; it must run before the imports.
sys.path.append('../src')
import ml_functions as ml
import neural_network as nn

In [3]:
#Loading Data
# Read the training and testing CSVs; in both files the first column is
# used as the row index.
df, df_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../data/training_set_(50_50).csv',
                     '../data/testing_set_(90_10).csv')
)

In [4]:
#Dropping unnecessary columns
# Columns removed from both the training and the testing frame.
to_drop = [
    'account_creation_time',
    'account_active_duration',
    'time_between_first_and_last_transaction',
    'gini_coefficient_accounts_received',
    'gini_coefficient_accounts_sent',
    'gini_coefficient_values_received',
    'gini_coefficient_values_sent',
]
# The frames are modified in place, so re-running this cell without
# reloading the data raises a KeyError (the columns are already gone).
for frame in (df, df_test):
    frame.drop(columns=to_drop, inplace=True)

In [5]:
#Shuffling Data
# Shuffle all rows with a fixed seed, then cut the frame into a leading
# training part and a trailing validation part.
validation_frac = 0.2
df = df.sample(frac=1, random_state=2022)
validation_index = int(len(df)*(1-validation_frac))+1

# Every column except the last is treated as a feature; the last column is
# the label and is cast to int.
# NOTE(review): this relies on the label being the final column - the test
# cell selects it by the name 'ponzi' instead; confirm the two agree.
features = df.iloc[:, :-1]
labels = df.iloc[:, -1].astype(int)

x_train, x_val = features.iloc[:validation_index], features.iloc[validation_index:]
y_train, y_val = labels.iloc[:validation_index], labels.iloc[validation_index:]

In [6]:
#Scaling Train Data
# The transformer is fitted on the training features only; the same fitted
# object is reused for the validation and test features in later cells.
# (A StandardScaler was the commented-out alternative here.)
scaler = PowerTransformer()

x_train = tf.convert_to_tensor(scaler.fit_transform(x_train), dtype=tf.float64)
y_train = tf.convert_to_tensor(y_train, dtype=tf.float64)

In [7]:
#Scaling Validation Data
# Apply the transformer fitted on the training features (no re-fitting),
# then convert features and labels to float64 tensors.
x_val = scaler.transform(x_val)
x_val = tf.convert_to_tensor(x_val, dtype=tf.float64)
y_val = tf.convert_to_tensor(y_val, dtype=tf.float64)

In [8]:
#Scaling Test Data
# Features are every column not named 'ponzi'; 'ponzi' itself is the label.
feature_mask = df_test.columns != 'ponzi'
x_test = scaler.transform(df_test.loc[:, feature_mask])
x_test = tf.convert_to_tensor(x_test, dtype=tf.float64)
y_test = tf.convert_to_tensor(df_test['ponzi'].astype(int), dtype=tf.float64)

In [9]:
def build_model(hp):
    """Build and compile a binary classifier for one hyperparameter trial.

    The network is a stack of ``n_dense`` (Dense -> Dropout) pairs followed by
    a single sigmoid output unit. A fixed budget of 1024 units is split evenly
    (integer division) across the dense layers, so more layers means narrower
    layers.

    Parameters
    ----------
    hp
        Hyperparameter container providing ``Int`` and ``Float``; it is
        supplied by the tuner that receives this function. Two entries are
        registered: 'n_dense' (4 to 32 in steps of 4) and 'dropout_param'
        (0.1 to 0.5 in steps of 0.05).

    Returns
    -------
    A compiled ``tf.keras.Sequential`` model using Adam (learning rate 1e-4),
    binary cross-entropy loss, and binary accuracy / precision / recall
    metrics.
    """
    #Defining hyperparameter options (registered in this order: n_dense, dropout_param)
    hp_n_dense = hp.Int('n_dense', 4, 32, 4)
    total_n_nodes = 1024
    hp_dropout_param = hp.Float('dropout_param', min_value=0.1, max_value=0.5, step=0.05)

    n_nodes_per_layer = total_n_nodes // hp_n_dense

    #Creating NN: each hidden block is a leaky-ReLU dense layer followed by dropout
    model = tf.keras.Sequential()
    for _ in range(hp_n_dense):
        model.add(layers.Dense(n_nodes_per_layer, activation=tf.nn.leaky_relu, dtype='float64'))
        model.add(layers.Dropout(hp_dropout_param, dtype='float64'))

    #Output layer
    model.add(layers.Dense(1, activation=tf.math.sigmoid, dtype='float64'))

    #Compiling
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1E-4),
        loss='BinaryCrossentropy',
        metrics=[metrics.BinaryAccuracy(), metrics.Precision(), metrics.Recall()],
    )

    return model

In [10]:
#Defining callback options
# NOTE(review): the checkpoint location below is an absolute, machine-specific
# Windows path; it has to be adjusted before running on another machine.
ckpt_dir = os.path.join(r'C:\Users\quekh\Desktop\temp', 'rs3', 'ckpt')
ckpt_pattern = os.path.join(ckpt_dir, 'weights.{epoch:02d}-{val_loss:.2f}')

# Stop a fit after 50 epochs without val_loss improvement.
callback_es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=50, verbose=1)

# Multiply the learning rate by 0.2 (down to 1e-6 at most) when val_loss has
# not improved by at least 1e-4 for 5 epochs, then wait 5 epochs before
# monitoring again.
callback_rlr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.2, patience=5, min_delta=1E-4,
    cooldown=5, min_lr=1E-6, verbose=1)

# Save weights only, and only when val_loss reaches a new minimum; the file
# name carries the epoch number and the val_loss value.
callback_mcp = tf.keras.callbacks.ModelCheckpoint(
    filepath=ckpt_pattern, monitor='val_loss', mode='min',
    save_best_only=True, save_weights_only=True, save_freq='epoch', verbose=1)

callback_list = [callback_es, callback_rlr, callback_mcp]

In [11]:
#Defining hyperparameter search method
# Random search over the space declared in build_model: up to 50 trials,
# ranked by the lowest val_loss.
# seed pins the sequence of sampled hyperparameter combinations so the search
# can be repeated; 2022 matches the random_state used when shuffling the data.
# It does not seed TensorFlow itself, so weight initialisation and dropout
# still vary between runs.
# NOTE(review): directory is an absolute, machine-specific Windows path.
tuner_rs = kt.RandomSearch(build_model, objective = kt.Objective('val_loss', 'min'), max_trials = 50,
                           seed = 2022,
                           directory = r'C:\Users\quekh\Desktop\temp', project_name = 'rs3')

In [12]:
#Random Search
# Each trial is fitted for at most 1000 epochs and scored on the validation
# split; the callbacks defined earlier are passed to every fit.
search_kwargs = dict(
    epochs=1000,
    validation_data=(x_val, y_val),
    callbacks=callback_list,
)
tuner_rs.search(x_train, y_train, **search_kwargs)

Trial 50 Complete [00h 03m 31s]
val_loss: 0.6956899166107178

Best val_loss So Far: 0.23328042030334473
Total elapsed time: 13h 17m 57s
INFO:tensorflow:Oracle triggered exit


In [13]:
# Retrieve the five best models and the five best hyperparameter sets from
# the tuner.
n_best = 5
best_models_rs = tuner_rs.get_best_models(num_models=n_best)
best_hp_rs = tuner_rs.get_best_hyperparameters(num_trials=n_best)

In [14]:
# Print n_dense and dropout_param for the best hyperparameter sets.
# Iterating over the list (capped at five entries) instead of indexing with a
# fixed range(5) avoids an IndexError when the tuner returned fewer than five
# sets.
for best_hp in best_hp_rs[:5]:
    print('Random Search:')
    print(best_hp.get('n_dense'), best_hp.get('dropout_param'))

Random Search:
8 0.1
Random Search:
4 0.20000000000000004
Random Search:
4 0.1
Random Search:
4 0.3500000000000001
Random Search:
4 0.30000000000000004
