In [1]:
#Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os
import copy

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import metrics
from tensorflow.keras.metrics import Metric
import keras_tuner as kt

from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

In [2]:
#Custom Imports
sys.path.append('../src')
import ml_functions as ml
import neural_network as nn

In [3]:
#Loading Data
df = pd.read_csv('../data/training_set_(50_50).csv', index_col = 0)

In [4]:
#Splitting Data
df_list_list, index_list_list = ml.split_data(df, 'ponzi', StratifiedKFold, n_splits= 10, shuffle = True, random_state = 2022)
data_list_list = copy.deepcopy(df_list_list)

In [5]:
#Scaling Data
col_ss = ['account_creation_time', 'gini_coefficient_accounts_received', 'gini_coefficient_accounts_sent', 'gini_coefficient_values_received', 'gini_coefficient_values_sent']
col_mm = []
col_pt = [x for x in df.columns if x not in col_ss and x != 'ponzi' and x not in col_mm]
scale_cols = [col_pt, col_ss, col_mm]
scale_methods = [PowerTransformer(method = 'box-cox'), StandardScaler(), MinMaxScaler()]
for i, (x_train, x_test, y_train, y_test) in enumerate(data_list_list):
    x_train = ml.scaling(x_train, scale_cols, scale_methods, default_scaler = 2)
    x_test = ml.scaling(x_test, scale_cols, scale_methods, default_scaler = 2)
    
    x_train = tf.convert_to_tensor(x_train, dtype = tf.float64)
    x_test = tf.convert_to_tensor(x_test, dtype = tf.float64)
    y_train = tf.convert_to_tensor(y_train, dtype = tf.float64)
    y_test = tf.convert_to_tensor(y_test, dtype = tf.float64)
    
    data_list_list[i] = [x_train, x_test, y_train, y_test]

In [6]:
#Selecting which K-fold of data to work with
kfold = 0

In [7]:
def build_model(hp):
    #Defining hyperparameter options
    hp_n_dense = hp.Int('n_dense', 1, 64)
    total_n_nodes = 1024
    hp_dropout_param = hp.Float('dropout_param', min_value = 0, max_value = 0.5, step = 0.1)
    
    n_nodes_per_layer = total_n_nodes // hp_n_dense
    
    layer_types = ['dense','dropout'] * hp_n_dense
    
    n_nodes = [n_nodes_per_layer if x == 'dense' else None for x in layer_types]
    activations = [tf.nn.leaky_relu if x == 'dense' else None for x in layer_types]
    
    #Creating NN
    model = tf.keras.Sequential()
    for lay, n_node, act in zip(layer_types, n_nodes, activations):
        if lay == 'dense':
            model.add(layers.Dense(n_node, activation = act, dtype = 'float64'))
        elif lay == 'dropout':
            model.add(layers.Dropout(hp_dropout_param, dtype = 'float64'))

    #Output layer
    model.add(layers.Dense(1, activation = tf.math.sigmoid, dtype='float64'))

    #Compiling
    model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.1), loss = 'BinaryCrossentropy',
                  metrics = [metrics.BinaryAccuracy(), metrics.Precision(), metrics.Recall()])
    
    return model

In [8]:
#Defining callback options
callback_es = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 50, verbose = 1)
callback_rlr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', factor = 0.2, patience = 5,
                                                    min_delta = 1E-4, cooldown = 5, min_lr = 1E-6, verbose = 1)
callback_mcp = tf.keras.callbacks.ModelCheckpoint(
    filepath = os.path.join(r'C:\Users\quekh\Desktop\temp', 'hp_tuning', 'ckpt', 'weights.{epoch:02d}-{val_loss:.2f}'),
    save_freq = 'epoch', save_weights_only = True, verbose = 1,
    monitor = 'val_binary_accuracy', mode = 'max', save_best_only = True)

callback_list = [callback_es, callback_rlr, callback_mcp]

In [9]:
#Defining 2 methods of conducting hyperparameter tuning
tuner_rs = kt.RandomSearch(build_model, objective = kt.Objective('val_recall', 'max'), max_trials = 100,
                           directory = r'C:\Users\quekh\Desktop\temp', project_name = 'rs')
tuner_hpb = kt.Hyperband(build_model, objective = kt.Objective('val_recall', 'max'), max_epochs = 1000, factor = 3,
                         directory = r'C:\Users\quekh\Desktop\temp', project_name = 'hpb')

In [10]:
#Random Search
tuner_rs.search(df_list_list[kfold][0], df_list_list[kfold][2], epochs = 1000,
                validation_data = (df_list_list[kfold][1], df_list_list[kfold][3]), callbacks = callback_list)

Trial 100 Complete [00h 03m 28s]
val_recall: 1.0

Best val_recall So Far: 1.0
Total elapsed time: 09h 36m 10s
INFO:tensorflow:Oracle triggered exit


In [11]:
#Hyperband Search
tuner_hpb.search(df_list_list[kfold][0], df_list_list[kfold][2],
                 epochs = 1000, validation_data = (df_list_list[kfold][1], df_list_list[kfold][3]), callbacks = callback_list)

Trial 339 Complete [00h 00m 11s]
val_recall: 0.0019193857442587614

Best val_recall So Far: 1.0
Total elapsed time: 01h 07m 05s
INFO:tensorflow:Oracle triggered exit


In [19]:
best_models_rs = tuner_rs.get_best_models(num_models = 5)
best_hp_rs = tuner_rs.get_best_hyperparameters(num_trials = 5)
best_models_hpb = tuner_hpb.get_best_models(num_models = 5)
best_hp_hpb = tuner_hpb.get_best_hyperparameters(num_trials = 5)







In [27]:
for i in range(5):
    print('Random Search:')
    print(best_hp_rs[i].get('n_dense'), best_hp_rs[i].get('dropout_param'))
    print('Hypberband Search:')
    print(best_hp_hpb[i].get('n_dense'), best_hp_hpb[i].get('dropout_param'))

Random Search:
45 0.2
Hypberband Search:
20 0.1
Random Search:
61 0.1
Hypberband Search:
33 0.5
Random Search:
16 0.30000000000000004
Hypberband Search:
62 0.4
Random Search:
14 0.0
Hypberband Search:
55 0.2
Random Search:
62 0.2
Hypberband Search:
49 0.4
