In [1]:
from load_data import (load_adult_dataset, load_beans_dataset,
                       load_digits_dataset, load_chess_dataset,
                       load_diabetes_dataset, load_sensorless_dataset)

from model_training import get_sample, split_train_test, compute_accuracy, compute_all_models_results

from plots import bar_plot
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from deepforest import CascadeForestClassifier
from sklearn import svm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, InputLayer
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

%load_ext autoreload
%autoreload

In [2]:
def print_accuracy(x_train, y_train, x_test, y_test, clf, model):
    print(f'Model {model}')
    print('Accuracy train: ' + str(compute_accuracy(x_train, y_train, clf, model)['accuracy']))
    print('Accuracy test : ' + str(compute_accuracy(x_test, y_test, clf, model)['accuracy']))

In [3]:
df_adult = load_adult_dataset() #(32561, 38)
df_beans = load_beans_dataset() #(13611, 17)
df_digits = load_digits_dataset() #(1797, 65)
df_chess = load_chess_dataset() #(3196, 37)
df_diabetes = load_diabetes_dataset() #(1151, 20)
df_sensorless = load_sensorless_dataset() #(58509, 49)

# Adult Dataset

In [4]:
df_adult_sample = get_sample(df_adult,1000,0)
x_train, x_test, y_train, y_test  = split_train_test(df_adult_sample)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Default params

In [5]:
clf_deep_forest_default = CascadeForestClassifier(random_state = 42,verbose=0)
clf_deep_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest_default, 'DF')

Model DF
Accuracy train: 100.0
Accuracy test : 84.0


In [6]:
clf_forest_default = RandomForestClassifier(random_state = 42)
clf_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_forest_default, 'RF')

Model RF
Accuracy train: 100.0
Accuracy test : 84.0


In [7]:
clf_tree_default = DecisionTreeClassifier(random_state = 42)
clf_tree_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_tree_default, 'DT')

Model DT
Accuracy train: 100.0
Accuracy test : 82.0


In [8]:
clf_svm_default = svm.SVC(random_state = 42)
clf_svm_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm_default, 'SVM')

Model SVM
Accuracy train: 81.0
Accuracy test : 83.0


In [9]:
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(1, activation="sigmoid"))
loss = 'binary_crossentropy'
clf_nn_default = Sequential(sequential_list)
clf_nn_default.compile(loss=loss, optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),metrics=['accuracy'])
clf_nn_default.fit(x_train, y_train, epochs=epochs, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf_nn_default, 'DNN')

Model DNN
Accuracy train: 93.0
Accuracy test : 80.0


## Reducing overfitting

In [10]:
clf_deep_forest = CascadeForestClassifier(random_state=42, max_depth=15, 
                                          n_estimators = 2, verbose=0)
clf_deep_forest.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest, 'DF')
# Accuracy train: 97.0
# Accuracy test : 85.0

Model DF
Accuracy train: 97.0
Accuracy test : 85.0


In [11]:
clf_forest = RandomForestClassifier(random_state = 42,
                                    max_depth = 10)
clf_forest.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_forest, 'RF')
# Accuracy train: 93.0
# Accuracy test : 85.0

Model RF
Accuracy train: 93.0
Accuracy test : 85.0


In [12]:
clf_tree = DecisionTreeClassifier(random_state = 42, max_depth = 5, criterion = 'entropy')
clf_tree.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_tree, 'DT')
# Accuracy train: 86.0
# Accuracy test : 85.0

Model DT
Accuracy train: 86.0
Accuracy test : 85.0


In [38]:
clf_svm = svm.SVC(C = 10, random_state = 42)

clf_svm.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm, 'SVM')
# Accuracy train: 80.0
# Accuracy test : 83.0

Model SVM
Accuracy train: 85.0
Accuracy test : 82.0


In [50]:
n_features = x_train.shape[1]
dropout = 0.15
nodes = 150
epochs = 300
learning_rate = 0.001
activation = "elu"
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation=activation, 
                             kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
            
                   Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(1, activation="sigmoid"))
loss = 'binary_crossentropy'
clf_nn_default = Sequential(sequential_list)
clf_nn_default.compile(loss=loss, optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),metrics=['accuracy'])
clf_nn_default.fit(x_train, y_train, epochs=epochs, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf_nn_default, 'DNN')

Model DNN
Accuracy train: 94.0
Accuracy test : 82.0


# Beans Dataset

In [51]:
df_beans_sample = get_sample(df_beans,1000,10)
x_train, x_test, y_train, y_test  = split_train_test(df_beans_sample)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Default params

In [52]:
clf_deep_forest_default = CascadeForestClassifier(random_state = 42,verbose=0)
clf_deep_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest_default, 'DF')

Model DF
Accuracy train: 100.0
Accuracy test : 90.0


In [53]:
clf_forest_default = RandomForestClassifier(random_state = 42)
clf_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_forest_default, 'RF')

Model RF
Accuracy train: 100.0
Accuracy test : 89.0


In [54]:
clf_tree_default = DecisionTreeClassifier(random_state = 42)
clf_tree_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_tree_default, 'DT')

Model DT
Accuracy train: 100.0
Accuracy test : 86.0


In [55]:
clf_svm_default = svm.SVC(random_state = 42)
clf_svm_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm_default, 'SVM')

Model SVM
Accuracy train: 93.0
Accuracy test : 90.0


In [56]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001

sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(n_classes + 1, activation="softmax"))
loss = "sparse_categorical_crossentropy"
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 97.0
Accuracy test : 91.0


# Reducing overfitting

In [67]:
clf_deep_forest = CascadeForestClassifier(random_state=42, 
                                          max_depth=15, 
                                          n_trees = 100,
                                          verbose=0)
clf_deep_forest.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest, 'DF')
# Accuracy train: 100
# Accuracy test : 90

Model DF
Accuracy train: 100.0
Accuracy test : 90.0


In [76]:
clf_forest = RandomForestClassifier(random_state = 42,
                                    n_estimators = 100,
                                    max_depth = 15)
clf_forest.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_forest, 'RF')
# Accuracy train: 100.0
# Accuracy test : 89.0

Model RF
Accuracy train: 100.0
Accuracy test : 89.0


In [77]:
clf_tree = DecisionTreeClassifier(random_state = 42, 
                                  max_depth = 10, 
                                  criterion = 'entropy')
clf_tree.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_tree, 'DT')
# Accuracy train: 100.0
# Accuracy test : 88.0

Model DT
Accuracy train: 100.0
Accuracy test : 88.0


In [60]:
clf_svm = svm.SVC(C = 10, 
                  random_state = 42)

clf_svm.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm, 'SVM')
# Accuracy train: 95.0
# Accuracy test : 92.0

Model SVM
Accuracy train: 95.0
Accuracy test : 92.0


In [79]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.15
nodes = 200
epochs = 200
learning_rate = 0.001
activation = "relu"
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(n_classes+1, activation="softmax"))
loss = "sparse_categorical_crossentropy"
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 97.0
Accuracy test : 91.0


# Digits Dataset

In [80]:
df_digits_sample = get_sample(df_digits,1000,10)
x_train, x_test, y_train, y_test  = split_train_test(df_digits_sample)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Default params

In [81]:
clf_deep_forest_default = CascadeForestClassifier(random_state = 42,verbose=0)
clf_deep_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest_default, 'DF')

Model DF
Accuracy train: 100.0
Accuracy test : 95.0


In [82]:
clf_forest_default = RandomForestClassifier(random_state = 42)
clf_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_forest_default, 'RF')

Model RF
Accuracy train: 100.0
Accuracy test : 96.0


In [83]:
clf_tree_default = DecisionTreeClassifier(random_state = 42)
clf_tree_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_tree_default, 'DT')

Model DT
Accuracy train: 100.0
Accuracy test : 81.0


In [84]:
clf_svm_default = svm.SVC(random_state = 42)
clf_svm_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm_default, 'SVM')

Model SVM
Accuracy train: 100.0
Accuracy test : 97.0


In [85]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001

sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(n_classes+1, activation="softmax"))
loss = "sparse_categorical_crossentropy"
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 100.0
Accuracy test : 95.0


# Reducing overfitting

In [86]:
clf_deep_forest = CascadeForestClassifier(random_state=42, 
                                          max_depth=10, 
                                          n_estimators = 2, 
                                          n_trees = 200,
                                          verbose=0)
clf_deep_forest.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest, 'DF')
# Accuracy train: 100
# Accuracy test : 97

Model DF
Accuracy train: 100.0
Accuracy test : 97.0


In [91]:
clf_forest = RandomForestClassifier(random_state = 42,
                                    n_estimators = 100,
                                    max_depth = 15)
clf_forest.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_forest, 'RF')
# Accuracy train: 100.0
# Accuracy test : 95

Model RF
Accuracy train: 100.0
Accuracy test : 96.0


In [88]:
clf_tree = DecisionTreeClassifier(random_state = 42, 
                                  max_depth = 10, 
                                  criterion = 'entropy')
clf_tree.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_tree, 'DT')
# Accuracy train: 100.0
# Accuracy test : 88.0

Model DT
Accuracy train: 100.0
Accuracy test : 85.0


In [94]:
clf_svm = svm.SVC(C = 10, 
                  random_state = 42)

clf_svm.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm, 'SVM')
# Accuracy train: 95.0
# Accuracy test : 92.0

Model SVM
Accuracy train: 100.0
Accuracy test : 99.0


In [90]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001
activation = "relu"
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(n_classes + 1, activation="softmax"))
loss = "sparse_categorical_crossentropy"
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 100.0
Accuracy test : 97.0


# Chess Dataset

In [95]:
df_chess_sample = get_sample(df_chess,1000,10)
x_train, x_test, y_train, y_test  = split_train_test(df_chess_sample)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Default params

In [96]:
clf_deep_forest_default = CascadeForestClassifier(random_state = 42,verbose=0)
clf_deep_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest_default, 'DF')

Model DF
Accuracy train: 100.0
Accuracy test : 99.0


In [97]:
clf_forest_default = RandomForestClassifier(random_state = 42)
clf_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_forest_default, 'RF')

Model RF
Accuracy train: 100.0
Accuracy test : 98.0


In [98]:
clf_tree_default = DecisionTreeClassifier(random_state = 42)
clf_tree_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_tree_default, 'DT')

Model DT
Accuracy train: 100.0
Accuracy test : 99.0


In [99]:
clf_svm_default = svm.SVC(random_state = 42)
clf_svm_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm_default, 'SVM')

Model SVM
Accuracy train: 95.0
Accuracy test : 96.0


In [100]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001

sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(1, activation="sigmoid"))
loss = 'binary_crossentropy'
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 100.0
Accuracy test : 99.0


# Reducing overfitting

In [101]:
clf_deep_forest = CascadeForestClassifier(random_state=42, 
                                          max_depth=15, 
                                          n_estimators = 4, 
                                          n_trees = 500,
                                          verbose=0)
clf_deep_forest.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest, 'DF')
# Accuracy train: 100
# Accuracy test : 97

Model DF
Accuracy train: 100.0
Accuracy test : 99.0


In [102]:
clf_forest = RandomForestClassifier(random_state = 42,
                                    n_estimators = 100,
                                    max_depth = 15)
clf_forest.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_forest, 'RF')
# Accuracy train: 100.0
# Accuracy test : 95

Model RF
Accuracy train: 100.0
Accuracy test : 99.0


In [103]:
clf_tree = DecisionTreeClassifier(random_state = 42, 
                                  max_depth = 10, 
                                  criterion = 'entropy')
clf_tree.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_tree, 'DT')
# Accuracy train: 100.0
# Accuracy test : 88.0

Model DT
Accuracy train: 100.0
Accuracy test : 99.0


In [104]:
clf_svm = svm.SVC(C = 100, 
                  random_state = 42)

clf_svm.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm, 'SVM')
# Accuracy train: 95.0
# Accuracy test : 92.0

Model SVM
Accuracy train: 100.0
Accuracy test : 98.0


In [105]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001
activation = "relu"
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(1, activation="sigmoid"))
loss = 'binary_crossentropy'
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 100.0
Accuracy test : 95.0


# Diabetes Dataset

In [106]:
df_diabetes_sample = get_sample(df_diabetes,1000,0)
x_train, x_test, y_train, y_test  = split_train_test(df_diabetes_sample)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Default params

In [107]:
clf_deep_forest_default = CascadeForestClassifier(random_state = 42,verbose=0)
clf_deep_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest_default, 'DF')

Model DF
Accuracy train: 100.0
Accuracy test : 72.0


In [108]:
clf_forest_default = RandomForestClassifier(random_state = 42)
clf_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_forest_default, 'RF')

Model RF
Accuracy train: 100.0
Accuracy test : 70.0


In [109]:
clf_tree_default = DecisionTreeClassifier(random_state = 42)
clf_tree_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_tree_default, 'DT')

Model DT
Accuracy train: 100.0
Accuracy test : 62.0


In [110]:
clf_svm_default = svm.SVC(random_state = 42)
clf_svm_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm_default, 'SVM')

Model SVM
Accuracy train: 66.0
Accuracy test : 66.0


In [111]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001

sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(1, activation="sigmoid"))
loss = 'binary_crossentropy'
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 80.0
Accuracy test : 72.0


# Reducing overfitting

In [130]:
clf_deep_forest = CascadeForestClassifier(random_state=42, 
                                          max_depth = 15,
                                          n_estimators = 2,
                                          verbose=0)
clf_deep_forest.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest, 'DF')
# Accuracy train: 100
# Accuracy test : 72

Model DF
Accuracy train: 100.0
Accuracy test : 70.0


In [124]:
clf_forest = RandomForestClassifier(random_state = 42,
                                    n_estimators = 200,
                                    max_depth = 15)
clf_forest.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_forest, 'RF')
# Accuracy train: 100.0
# Accuracy test : 95

Model RF
Accuracy train: 100.0
Accuracy test : 72.0


In [131]:
clf_tree = DecisionTreeClassifier(random_state = 42, 
                                  max_depth = 16, 
                                  criterion = 'gini')
clf_tree.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_tree, 'DT')
# Accuracy train: 100.0
# Accuracy test : 88.0

Model DT
Accuracy train: 100.0
Accuracy test : 63.0


In [132]:
clf_svm = svm.SVC(C = 100, 
                  random_state = 42)

clf_svm.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm, 'SVM')
# Accuracy train: 95.0
# Accuracy test : 92.0

Model SVM
Accuracy train: 81.0
Accuracy test : 75.0


In [133]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.2
nodes = 100
epochs = 400
learning_rate = 0.001
#activation = "relu"
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(1, activation="sigmoid"))
loss = 'binary_crossentropy'
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=epochs, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 89.0
Accuracy test : 70.0


# Sensorless Dataset

In [134]:
df_sensorless_sample = get_sample(df_sensorless,1000,10)
x_train, x_test, y_train, y_test  = split_train_test(df_sensorless_sample)
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## Default params

In [135]:
clf_deep_forest_default = CascadeForestClassifier(random_state = 42,verbose=0)
clf_deep_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest_default, 'DF')

Model DF
Accuracy train: 100.0
Accuracy test : 95.0


In [136]:
clf_forest_default = RandomForestClassifier(random_state = 42)
clf_forest_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_forest_default, 'RF')

Model RF
Accuracy train: 100.0
Accuracy test : 95.0


In [137]:
clf_tree_default = DecisionTreeClassifier(random_state = 42)
clf_tree_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_tree_default, 'DT')

Model DT
Accuracy train: 100.0
Accuracy test : 86.0


In [138]:
clf_svm_default = svm.SVC(random_state = 42)
clf_svm_default.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm_default, 'SVM')

Model SVM
Accuracy train: 74.0
Accuracy test : 56.99999999999999


In [146]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.1
nodes = 100
epochs = 200
learning_rate = 0.001

sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation="elu", kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(n_classes+1, activation="softmax"))
loss = "sparse_categorical_crossentropy"
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 99.0
Accuracy test : 87.0


# Reducing overfitting

In [140]:
clf_deep_forest = CascadeForestClassifier(random_state=42, 
                                          max_depth=15, 
                                          verbose=0)
clf_deep_forest.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_deep_forest, 'DF')
# Accuracy train: 100
# Accuracy test : 97

Model DF
Accuracy train: 100.0
Accuracy test : 95.0


In [141]:
clf_forest = RandomForestClassifier(random_state = 42,
                                    n_estimators = 100,
                                    max_depth = 15)
clf_forest.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_forest, 'RF')
# Accuracy train: 100.0
# Accuracy test : 95

Model RF
Accuracy train: 100.0
Accuracy test : 95.0


In [142]:
clf_tree = DecisionTreeClassifier(random_state = 42, 
                                  max_depth = 10, 
                                  criterion = 'entropy')
clf_tree.fit(x_train, y_train)

print_accuracy(x_train, y_train, x_test, y_test, clf_tree, 'DT')
# Accuracy train: 100.0
# Accuracy test : 88.0

Model DT
Accuracy train: 99.0
Accuracy test : 87.0


In [143]:
clf_svm = svm.SVC(C = 100, 
                  random_state = 42)

clf_svm.fit(x_train, y_train)
print_accuracy(x_train, y_train, x_test, y_test, clf_svm, 'SVM')
# Accuracy train: 95.0
# Accuracy test : 92.0

Model SVM
Accuracy train: 100.0
Accuracy test : 86.0


In [149]:
n_classes = len(np.unique(y_train))
n_features = x_train.shape[1]
dropout = 0.15
nodes = 100
epochs = 200
learning_rate = 0.001
sequential_list = [InputLayer(n_features),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal()),
                       Dropout(dropout),
                       Dense(nodes, activation=activation, kernel_initializer=tf.keras.initializers.HeNormal())
                       ]

sequential_list.append(Dense(n_classes + 1, activation="softmax"))
loss = "sparse_categorical_crossentropy"
clf = Sequential(sequential_list)
clf.compile(loss=loss,
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=['accuracy'])
clf.fit(x_train, y_train, epochs=200, batch_size=32, verbose=0)
print_accuracy(x_train, y_train, x_test, y_test, clf, 'DNN')

Model DNN
Accuracy train: 96.0
Accuracy test : 82.0
