In [174]:
from data import Data
from dimension_reduction import PCADimensionReduction

from keras.models import Sequential
from keras.layers import Dense
from keras import metrics

In [175]:
# --- Input files: point these at another dataset to evaluate it instead ---
data_filepath = "data/SC_integration/counts_ctc_simulated_123_5k.tsv"
true_results_filepath = "data/SC_integration/ids_ctc_simulated_123_5k.tsv"
train_indices_filepath = "data/SC_integration/train_indices.npy"
test_indices_filepath = "data/SC_integration/test_indices.npy"

# --- Run-wide settings ---
# NOTE(review): in the visible cells SEED is only handed to PCADimensionReduction;
# the Keras models themselves are not seeded here.
SEED = 42
# NOTE(review): FOLD_NUMBER is not referenced in the visible part of the notebook.
FOLD_NUMBER = 3
BATCH_SIZE = 8

# --- Tunables: adjust these if the models need different preprocessing ---
CUT_BY_MAX_THRESHOLD = 4
PCA_VARIABLES_AMOUNT = 60

# Four data variants are compared below:
# regular, scaled, cut by max, and PCA reduced + cut by max.
data_object = Data(data_filepath, true_results_filepath)
(
    train_data,
    test_data,
    train_true_results,
    test_true_results,
) = data_object.load_train_test_split(train_indices_filepath, test_indices_filepath)
(
    scaled_train_data,
    scaled_test_data,
) = data_object.get_scaled_train_test_data()

In [176]:
# Build the "cut by max" variant of the train/test split using the configured threshold.
(
    cut_by_max_train_data,
    cut_by_max_test_data,
) = data_object.get_cut_by_max_train_test_data(CUT_BY_MAX_THRESHOLD)

# Ask the PCA helper for the PCA_VARIABLES_AMOUNT most important variables of PC1.
pca_object = PCADimensionReduction(
    cut_by_max_train_data,
    scaled_train_data,
    train_true_results,
    SEED,
)
pca_variables = pca_object.get_most_important_variables_from_pc1(PCA_VARIABLES_AMOUNT)

# Keep only the selected columns; the same selection (derived from the train-based
# PCA object) is applied to both train and test so their columns match.
pca_reduced_train_data = cut_by_max_train_data[pca_variables.index]
pca_reduced_test_data = cut_by_max_test_data[pca_variables.index]

In [177]:
def create_neural_network(input_shape):
    """Build and compile a fully connected binary classifier.

    The network is Dense(1000, relu) -> Dense(500, relu) -> Dense(1, sigmoid),
    compiled with binary cross-entropy loss and the Adam optimizer.

    Args:
        input_shape: Number of input features; used as the single dimension
            of the first layer's ``input_shape`` tuple.

    Returns:
        The compiled ``Sequential`` model. Besides the loss it tracks, in this
        order: binary accuracy, AUC, precision and recall.
    """
    layer_stack = [
        Dense(1000, input_shape=(input_shape,), activation='relu'),
        Dense(500, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    # Fresh metric objects per model; their order fixes the order of evaluate() results.
    tracked_metrics = [
        metrics.BinaryAccuracy(),
        metrics.AUC(),
        metrics.Precision(),
        metrics.Recall(),
    ]

    model = Sequential(layer_stack)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=tracked_metrics,
    )
    return model

In [178]:
def display_neural_network_metrics(model, batch_size_value, data, true_results, set_name):
    """Evaluate ``model`` on a dataset and print its loss and metrics.

    Args:
        model: Object exposing ``evaluate(data, true_results, batch_size=...)``
            that returns an indexable sequence of values.
        batch_size_value: Batch size forwarded to ``model.evaluate``.
        data: Input samples to evaluate on.
        true_results: Ground-truth labels matching ``data``.
        set_name: Prefix printed in front of every line (e.g. "Train").

    The first five returned values are printed, in order, as loss, binary
    accuracy, auc, precision and recall.
    """
    metric_values = model.evaluate(data, true_results, batch_size=batch_size_value)

    # Labels are positional: they must match the order of the values returned by evaluate().
    metric_labels = ("loss", "binary accuracy", "auc", "precision", "recall")
    for position, label in enumerate(metric_labels):
        print(f"{set_name} {label}: {metric_values[position]}")

## Neural Network

### Regular data

In [179]:
# Fresh model whose input width is the second dimension of the regular training data.
neural_network = create_neural_network(train_data.shape[1])
neural_network.fit(
    x=train_data,
    y=train_true_results,
    batch_size=BATCH_SIZE,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f54882306a0>

In [180]:
# Metrics on the same regular data the model was fitted on.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=train_data,
    true_results=train_true_results,
    set_name="Train",
)

Train loss: 7.282344172132582e-11
Train binary accuracy: 1.0
Train auc: 1.0
Train precision: 1.0
Train recall: 1.0


In [181]:
# Metrics on the held-out regular test split.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=test_data,
    true_results=test_true_results,
    set_name="Test",
)

Test loss: 0.3357192277908325
Test binary accuracy: 0.9886191487312317
Test auc: 0.5
Test precision: 0.0
Test recall: 0.0


### Scaled data

In [182]:
# Fresh model whose input width is the second dimension of the scaled training data.
neural_network = create_neural_network(scaled_train_data.shape[1])
neural_network.fit(
    x=scaled_train_data,
    y=train_true_results,
    batch_size=BATCH_SIZE,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f54701a5a00>

In [183]:
# Metrics on the scaled *training* split, so the printed prefix must be "Train"
# (it previously read "Test", which mislabelled these as test-set metrics).
display_neural_network_metrics(neural_network, BATCH_SIZE, scaled_train_data, train_true_results, "Train")

Test loss: 1.6007710056328274e-17
Test binary accuracy: 1.0
Test auc: 1.0
Test precision: 1.0
Test recall: 1.0


In [184]:
# Metrics on the held-out scaled test split.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=scaled_test_data,
    true_results=test_true_results,
    set_name="Test",
)

Test loss: 9.569055557250977
Test binary accuracy: 0.9901365637779236
Test auc: 0.7312100529670715
Test precision: 0.5833333134651184
Test recall: 0.46666666865348816


### Cut by max

In [185]:
# Fresh model whose input width is the second dimension of the cut-by-max training data.
neural_network = create_neural_network(cut_by_max_train_data.shape[1])
neural_network.fit(
    x=cut_by_max_train_data,
    y=train_true_results,
    batch_size=BATCH_SIZE,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f547c146be0>

In [186]:
# Metrics on the same cut-by-max data the model was fitted on.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=cut_by_max_train_data,
    true_results=train_true_results,
    set_name="Train",
)

Train loss: 3.278401439210654e-10
Train binary accuracy: 1.0
Train auc: 1.0
Train precision: 1.0
Train recall: 1.0


In [187]:
# Metrics on the held-out cut-by-max test split.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=cut_by_max_test_data,
    true_results=test_true_results,
    set_name="Test",
)

Test loss: 0.10093561559915543
Test binary accuracy: 0.9946889281272888
Test auc: 0.8657969236373901
Test precision: 0.8333333134651184
Test recall: 0.6666666865348816


### PCA + cut by max

In [188]:
# Fresh model whose input width is the second dimension of the PCA-reduced training data.
neural_network = create_neural_network(pca_reduced_train_data.shape[1])
neural_network.fit(
    x=pca_reduced_train_data,
    y=train_true_results,
    batch_size=BATCH_SIZE,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f547c7132b0>

In [189]:
# Metrics on the same PCA-reduced data the model was fitted on.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=pca_reduced_train_data,
    true_results=train_true_results,
    set_name="Train",
)

Train loss: 2.820365052613738e-09
Train binary accuracy: 1.0
Train auc: 1.0
Train precision: 1.0
Train recall: 1.0


In [190]:
# Metrics on the held-out PCA-reduced test split.
display_neural_network_metrics(
    model=neural_network,
    batch_size_value=BATCH_SIZE,
    data=pca_reduced_test_data,
    true_results=test_true_results,
    set_name="Test",
)

Test loss: 0.09350316226482391
Test binary accuracy: 0.9939302206039429
Test auc: 0.7991302013397217
Test precision: 0.8888888955116272
Test recall: 0.5333333611488342
