In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model, Sequential
import pandas as pd
import numpy as np
import model_training_utils as mtu

2023-06-16 02:31:25.091217: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Data prep and training

In [2]:
# Load the svDB CSV into a DataFrame. The path is relative, so the notebook
# must be run from the directory that contains `data_sliced_tda/`.
svdb_csv_path = "data_sliced_tda/svDB.csv"
data = pd.read_csv(svdb_csv_path)

In [3]:
# Keep only the feature column ('betti_H1') and the label column ('ann').
# `.copy()` makes the subset an independent frame, so the column assignments
# below never write into a slice of the frame that was read from the CSV.
data = data[['betti_H1', 'ann']].copy()

# Each 'betti_H1' entry arrives from the CSV as a string; the project helper
# converts it back into a list (string -> list).
data['betti_H1'] = data['betti_H1'].apply(mtu.fix_betti_string_svdb)
# Turn every list into a 1-D np.array of floats.
data['betti_H1'] = data['betti_H1'].apply(lambda values: np.array([float(num) for num in values]))

# Separate the features from the class labels.
X, y = data['betti_H1'], data['ann']

# 80/20 train/test split, stratified on the label so both parts keep the same
# class proportions; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=420)

# One-hot encode the training labels.
y_train = to_categorical(np.array(y_train.to_list()))

# Stack the per-sample vectors into one 2-D matrix (n_samples, n_points).
# np.stack raises a clear ValueError when the vectors differ in length, instead
# of silently building a 1-D object array that only fails later on `shape[1]`.
X_train = np.stack(X_train.to_list())
# Append a trailing axis of size 1 -> (n_samples, n_points, 1).
# NOTE(review): presumably the channel axis expected by the convolutional model
# that is fitted on this array — confirm against the model's input shape.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [None]:
# Load the saved Keras model (HDF5 file) that is fitted in the next cell.
# NOTE(review): the `models_raw/` directory suggests this is the untrained
# architecture — confirm.
raw_model_path = 'models_raw/4conv_1dens32.hdf5'
model = load_model(raw_model_path)

In [None]:
# Train for 10 epochs with mini-batches of 128 samples. `validation_split=0.2`
# makes Keras hold back 20% of the training arrays for per-epoch validation.
# The returned History object is kept in `history`.
fit_options = dict(batch_size=128, epochs=10, validation_split=0.2, verbose=1)
history = model.fit(X_train, y_train, **fit_options)

In [10]:
# Persist the fitted model as an HDF5 file under `models_trained/`.
trained_model_path = "models_trained/4conv_1dens32_trained.hdf5"
model.save(trained_model_path)

# Scratch cell: array shape checks

In [19]:
# Disabled shape/type diagnostics for the raw columns and the train/test arrays.
# Uncomment a line to inspect the data; none of them affect the pipeline.
#print("X: ", X.shape, " | y: ", y.shape)
#print("X shape: ", X_train.shape, " | X type: ", type(X_train), " | X[0] shape: ", X_train[0].shape, " | X[0] type: ", type(X_train[0]))
#print("X train: ", X_train.shape, " | X test: ", X_test.shape, " | y train: ", y_train.shape, " | y test: ", y_test.shape)

X:  (184045,)  | y:  (184045,)


# Model testing and evaluation

In [None]:
# Load the trained model that is evaluated in the cells below.
# NOTE(review): this reads 'svdb_trained.hdf5', while the training section saves
# to 'models_trained/4conv_1dens32_trained.hdf5' — confirm this is the intended
# checkpoint.
evaluation_model_path = 'models_trained/svdb_trained.hdf5'
model = load_model(evaluation_model_path)

In [5]:
# NOTE: this cell overwrites `y_test` and `X_test` (pandas Series -> NumPy arrays),
# so it can only be run once per train/test split; re-create the split before
# running it again.

# One-hot encode the test labels.
y_test = to_categorical(np.array(y_test.to_list()))

# Stack the per-sample test vectors into one 2-D matrix (n_samples, n_points).
# np.stack raises a clear ValueError when the vectors differ in length, instead
# of silently building a 1-D object array that only fails later on `shape[1]`.
X_test = np.stack(X_test.to_list())
# Append a trailing axis of size 1 -> (n_samples, n_points, 1), the same layout
# the training features are given before fitting.
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [6]:
# Score the model on the held-out test set.
# The two-value unpacking relies on evaluate() returning exactly [loss, metric];
# NOTE(review): presumably the model was compiled with accuracy as its only metric.
evaluation = model.evaluate(X_test, y_test)
loss, accuracy = evaluation
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.29227498173713684
Test Accuracy: 0.8884512186050415


In [7]:
# Collapse the model outputs and the one-hot test labels to integer class ids
# by taking the index of the largest value in each row.
class_scores = model.predict(X_test)
y_pred = class_scores.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Rows are true classes, columns are predicted classes.
confusion_matrix(y_true, y_pred)



array([[32040,   344],
       [ 3762,   663]])

In [8]:
# Per-class precision, recall and F1 for the test-set predictions.
report_text = classification_report(y_true, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.89      0.99      0.94     32384
           1       0.66      0.15      0.24      4425

    accuracy                           0.89     36809
   macro avg       0.78      0.57      0.59     36809
weighted avg       0.87      0.89      0.86     36809

