In [1]:
from data import Data
from dimension_reduction import PCADimensionReduction
from simple_ml_models import *
from utils import *

from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import IsolationForest, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn import svm
from sklearn.model_selection import train_test_split
from imblearn.ensemble import BalancedRandomForestClassifier

import warnings
warnings.filterwarnings('ignore')

2022-11-27 19:48:41.806277: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
data_filepath = "data/by_type/t2/counts_ctc_simulated_123_5k_t2.tsv"
true_results_filepath = "data/by_type/t2/ids_ctc_simulated_123_5k_t2.tsv"
train_indices_filepath = "data/by_type/t2/train_indices.npy"
test_indices_filepath = "data/by_type/t2/test_indices.npy"
SEED = 42
FOLD_NUMBER = 3

# You can change these values to work better for models
CUT_BY_MAX_THRESHOLD = 4
PCA_VARIABLES_AMOUNT = 60

# There are 4 data variants to check: regular, scaled, cut by max, pca reduced + cut by max
data_object = Data(data_filepath, true_results_filepath)
train_data, test_data, train_true_results, test_true_results =  data_object.load_train_test_split(train_indices_filepath, test_indices_filepath)
scaled_train_data, scaled_test_data = data_object.get_scaled_train_test_data()

FileNotFoundError: [Errno 2] No such file or directory: 'data/by_type/t1/counts_ctc_simulated_123_5k_t2.tsv'

In [None]:
cut_by_max_train_data, cut_by_max_test_data = data_object.get_cut_by_max_train_test_data(CUT_BY_MAX_THRESHOLD)

pca_object = PCADimensionReduction(cut_by_max_train_data, scaled_train_data, train_true_results, SEED)
pca_variables = pca_object.get_most_important_variables_from_pc1(PCA_VARIABLES_AMOUNT)
pca_reduced_train_data = cut_by_max_train_data[pca_variables.index]
pca_reduced_test_data = cut_by_max_test_data[pca_variables.index]

### Defined classifiers

In [None]:
# Logistic Regression
log_clf = LogisticRegression(random_state=SEED,
                             class_weight='balanced',
                             penalty='l1',
                             C=0.0005,
                             solver='liblinear')

# XGBoost
xgb_clf = XGBClassifier(random_state=SEED,
                        booster='gbtree',
                        eta=0.2,
                        min_child_weight=1,
                        max_depth=5,
                        gamma=0.5)

# K-Nearest Neighbors
knn_clf = KNeighborsClassifier(n_neighbors=5,
                               algorithm='kd_tree',
                               weights='distance')

# Isolation Forest
if_clf = IsolationForest(max_features=1,
                         n_estimators=1000,
                         bootstrap=True,
                         max_samples=1000,
                         n_jobs=12,
                         random_state=SEED)

# SVM
svm_clf = svm.SVC(kernel='linear',
                  class_weight='balanced',
                  probability=True,
                  random_state=SEED)

# Light GBM
lgbm_clf = LGBMClassifier(boosting_type='gbdt',
                          min_child_weight=0.001,
                          max_depth=10,
                          random_state=SEED,)

# Random Forest
rf_clf = RandomForestClassifier(random_state=SEED,
                                n_estimators=2000,
                                criterion="log_loss")

# Balanced Random Forest
brf_clf = BalancedRandomForestClassifier(random_state=SEED,
                                         n_estimators=2000)

In [None]:
names = ["Logistic Regression", "XGBoost", "K-Nearest Neighbors", "Isolated Forest",
         "SVM", "Light GBM", "Random Forest", "Balanced Random Forest"]
    
classifiers = [log_clf, xgb_clf, knn_clf, if_clf, svm_clf, lgbm_clf, rf_clf, brf_clf]

### Regular data

In [None]:
run_all_models(classifiers, names, train_data, train_true_results, test_data, test_true_results)

Features number: 2000
Best balanced accuracy: Isolated Forest, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Isolated Forest, Random Forest, Balanced Random Forest
Best recall: Isolated Forest, Random Forest, Balanced Random Forest
Best F1 score: Isolated Forest, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.922222,1.0,0.998234,0.99823,0.998111
K-Nearest Neighbors,0.5,0.5,0.977368,0.988619,0.982961
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.5,0.806447,0.977368,0.988619,0.982961
Light GBM,0.944444,1.0,0.998738,0.998735,0.998676
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### Scaled data

In [None]:
run_all_models(classifiers, names, scaled_train_data, train_true_results, scaled_test_data, test_true_results)

Features number: 2000
Best balanced accuracy: Isolated Forest, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Isolated Forest, Random Forest, Balanced Random Forest
Best recall: Isolated Forest, Random Forest, Balanced Random Forest
Best F1 score: Isolated Forest, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.966667,0.933419,0.999242,0.999241,0.999228
XGBoost,0.922222,1.0,0.998234,0.99823,0.998111
K-Nearest Neighbors,0.5,0.499488,0.977368,0.988619,0.982961
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.610599,0.775799,0.987264,0.990137,0.987361
Light GBM,0.966667,1.0,0.999242,0.999241,0.999219
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### Cut by max data

In [None]:
run_all_models(classifiers, names, cut_by_max_train_data, train_true_results, cut_by_max_test_data, test_true_results)

Features number: 66
Best balanced accuracy: Balanced Random Forest
Best ROC AUC: XGBoost, Light GBM, Random Forest
Best precision: Light GBM
Best recall: Light GBM
Best F1 score: Light GBM


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.977778,1.0,0.999495,0.999494,0.999486
K-Nearest Neighbors,0.522222,0.742637,0.98545,0.989125,0.98416
Isolated Forest,0.980941,0.999966,0.991426,0.962317,0.973832
SVM,0.695344,0.709798,0.982844,0.918816,0.947847
Light GBM,0.988889,1.0,0.999747,0.999747,0.999743
Random Forest,0.944444,1.0,0.998737,0.998735,0.998695
Balanced Random Forest,0.998209,0.999983,0.997362,0.996459,0.996717


### Pca reduced + cut by max data

In [None]:
run_all_models(classifiers, names, pca_reduced_train_data, train_true_results, pca_reduced_test_data, test_true_results)

Features number: 60
Best balanced accuracy: XGBoost, Light GBM
Best ROC AUC: XGBoost, Light GBM, Random Forest
Best precision: XGBoost, Light GBM
Best recall: XGBoost, Light GBM
Best F1 score: XGBoost, Light GBM


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,1.0,1.0,1.0,1.0,1.0
K-Nearest Neighbors,0.555556,0.809363,0.989987,0.989884,0.9858
Isolated Forest,0.955615,0.999881,0.989953,0.912241,0.945021
SVM,0.669745,0.472516,0.982242,0.846485,0.906305
Light GBM,1.0,1.0,1.0,1.0,1.0
Random Forest,0.955556,1.0,0.99899,0.998988,0.998953
Balanced Random Forest,0.997058,0.999983,0.996206,0.994183,0.99479


### P-values regular data

In [None]:
statistics = calculate_statistics(train_data, train_true_results.values.ravel())
statistics = statistics[statistics["p_values"] < 0.05]
p_values_regular_train_data = train_data[statistics.index]
p_values_regular_test_data = test_data[statistics.index]

run_all_models(classifiers, names, p_values_regular_train_data, train_true_results, p_values_regular_test_data, test_true_results)

Features number: 442
Best balanced accuracy: Isolated Forest, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Isolated Forest, Random Forest, Balanced Random Forest
Best recall: Isolated Forest, Random Forest, Balanced Random Forest
Best F1 score: Isolated Forest, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.966667,1.0,0.999242,0.999241,0.999228
K-Nearest Neighbors,0.5,0.562292,0.977368,0.988619,0.982961
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.533333,0.738808,0.985698,0.989378,0.984703
Light GBM,0.966667,1.0,0.999242,0.999241,0.999228
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### P-values scaled data

In [None]:
p_values_scaled_train_data = scaled_train_data[statistics.index]
p_values_scaled_test_data = scaled_test_data[statistics.index]

run_all_models(classifiers, names, p_values_scaled_train_data, train_true_results, p_values_scaled_test_data, test_true_results)

Features number: 442
Best balanced accuracy: Isolated Forest, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Isolated Forest, Random Forest, Balanced Random Forest
Best recall: Isolated Forest, Random Forest, Balanced Random Forest
Best F1 score: Isolated Forest, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.966667,0.933419,0.999242,0.999241,0.999228
XGBoost,0.966667,1.0,0.999242,0.999241,0.999228
K-Nearest Neighbors,0.57765,0.721821,0.989531,0.990137,0.986694
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.788889,0.714607,0.995218,0.995195,0.994535
Light GBM,0.966667,1.0,0.999242,0.999241,0.999228
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### P-values cut by max

In [None]:
statistics = calculate_statistics(cut_by_max_train_data, train_true_results.values.ravel())
statistics = statistics[statistics["p_values"] < 0.05]
p_values_cut_by_max_train_data = train_data[statistics.index]
p_values_cut_by_max_test_data = test_data[statistics.index]

run_all_models(classifiers, names, p_values_cut_by_max_train_data, train_true_results, p_values_cut_by_max_test_data, test_true_results)

Features number: 7
Best balanced accuracy: Random Forest
Best ROC AUC: XGBoost, Light GBM, Random Forest
Best precision: Random Forest
Best recall: Random Forest
Best F1 score: Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.977778,1.0,0.999495,0.999494,0.999476
K-Nearest Neighbors,0.710855,0.975544,0.992571,0.992919,0.991525
Isolated Forest,0.979534,0.997374,0.991143,0.959535,0.972082
SVM,0.60463,0.682101,0.980383,0.869752,0.919997
Light GBM,0.999872,1.0,0.999763,0.999747,0.999751
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,0.991558,0.999744,0.993233,0.983308,0.986768


### P-values pca reduced

In [None]:
statistics = calculate_statistics(pca_reduced_train_data, train_true_results.values.ravel())
statistics = statistics[statistics["p_values"] < 0.05]
p_values_pca_reduced_train_data = train_data[statistics.index]
p_values_pca_reduced_test_data = test_data[statistics.index]

run_all_models(classifiers, names, p_values_pca_reduced_train_data, train_true_results, p_values_pca_reduced_test_data, test_true_results)

Features number: 6
Best balanced accuracy: XGBoost
Best ROC AUC: XGBoost, Light GBM, Random Forest
Best precision: XGBoost
Best recall: XGBoost
Best F1 score: XGBoost


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,1.0,1.0,1.0,1.0,1.0
K-Nearest Neighbors,0.710472,0.974691,0.991585,0.99216,0.990855
Isolated Forest,0.951778,0.99618,0.989854,0.904654,0.940696
SVM,0.597723,0.672312,0.980206,0.856095,0.912116
Light GBM,0.999872,1.0,0.999763,0.999747,0.999751
Random Forest,0.988889,1.0,0.999747,0.999747,0.999743
Balanced Random Forest,0.992453,0.999471,0.99355,0.985078,0.98798


### Logistic regression coefficients

In [None]:
logistic_regression_features = pd.read_csv("features/logistic_regression.csv", index_col=0)
logistic_regression_features = logistic_regression_features[(logistic_regression_features != 0).all(1)]
logistic_regression_features = logistic_regression_features.sort_values(by="feature_importance_vals", ascending=False)

lg_train_data = train_data[logistic_regression_features.index[:25]]
lg_test_data = test_data[logistic_regression_features.index[:25]]
run_all_models(classifiers, names, lg_train_data, train_true_results, lg_test_data, test_true_results)

Features number: 25
Best balanced accuracy: Light GBM, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Light GBM, Balanced Random Forest
Best recall: Light GBM, Balanced Random Forest
Best F1 score: Light GBM, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.966667,1.0,0.999242,0.999241,0.999219
K-Nearest Neighbors,0.5,0.5,0.977368,0.988619,0.982961
Isolated Forest,0.999616,1.0,0.999289,0.999241,0.999253
SVM,0.698081,0.628106,0.988028,0.989378,0.988551
Light GBM,1.0,1.0,1.0,1.0,1.0
Random Forest,0.988889,1.0,0.999747,0.999747,0.999743
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### First PCA component coefficients

In [None]:
pca_features = pd.read_csv("features/pca.csv", index_col=0)
pca_features = pca_features[(pca_features != 0).all(1)]
pca_features.sort_values(by="feature_importance_vals", ascending=False)

pca_train_data = train_data[pca_features.index[:100]]
pca_test_data = test_data[pca_features.index[:100]]
run_all_models(classifiers, names, pca_train_data, train_true_results, pca_test_data, test_true_results)

Features number: 100
Best balanced accuracy: Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Light GBM
Best recall: Light GBM
Best F1 score: Light GBM


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.9,1.0,0.99773,0.997724,0.997563
K-Nearest Neighbors,0.644444,0.806788,0.991973,0.991907,0.989681
Isolated Forest,0.919417,1.0,0.989384,0.840668,0.903327
SVM,0.570964,0.507018,0.979524,0.933485,0.955231
Light GBM,0.977522,1.0,0.999049,0.998988,0.998986
Random Forest,0.933333,1.0,0.998485,0.998483,0.998419
Balanced Random Forest,0.998465,1.0,0.997621,0.996965,0.997147


### XGBoost feature importance

In [None]:
pca_reduced_xgboost_features = pd.read_csv("features/pca_reduced_xgboost.csv", index_col=0)
pca_reduced_xgboost_features = pca_reduced_xgboost_features[(pca_reduced_xgboost_features != 0).all(1)]
pca_reduced_xgboost_features.sort_values(by="feature_importance_vals", ascending=False)

xgb_train_data = train_data[pca_reduced_xgboost_features.index]
xgb_test_data = test_data[pca_reduced_xgboost_features.index]
run_all_models(classifiers, names, xgb_train_data, train_true_results, xgb_test_data, test_true_results)

Features number: 11
Best balanced accuracy: XGBoost, Light GBM
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest
Best precision: XGBoost, Light GBM
Best recall: XGBoost, Light GBM
Best F1 score: XGBoost, Light GBM


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,1.0,1.0,1.0,1.0,1.0
K-Nearest Neighbors,0.67765,0.887576,0.992176,0.992413,0.990702
Isolated Forest,0.975953,1.0,0.99085,0.952453,0.967969
SVM,0.639797,0.456076,0.982043,0.765554,0.85317
Light GBM,1.0,1.0,1.0,1.0,1.0
Random Forest,0.988889,1.0,0.999747,0.999747,0.999743
Balanced Random Forest,0.99693,0.999642,0.996079,0.99393,0.994576


### LightGBM feature importance

In [None]:
pca_reduced_lightgbm_features = pd.read_csv("features/pca_reduced_lightgbm.csv", index_col=0)
pca_reduced_lightgbm_features = pca_reduced_lightgbm_features[(pca_reduced_lightgbm_features != 0).all(1)]
pca_reduced_lightgbm_features.sort_values(by="feature_importance_vals", ascending=False)

lgbm_train_data = train_data[pca_reduced_lightgbm_features.index]
lgbm_test_data = test_data[pca_reduced_lightgbm_features.index]
run_all_models(classifiers, names, lgbm_train_data, train_true_results, lgbm_test_data, test_true_results)

Features number: 55
Best balanced accuracy: XGBoost, Light GBM
Best ROC AUC: XGBoost, Light GBM, Random Forest, Balanced Random Forest
Best precision: XGBoost, Light GBM
Best recall: XGBoost, Light GBM
Best F1 score: XGBoost, Light GBM


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,1.0,1.0,1.0,1.0,1.0
K-Nearest Neighbors,0.622094,0.796504,0.990715,0.991148,0.988519
Isolated Forest,0.949348,0.999505,0.989803,0.899848,0.937935
SVM,0.675467,0.483346,0.982537,0.814365,0.886868
Light GBM,1.0,1.0,1.0,1.0,1.0
Random Forest,0.977778,1.0,0.999495,0.999494,0.999486
Balanced Random Forest,0.997058,1.0,0.996215,0.994183,0.994793


### Random forest feature importance

In [None]:
random_forest_regular_features = pd.read_csv("features/regular_random_forest.csv", index_col=0)
random_forest_regular_features = random_forest_regular_features[(random_forest_regular_features != 0).all(1)]
random_forest_regular_features.sort_values(by="feature_importance_vals", ascending=False)

rf_train_data = train_data[random_forest_regular_features.index[:50]]
rf_test_data = test_data[random_forest_regular_features.index[:50]]
run_all_models(classifiers, names, rf_train_data, train_true_results, rf_test_data, test_true_results)

Features number: 50
Best balanced accuracy: Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best recall: Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best F1 score: Isolated Forest, Light GBM, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.955556,1.0,0.99899,0.998988,0.998942
K-Nearest Neighbors,0.5,0.511111,0.977368,0.988619,0.982961
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.732054,0.51222,0.990588,0.991401,0.990626
Light GBM,1.0,1.0,1.0,1.0,1.0
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### Balanced random forest feature importance

In [None]:
balanced_random_forest_regular_features = pd.read_csv("features/regular_balanced_random_forest.csv", index_col=0)
balanced_random_forest_regular_features = balanced_random_forest_regular_features[(balanced_random_forest_regular_features != 0).all(1)]
balanced_random_forest_regular_features.sort_values(by="feature_importance_vals", ascending=False)

brf_train_data = train_data[balanced_random_forest_regular_features.index[:50]]
brf_test_data = test_data[balanced_random_forest_regular_features.index[:50]]
run_all_models(classifiers, names, brf_train_data, train_true_results, brf_test_data, test_true_results)

Features number: 50
Best balanced accuracy: XGBoost, Isolated Forest, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Light GBM, Random Forest, Balanced Random Forest
Best precision: XGBoost, Isolated Forest, Random Forest, Balanced Random Forest
Best recall: XGBoost, Isolated Forest, Random Forest, Balanced Random Forest
Best F1 score: XGBoost, Isolated Forest, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,1.0,1.0,1.0,1.0,1.0
K-Nearest Neighbors,0.511111,0.698184,0.981409,0.988872,0.983561
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.830519,0.770785,0.99168,0.990642,0.99109
Light GBM,0.966667,1.0,0.999242,0.999241,0.999219
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0


### Features with biggest error for autoencoder 

In [None]:
autoencoder_features = pd.read_csv("features/autoencoder_new.csv", index_col=0)
autoencoder_features.sort_values(by="feature_importance_vals", ascending=False)

autoencoder_train_data = train_data[autoencoder_features.index[:5]]
autoencoder_test_data = test_data[autoencoder_features.index[:5]]
run_all_models(classifiers, names, autoencoder_train_data, train_true_results, autoencoder_test_data, test_true_results)

Features number: 5
Best balanced accuracy: Isolated Forest, Random Forest, Balanced Random Forest
Best ROC AUC: XGBoost, Isolated Forest, Random Forest, Balanced Random Forest
Best precision: Isolated Forest, Random Forest, Balanced Random Forest
Best recall: Isolated Forest, Random Forest, Balanced Random Forest
Best F1 score: Isolated Forest, Random Forest, Balanced Random Forest


Unnamed: 0,Balanced Accuracy,ROC AUC,Precision,Recall,F1 score
Logistic Regression,0.5,0.5,0.977368,0.988619,0.982961
XGBoost,0.977778,1.0,0.999495,0.999494,0.999476
K-Nearest Neighbors,0.93295,0.988565,0.997668,0.997724,0.997675
Isolated Forest,1.0,1.0,1.0,1.0,1.0
SVM,0.60996,0.555692,0.986194,0.988872,0.986625
Light GBM,0.5,0.5,0.977368,0.988619,0.982961
Random Forest,1.0,1.0,1.0,1.0,1.0
Balanced Random Forest,1.0,1.0,1.0,1.0,1.0
