<h1>Handling Imbalanced Dataset With Machine Learning


In [1]:
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from collections import Counter
from itable import PrettyTable, TableStyle,CellStyle
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_blobs
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
import warnings
from xgboost import XGBRegressor
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.under_sampling import EditedNearestNeighbours, TomekLinks

<h1> GAIT Data Set

In [2]:
df =pd.read_csv('gait1.csv')
df.head()

Unnamed: 0,BMI,STS,y
0,33.4,15.3,0
1,29.4,11.1,0
2,24.5,12.3,0
3,20.9,11.0,0
4,31.9,11.6,0


In [3]:
# data size
df.shape

(111, 3)

In [4]:
# imbalanced target
df.y.value_counts() / len(df)


0    0.792793
1    0.207207
Name: y, dtype: float64

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(labels=['y'], axis=1),  # drop the y
    df['y'],  # just the y
    test_size=0.3,
    random_state=0)

X_train.shape, X_test.shape

((77, 2), (34, 2))

<h3> Logistic Regression on Original Data

In [6]:
log_class= LogisticRegression()
grid={'C':10.0 **np.arange(-2,3),'penalty':['l1','l2']}
cv=KFold(n_splits=5,random_state=None,shuffle=False)
clf=GridSearchCV(log_class,grid,cv=cv,n_jobs=-1)
clf.fit(X_train,y_train)

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': array([1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02]),
                         'penalty': ['l1', 'l2']})

In [7]:
print(confusion_matrix(y_test,clf.predict(X_test)))
print(classification_report(y_test, clf.predict(X_test)))
print(accuracy_score(y_test,clf.predict(X_test)))

[[29  0]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.85      1.00      0.92        29
           1       0.00      0.00      0.00         5

    accuracy                           0.85        34
   macro avg       0.43      0.50      0.46        34
weighted avg       0.73      0.85      0.79        34

0.8529411764705882


  _warn_prf(average, modifier, msg_start, len(result))


<h3> Random Forest on orig. data

In [8]:
rf = RandomForestClassifier()
rf.fit(X_train,y_train)

RandomForestClassifier()

In [9]:
print(confusion_matrix(y_test,rf.predict(X_test)))
print(classification_report(y_test, rf.predict(X_test)))
print(accuracy_score(y_test,rf.predict(X_test)))

[[27  2]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.00      0.00      0.00         5

    accuracy                           0.79        34
   macro avg       0.42      0.47      0.44        34
weighted avg       0.72      0.79      0.76        34

0.7941176470588235


<h3> Ada Boost on original data

In [10]:
ada = AdaBoostClassifier()
ada.fit(X_train, y_train)

AdaBoostClassifier()

In [11]:
print(confusion_matrix(y_test,ada.predict(X_test)))
print(classification_report(y_test, ada.predict(X_test)))
print(accuracy_score(y_test,ada.predict(X_test)))

[[24  5]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.17      0.20      0.18         5

    accuracy                           0.74        34
   macro avg       0.51      0.51      0.51        34
weighted avg       0.76      0.74      0.75        34

0.7352941176470589


<h3>SVM on Orig. Data

In [12]:
sv = svm.SVC(kernel='linear')
sv.fit(X_train, y_train)

SVC(kernel='linear')

In [13]:
print(confusion_matrix(y_test,sv.predict(X_test)))
print(classification_report(y_test, sv.predict(X_test)))
print(accuracy_score(y_test,sv.predict(X_test)))

[[29  0]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.85      1.00      0.92        29
           1       0.00      0.00      0.00         5

    accuracy                           0.85        34
   macro avg       0.43      0.50      0.46        34
weighted avg       0.73      0.85      0.79        34

0.8529411764705882


  _warn_prf(average, modifier, msg_start, len(result))



<h3>Decision Tree on Orig. Data

In [14]:
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)

DecisionTreeClassifier()

In [15]:
print(confusion_matrix(y_test,dt.predict(X_test)))
print(classification_report(y_test, dt.predict(X_test)))
print(accuracy_score(y_test,dt.predict(X_test)))

[[24  5]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.17      0.20      0.18         5

    accuracy                           0.74        34
   macro avg       0.51      0.51      0.51        34
weighted avg       0.76      0.74      0.75        34

0.7352941176470589


<h3>Naive Bayes on orign. Data

In [16]:
nb = GaussianNB()
nb.fit(X_train, y_train)

GaussianNB()

In [17]:
print(confusion_matrix(y_test,nb.predict(X_test)))
print(classification_report(y_test, nb.predict(X_test)))
print(accuracy_score(y_test,nb.predict(X_test)))

[[28  1]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.88      0.97      0.92        29
           1       0.50      0.20      0.29         5

    accuracy                           0.85        34
   macro avg       0.69      0.58      0.60        34
weighted avg       0.82      0.85      0.83        34

0.8529411764705882


<h3>KNN on Orig. Data

In [18]:
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(X_train, y_train)


KNeighborsClassifier(n_neighbors=3)

In [19]:
print(confusion_matrix(y_test,kn.predict(X_test)))
print(classification_report(y_test, kn.predict(X_test)))
print(accuracy_score(y_test,kn.predict(X_test)))

[[28  1]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.85      0.97      0.90        29
           1       0.00      0.00      0.00         5

    accuracy                           0.82        34
   macro avg       0.42      0.48      0.45        34
weighted avg       0.72      0.82      0.77        34

0.8235294117647058


<h2>Applying SMOTE on real data

In [20]:
sm = SMOTE(
    sampling_strategy='auto',  # samples only the minority class
    random_state=0,  # for reproducibility
    k_neighbors=5,
    n_jobs=4
)

X_resampled, y_resampled = sm.fit_resample(X_train, y_train)

In [21]:
X_resampled.shape, y_resampled.shape


((118, 2), (118,))

In [22]:
y_train.value_counts(), y_resampled.value_counts()

(0    59
 1    18
 Name: y, dtype: int64,
 1    59
 0    59
 Name: y, dtype: int64)

<h3>Logistic Regrission on SMOTE

In [23]:
log_class= LogisticRegression()
grid={'C':10.0 **np.arange(-2,3),'penalty':['l1','l2']}
cv=KFold(n_splits=5,random_state=None,shuffle=False)
clf=GridSearchCV(log_class,grid,cv=cv,n_jobs=-1)
clf.fit(X_resampled,y_resampled)

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': array([1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02]),
                         'penalty': ['l1', 'l2']})

In [24]:
print(confusion_matrix(y_test,clf.predict(X_test)))
print(classification_report(y_test, clf.predict(X_test)))
print(accuracy_score(y_test,clf.predict(X_test)))

[[21  8]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.81      0.72      0.76        29
           1       0.00      0.00      0.00         5

    accuracy                           0.62        34
   macro avg       0.40      0.36      0.38        34
weighted avg       0.69      0.62      0.65        34

0.6176470588235294


<h3> Random Forest On SMOTE

In [25]:
rf = RandomForestClassifier()
rf.fit(X_resampled,y_resampled)

RandomForestClassifier()

In [26]:
print(confusion_matrix(y_test,rf.predict(X_test)))
print(classification_report(y_test, rf.predict(X_test)))
print(accuracy_score(y_test,rf.predict(X_test)))

[[24  5]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.86      0.83      0.84        29
           1       0.17      0.20      0.18         5

    accuracy                           0.74        34
   macro avg       0.51      0.51      0.51        34
weighted avg       0.76      0.74      0.75        34

0.7352941176470589


<h3> Ada Boost on SMOTE

In [27]:
ada = AdaBoostClassifier()
ada.fit(X_resampled, y_resampled)

AdaBoostClassifier()

In [28]:
print(confusion_matrix(y_test,ada.predict(X_test)))
print(classification_report(y_test, ada.predict(X_test)))
print(accuracy_score(y_test,ada.predict(X_test)))

[[25  4]
 [ 3  2]]
              precision    recall  f1-score   support

           0       0.89      0.86      0.88        29
           1       0.33      0.40      0.36         5

    accuracy                           0.79        34
   macro avg       0.61      0.63      0.62        34
weighted avg       0.81      0.79      0.80        34

0.7941176470588235


<h3>SVM on SMOTE

In [29]:
sv = svm.SVC(kernel='linear')
sv.fit(X_resampled, y_resampled)

SVC(kernel='linear')

In [30]:
print(confusion_matrix(y_test,sv.predict(X_test)))
print(classification_report(y_test, sv.predict(X_test)))
print(accuracy_score(y_test,sv.predict(X_test)))

[[16 13]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.80      0.55      0.65        29
           1       0.07      0.20      0.11         5

    accuracy                           0.50        34
   macro avg       0.44      0.38      0.38        34
weighted avg       0.69      0.50      0.57        34

0.5


<h3>Decision Tree on SMOTE

In [31]:
dt = DecisionTreeClassifier()
dt.fit(X_resampled, y_resampled)

DecisionTreeClassifier()

In [32]:
print(confusion_matrix(y_test,dt.predict(X_test)))
print(classification_report(y_test, dt.predict(X_test)))
print(accuracy_score(y_test,dt.predict(X_test)))

[[22  7]
 [ 3  2]]
              precision    recall  f1-score   support

           0       0.88      0.76      0.81        29
           1       0.22      0.40      0.29         5

    accuracy                           0.71        34
   macro avg       0.55      0.58      0.55        34
weighted avg       0.78      0.71      0.74        34

0.7058823529411765


<h3>Naive Bayes on SMOTE

In [33]:
nb = GaussianNB()
nb.fit(X_resampled, y_resampled)

GaussianNB()

In [34]:
print(confusion_matrix(y_test,nb.predict(X_test)))
print(classification_report(y_test, nb.predict(X_test)))
print(accuracy_score(y_test,nb.predict(X_test)))

[[22  7]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.85      0.76      0.80        29
           1       0.12      0.20      0.15         5

    accuracy                           0.68        34
   macro avg       0.49      0.48      0.48        34
weighted avg       0.74      0.68      0.70        34

0.6764705882352942


<h3>KNN on SMOTE

In [35]:
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(X_resampled, y_resampled)


KNeighborsClassifier(n_neighbors=3)

In [36]:
print(confusion_matrix(y_test,kn.predict(X_test)))
print(classification_report(y_test, kn.predict(X_test)))
print(accuracy_score(y_test,kn.predict(X_test)))

[[20  9]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.83      0.69      0.75        29
           1       0.10      0.20      0.13         5

    accuracy                           0.62        34
   macro avg       0.47      0.44      0.44        34
weighted avg       0.73      0.62      0.66        34

0.6176470588235294


<h2>Applying SMOTE with Edited Nearest Neighbour(SMOTEENN)

In [37]:
# need ENN  as argument of SMOTEENN
enn = EditedNearestNeighbours(
    sampling_strategy='auto',
    n_neighbors=3,
    kind_sel='all',
    n_jobs=4)


smenn = SMOTEENN(
    sampling_strategy='auto',  # samples only the minority class
    random_state=0,  # for reproducibility
    smote=sm,
    enn=enn,
    n_jobs=4
)

X_smenn, y_smenn = smenn.fit_resample(X_train, y_train)

In [38]:
# size of oversampled datasets

X_resampled.shape, X_smenn.shape

((118, 2), (84, 2))

In [39]:
# number of minority class observations

y_train.value_counts(), y_resampled.value_counts(), y_smenn.value_counts()

(0    59
 1    18
 Name: y, dtype: int64,
 1    59
 0    59
 Name: y, dtype: int64,
 0    59
 1    25
 Name: y, dtype: int64)

In [None]:
<h3>Logistic with SMOTEENN

In [40]:
log_class= LogisticRegression()
grid={'C':10.0 **np.arange(-2,3),'penalty':['l1','l2']}
cv=KFold(n_splits=5,random_state=None,shuffle=False)
clf=GridSearchCV(log_class,grid,cv=cv,n_jobs=-1)
clf.fit(X_smenn, y_smenn)

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': array([1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02]),
                         'penalty': ['l1', 'l2']})

In [41]:
print(confusion_matrix(y_test,clf.predict(X_test)))
print(classification_report(y_test, clf.predict(X_test)))
print(accuracy_score(y_test,clf.predict(X_test)))

[[28  1]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.85      0.97      0.90        29
           1       0.00      0.00      0.00         5

    accuracy                           0.82        34
   macro avg       0.42      0.48      0.45        34
weighted avg       0.72      0.82      0.77        34

0.8235294117647058


<h3>Random Forest With SMOTEENN

In [42]:
rf = RandomForestClassifier()
rf.fit(X_smenn, y_smenn)

RandomForestClassifier()

In [43]:
print(confusion_matrix(y_test,rf.predict(X_test)))
print(classification_report(y_test, rf.predict(X_test)))
print(accuracy_score(y_test,rf.predict(X_test)))

[[27  2]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.00      0.00      0.00         5

    accuracy                           0.79        34
   macro avg       0.42      0.47      0.44        34
weighted avg       0.72      0.79      0.76        34

0.7941176470588235


<h3>Ada Boost With SMOTEENN

In [44]:
ada = AdaBoostClassifier()
ada.fit(X_smenn, y_smenn)

AdaBoostClassifier()

In [45]:
print(confusion_matrix(y_test,ada.predict(X_test)))
print(classification_report(y_test, ada.predict(X_test)))
print(accuracy_score(y_test,ada.predict(X_test)))

[[26  3]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.00      0.00      0.00         5

    accuracy                           0.76        34
   macro avg       0.42      0.45      0.43        34
weighted avg       0.72      0.76      0.74        34

0.7647058823529411


<h3>SVM with SMOTEENN

In [None]:
sv = svm.SVC(kernel='linear')
sv.fit(X_smenn, y_smenn)

In [47]:
print(confusion_matrix(y_test,sv.predict(X_test)))
print(classification_report(y_test, sv.predict(X_test)))
print(accuracy_score(y_test,sv.predict(X_test)))

[[27  2]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.00      0.00      0.00         5

    accuracy                           0.79        34
   macro avg       0.42      0.47      0.44        34
weighted avg       0.72      0.79      0.76        34

0.7941176470588235


<h3>Decision Tree with SMOTEENN

In [48]:
dt = DecisionTreeClassifier()
dt.fit(X_smenn, y_smenn)

DecisionTreeClassifier()

In [49]:
print(confusion_matrix(y_test,dt.predict(X_test)))
print(classification_report(y_test, dt.predict(X_test)))
print(accuracy_score(y_test,dt.predict(X_test)))

[[27  2]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.00      0.00      0.00         5

    accuracy                           0.79        34
   macro avg       0.42      0.47      0.44        34
weighted avg       0.72      0.79      0.76        34

0.7941176470588235


<h3>Naive Bayes with SMOTEENN

In [50]:
nb = GaussianNB()
nb.fit(X_smenn, y_smenn)

GaussianNB()

In [51]:
print(confusion_matrix(y_test,nb.predict(X_test)))
print(classification_report(y_test, nb.predict(X_test)))
print(accuracy_score(y_test,nb.predict(X_test)))

[[27  2]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.00      0.00      0.00         5

    accuracy                           0.79        34
   macro avg       0.42      0.47      0.44        34
weighted avg       0.72      0.79      0.76        34

0.7941176470588235


<h3>KNN With SMOTEENN

In [52]:
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(X_smenn, y_smenn)


KNeighborsClassifier(n_neighbors=3)

In [53]:
print(confusion_matrix(y_test,kn.predict(X_test)))
print(classification_report(y_test, kn.predict(X_test)))
print(accuracy_score(y_test,kn.predict(X_test)))

[[26  3]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.00      0.00      0.00         5

    accuracy                           0.76        34
   macro avg       0.42      0.45      0.43        34
weighted avg       0.72      0.76      0.74        34

0.7647058823529411


<h2>Applying SMOTE With Tomek (SMOTETomek)

In [54]:
# need tomek as argument of SMOTETomek
tl = TomekLinks(
    sampling_strategy='all',
    n_jobs=4)

smtomek = SMOTETomek(
    sampling_strategy='auto',  # samples only the minority class
    random_state=0,  # for reproducibility
    smote=sm,
    tomek=tl,
    n_jobs=4
)

X_smtl, y_smtl = smtomek.fit_resample(X_train, y_train)

In [55]:
# size of oversampled datasets

X_resampled.shape, X_smtl.shape

((118, 2), (98, 2))

In [56]:
# number of minority class observations

y_train.value_counts(), y_resampled.value_counts(),y_smtl.value_counts()
 

(0    59
 1    18
 Name: y, dtype: int64,
 1    59
 0    59
 Name: y, dtype: int64,
 1    49
 0    49
 Name: y, dtype: int64)

<h2>Logistic With SMOTETomek

In [57]:
log_class= LogisticRegression()
grid={'C':10.0 **np.arange(-2,3),'penalty':['l1','l2']}
cv=KFold(n_splits=5,random_state=None,shuffle=False)
clf=GridSearchCV(log_class,grid,cv=cv,n_jobs=-1)
clf.fit(X_smtl, y_smtl)

GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': array([1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02]),
                         'penalty': ['l1', 'l2']})

In [58]:
print(confusion_matrix(y_test,clf.predict(X_test)))
print(classification_report(y_test, clf.predict(X_test)))
print(accuracy_score(y_test,clf.predict(X_test)))

[[21  8]
 [ 5  0]]
              precision    recall  f1-score   support

           0       0.81      0.72      0.76        29
           1       0.00      0.00      0.00         5

    accuracy                           0.62        34
   macro avg       0.40      0.36      0.38        34
weighted avg       0.69      0.62      0.65        34

0.6176470588235294


<h3>Random Forest With SMOTETomek

In [59]:
rf = RandomForestClassifier()
rf.fit(X_smtl, y_smtl)

RandomForestClassifier()

In [60]:
print(confusion_matrix(y_test,rf.predict(X_test)))
print(classification_report(y_test, rf.predict(X_test)))
print(accuracy_score(y_test,rf.predict(X_test)))

[[22  7]
 [ 3  2]]
              precision    recall  f1-score   support

           0       0.88      0.76      0.81        29
           1       0.22      0.40      0.29         5

    accuracy                           0.71        34
   macro avg       0.55      0.58      0.55        34
weighted avg       0.78      0.71      0.74        34

0.7058823529411765


<h3>AdaBoost With SMOTETomek

In [61]:
ada = AdaBoostClassifier()
ada.fit(X_smtl, y_smtl)

AdaBoostClassifier()

In [62]:
print(confusion_matrix(y_test,ada.predict(X_test)))
print(classification_report(y_test, ada.predict(X_test)))
print(accuracy_score(y_test,ada.predict(X_test)))

[[24  5]
 [ 3  2]]
              precision    recall  f1-score   support

           0       0.89      0.83      0.86        29
           1       0.29      0.40      0.33         5

    accuracy                           0.76        34
   macro avg       0.59      0.61      0.60        34
weighted avg       0.80      0.76      0.78        34

0.7647058823529411


<h3>SVM With SMOTETomek

In [63]:
sv = svm.SVC(kernel='linear')
sv.fit(X_smtl, y_smtl)

SVC(kernel='linear')

In [64]:
print(confusion_matrix(y_test,sv.predict(X_test)))
print(classification_report(y_test, sv.predict(X_test)))
print(accuracy_score(y_test,sv.predict(X_test)))

[[15 14]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.79      0.52      0.62        29
           1       0.07      0.20      0.10         5

    accuracy                           0.47        34
   macro avg       0.43      0.36      0.36        34
weighted avg       0.68      0.47      0.55        34

0.47058823529411764


<h3>Decision Tree With SMOTETomek 

In [65]:
dt = DecisionTreeClassifier()
dt.fit(X_smtl, y_smtl)

DecisionTreeClassifier()

In [66]:
print(confusion_matrix(y_test,dt.predict(X_test)))
print(classification_report(y_test, dt.predict(X_test)))
print(accuracy_score(y_test,dt.predict(X_test)))

[[23  6]
 [ 3  2]]
              precision    recall  f1-score   support

           0       0.88      0.79      0.84        29
           1       0.25      0.40      0.31         5

    accuracy                           0.74        34
   macro avg       0.57      0.60      0.57        34
weighted avg       0.79      0.74      0.76        34

0.7352941176470589


<h3>Naive Bayes With SMOTETomek

In [67]:
nb = GaussianNB()
nb.fit(X_smtl, y_smtl)

GaussianNB()

In [68]:
print(confusion_matrix(y_test,nb.predict(X_test)))
print(classification_report(y_test, nb.predict(X_test)))
print(accuracy_score(y_test,nb.predict(X_test)))

[[22  7]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.85      0.76      0.80        29
           1       0.12      0.20      0.15         5

    accuracy                           0.68        34
   macro avg       0.49      0.48      0.48        34
weighted avg       0.74      0.68      0.70        34

0.6764705882352942


<h3>KNN With SMOTETomek

In [69]:
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(X_smtl, y_smtl)


KNeighborsClassifier(n_neighbors=3)

In [70]:
print(confusion_matrix(y_test,kn.predict(X_test)))
print(classification_report(y_test, kn.predict(X_test)))
print(accuracy_score(y_test,kn.predict(X_test)))

[[19 10]
 [ 4  1]]
              precision    recall  f1-score   support

           0       0.83      0.66      0.73        29
           1       0.09      0.20      0.13         5

    accuracy                           0.59        34
   macro avg       0.46      0.43      0.43        34
weighted avg       0.72      0.59      0.64        34

0.5882352941176471


<h1> Train ML Models

<h2> Random Forests

<h2> Logistic regression

<h2> Accuracy