imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, balanced_accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_validate

In [2]:

def preprocess(file_path,target_column_index,has_header,delete_list,extraction_list):
    """this function is used for taking the dataset from .csv file making a test train split
    and x, y split (has header is either True or False)"""

    #reading the .csv
    if has_header:
        df = pd.read_csv(file_path, header=0, delimiter=",")  # First row as header
        print(f"Dataset shape: {df.shape}")
        print(f"Column names: {list(df.columns)}")
    else:
        df = pd.read_csv(file_path, header=None, delimiter=",")  # No header row
        print(f"Dataset shape: {df.shape}")

    # Find all string/object columns automatically
    string_columns = df.select_dtypes(include=['object']).columns
    print(string_columns)
    # Encode all string columns
    for column in string_columns:
        encoder = LabelEncoder()
        df[column] = encoder.fit_transform(df[column])

    df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)

    #making target variable split
    x_first = df_shuffled.drop(df.columns[target_column_index], axis=1)  # All except target

    x_ds = x_first.drop(columns = delete_list + extraction_list)
    y_ds = df_shuffled.iloc[:, target_column_index]  # Target column only
    

    print(f'shape of x: {x_ds.shape}')

    return x_ds, y_ds

In [3]:
def implement(model, x_ds, y_ds):
    """this function contains implementation (modeling and fitting) on the dataset with 1 
    initiation and shows the results of the implementations.
    Note: This function doesn't have compile so it is not appoprate to use this function for perceptron learning algs.
    because it doesn't have training vs test accuracy comparison and doesn't have epochs and .argmax(axis=1) """
    
    # classical model evaluation
    """
    #initiante ml
    model.fit(x_ds, y_ds)
    y_pred = model.predict(x_test)
    print(f'Confusion matrix: \n{confusion_matrix(y_test, y_pred)}')
    print(f'Classification report: {classification_report(y_test, y_pred)}')
    print(f"Accuracy score is: {accuracy_score(y_test,y_pred):.3f}")
    print(f"Balanced accuracy score is: {balanced_accuracy_score(y_test,y_pred):.3f}")
    print(f"Precision score is: {precision_score(y_test,y_pred, average='weighted'):.3f}")
    print(f"Recall score is: {recall_score(y_test,y_pred, average='weighted'):.3f}")
    """
    # StratifiedKFold evaluations
    skf = StratifiedKFold(n_splits=10)
    scoring = ['balanced_accuracy', 'accuracy', 'precision_weighted', 'recall_weighted']
    cv_results = cross_validate(model, x_ds, y_ds, cv=skf, scoring=scoring)

    #if type(model) == KNeighborsClassifier:
    #    knn_cv_bacc = round(cv_results['test_balanced_accuracy'].mean(),3)
    #    knn_cv_precision = round(cv_results['test_precision_weighted'].mean(),3)
    #    knn_cv_recall = round(cv_results['test_recall_weighted'].mean(),3)
        

    print(f"StratifiedKFold CV Balanced Accuracy: {cv_results['test_balanced_accuracy'].mean():.3f}±{cv_results['test_balanced_accuracy'].std():.2f})")
    #print(f"StratifiedKFold CV Accuracy: {cv_results['test_accuracy'].mean():.3f}±{cv_results['test_accuracy'].std():.2f})")
    print(f"StratifiedKFold CV Precision: {cv_results['test_precision_weighted'].mean():.3f}±{cv_results['test_precision_weighted'].std():.2f})")
    print(f"StratifiedKFold CV Recall: {cv_results['test_recall_weighted'].mean():.3f}(±{cv_results['test_recall_weighted'].std():.2f})")

    return cv_results 
        

Select the dataset!

Runs the preprocess for diffrent datasets.

In [4]:
x_ds1, y_ds1 = preprocess("beans_kmeans.csv", 16, True, [], [])

Dataset shape: (13611, 17)
Column names: ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2', 'ShapeFactor3', 'ShapeFactor4', 'Class']
Index(['Class'], dtype='object')
shape of x: (13611, 16)


In [5]:
x_ds2, y_ds2 = preprocess("beans_kmeans.csv", 16, True, [],["ShapeFactor4","Solidity","Extent"] )

Dataset shape: (13611, 17)
Column names: ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2', 'ShapeFactor3', 'ShapeFactor4', 'Class']
Index(['Class'], dtype='object')
shape of x: (13611, 13)


x_ds, y_ds = preprocess("diabetes_kmeans.csv", 8, True, [], [])

x_ds, y_ds = preprocess("diabetes_kmeans.csv", 8, True, [], [])

In [6]:
x_ds3, y_ds3 = preprocess("divorce.csv", 54, True, [], [])

Dataset shape: (170, 55)
Column names: ['Atr1', 'Atr2', 'Atr3', 'Atr4', 'Atr5', 'Atr6', 'Atr7', 'Atr8', 'Atr9', 'Atr10', 'Atr11', 'Atr12', 'Atr13', 'Atr14', 'Atr15', 'Atr16', 'Atr17', 'Atr18', 'Atr19', 'Atr20', 'Atr21', 'Atr22', 'Atr23', 'Atr24', 'Atr25', 'Atr26', 'Atr27', 'Atr28', 'Atr29', 'Atr30', 'Atr31', 'Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr37', 'Atr38', 'Atr39', 'Atr40', 'Atr41', 'Atr42', 'Atr43', 'Atr44', 'Atr45', 'Atr46', 'Atr47', 'Atr48', 'Atr49', 'Atr50', 'Atr51', 'Atr52', 'Atr53', 'Atr54', 'Class']
Index([], dtype='object')
shape of x: (170, 54)


In [7]:
x_ds4, y_ds4 = preprocess("divorce.csv", 54, True, [], ['Atr53','Atr7','Atr47','Atr48','Atr52','Atr43','Atr45','Atr6','Atr46','Atr42','Atr49','Atr51'])

Dataset shape: (170, 55)
Column names: ['Atr1', 'Atr2', 'Atr3', 'Atr4', 'Atr5', 'Atr6', 'Atr7', 'Atr8', 'Atr9', 'Atr10', 'Atr11', 'Atr12', 'Atr13', 'Atr14', 'Atr15', 'Atr16', 'Atr17', 'Atr18', 'Atr19', 'Atr20', 'Atr21', 'Atr22', 'Atr23', 'Atr24', 'Atr25', 'Atr26', 'Atr27', 'Atr28', 'Atr29', 'Atr30', 'Atr31', 'Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr37', 'Atr38', 'Atr39', 'Atr40', 'Atr41', 'Atr42', 'Atr43', 'Atr44', 'Atr45', 'Atr46', 'Atr47', 'Atr48', 'Atr49', 'Atr50', 'Atr51', 'Atr52', 'Atr53', 'Atr54', 'Class']
Index([], dtype='object')
shape of x: (170, 42)


In [8]:
x_ds5, y_ds5 = preprocess("parkinsons_kmeans.csv", 17, True, ['name'], [])

Dataset shape: (195, 24)
Column names: ['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']
Index(['name'], dtype='object')
shape of x: (195, 22)


In [9]:
x_ds6, y_ds6 = preprocess("parkinsons_kmeans.csv", 17, True, ['name'], ['NHR','D2','MDVP:Jitter(%)','RPDE','Jitter:DDP','MDVP:RAP', 'MDVP:Fhi(Hz)','DFA','MDVP:PPQ'])

Dataset shape: (195, 24)
Column names: ['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']
Index(['name'], dtype='object')
shape of x: (195, 13)


In [10]:
x_ds7, y_ds7 = preprocess("rice_binned_kmeans.csv", 7, True, [], [])

Dataset shape: (3810, 8)
Column names: ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length', 'Eccentricity', 'Convex_Area', 'Extent', 'Class']
Index(['Class'], dtype='object')
shape of x: (3810, 7)


In [11]:
x_ds8, y_ds8 = preprocess("rice_binned_kmeans.csv", 7, True, [], ['Minor_Axis_Length', 'Extent'])

Dataset shape: (3810, 8)
Column names: ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length', 'Eccentricity', 'Convex_Area', 'Extent', 'Class']
Index(['Class'], dtype='object')
shape of x: (3810, 5)


x_ds, y_ds = preprocess("wdbc_binned_kmeans.csv", 1, True, ['ID'], [])

x_ds, y_ds = preprocess("wdbc_binned_kmeans.csv", 1, True, ['ID'], [])

Run the implement function for diffrent models

In [12]:
def compute_model_differences(model_name, globals_dict):
    """
    Creates variables like knn_0_bacc, knn_1_precision, etc.
    Each value formatted for LaTeX:
      e.g. '+0.011 \\scriptsize(%1)' or '-0.002 \\scriptsize(%0.2)'
    If diff == 0 → '0'
    """
    metrics = {
        "bacc": "balanced_accuracy",
        "precision": "precision_weighted",
        "recall": "recall_weighted"
    }

    for i in range(0, 8, 2):  # pairs: (1,2), (3,4), (5,6), (7,8)
        idx = i // 2
        cv_a = globals_dict[f"cv_results{i+1}"]
        cv_b = globals_dict[f"cv_results{i+2}"]

        for short, metric in metrics.items():
            var_name = f"{model_name}_{idx}_{short}"

            mean_a = cv_a[f"test_{metric}"].mean()
            mean_b = cv_b[f"test_{metric}"].mean()
            diff = round(mean_b - mean_a, 3)

            if diff == 0:
                formatted = "0"
            else:
                pct = round(abs(diff / mean_a * 100), 1)
                sign = "+" if diff > 0 else ""
                formatted = f"{sign}{diff:.3f} \\scriptsize(\\%{pct})"

            globals_dict[var_name] = formatted


In [13]:
#KNN
from sklearn.neighbors import KNeighborsClassifier
cv_results1 = implement(KNeighborsClassifier(),
                     x_ds1, y_ds1)

cv_results2 = implement(KNeighborsClassifier(),
                     x_ds2, y_ds2)

cv_results3 = implement(KNeighborsClassifier(),
                     x_ds3, y_ds3)

cv_results4 = implement(KNeighborsClassifier(),
                     x_ds4, y_ds4)

cv_results5 = implement(KNeighborsClassifier(),
                     x_ds5, y_ds5)

cv_results6 = implement(KNeighborsClassifier(),
                     x_ds6, y_ds6)

cv_results7 = implement(KNeighborsClassifier(),
                     x_ds7, y_ds7)

cv_results8 = implement(KNeighborsClassifier(),
                     x_ds8, y_ds8)


compute_model_differences("knn", globals())


StratifiedKFold CV Balanced Accuracy: 0.928±0.01)
StratifiedKFold CV Precision: 0.916±0.01)
StratifiedKFold CV Recall: 0.915(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.916±0.01)
StratifiedKFold CV Precision: 0.906±0.01)
StratifiedKFold CV Recall: 0.906(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.876±0.09)
StratifiedKFold CV Precision: 0.931±0.04)
StratifiedKFold CV Recall: 0.923(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.857±0.08)
StratifiedKFold CV Precision: 0.903±0.05)
StratifiedKFold CV Recall: 0.891(±0.06)
StratifiedKFold CV Balanced Accuracy: 0.913±0.02)
StratifiedKFold CV Precision: 0.915±0.01)
StratifiedKFold CV Recall: 0.915(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.916±0.01)
StratifiedKFold CV Precisi

knn_0_bacc= round(cv_results2['test_balanced_accuracy'].mean() - cv_results1['test_balanced_accuracy'].mean(), 3)
knn_1_bacc= round(cv_results4['test_balanced_accuracy'].mean() - cv_results3['test_balanced_accuracy'].mean(), 3)
knn_2_bacc= round(cv_results6['test_balanced_accuracy'].mean() - cv_results5['test_balanced_accuracy'].mean(), 3)
knn_3_bacc= round(cv_results8['test_balanced_accuracy'].mean() - cv_results7['test_balanced_accuracy'].mean(), 3)

knn_0_precision= round(cv_results2['test_precision_weighted'].mean() - cv_results1['test_precision_weighted'].mean(), 3)
knn_1_precision= round(cv_results4['test_precision_weighted'].mean() - cv_results3['test_precision_weighted'].mean(), 3)
knn_2_precision= round(cv_results6['test_precision_weighted'].mean() - cv_results5['test_precision_weighted'].mean(), 3)
knn_3_precision= round(cv_results8['test_precision_weighted'].mean() - cv_results7['test_precision_weighted'].mean(), 3)

knn_0_recall= round(cv_results2['test_recall_weighted'].mean() - cv_results1['test_recall_weighted'].mean(), 3)
knn_1_recall= round(cv_results4['test_recall_weighted'].mean() - cv_results3['test_recall_weighted'].mean(), 3)
knn_2_recall= round(cv_results6['test_recall_weighted'].mean() - cv_results5['test_recall_weighted'].mean(), 3)
knn_3_recall= round(cv_results8['test_recall_weighted'].mean() - cv_results7['test_recall_weighted'].mean(), 3)

In [14]:
#Ada Boost
from sklearn.ensemble import AdaBoostClassifier
cv_results1 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds1, y_ds1)

cv_results2 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds2, y_ds2)

cv_results3 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds3, y_ds3)

cv_results4 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds4, y_ds4)

cv_results5 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds5, y_ds5)

cv_results6 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds6, y_ds6)

cv_results7 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds7, y_ds7)

cv_results8 = implement(AdaBoostClassifier(n_estimators=100, random_state=42),
                     x_ds8, y_ds8)

compute_model_differences("ada", globals())

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.772±0.05)
StratifiedKFold CV Precision: 0.834±0.03)
StratifiedKFold CV Recall: 0.827(±0.02)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.743±0.05)
StratifiedKFold CV Precision: 0.830±0.03)
StratifiedKFold CV Recall: 0.824(±0.02)
StratifiedKFold CV Balanced Accuracy: 0.969±0.04)
StratifiedKFold CV Precision: 0.975±0.03)
StratifiedKFold CV Recall: 0.971(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.874±0.05)
StratifiedKFold CV Precision: 0.922±0.03)
StratifiedKFold CV Recall: 0.913(±0.03)
StratifiedKFold CV Balanced Accuracy: 0.807±0.07)
StratifiedKFold CV Precision: 0.870±0.05)
StratifiedKFold CV Recall: 0.867(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.927±0.01)
StratifiedKFold CV Precision: 0.928±0.01)
StratifiedKFold CV Recall: 0.927(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.927±0.01)
StratifiedKFold CV Precision: 0.928±0.01)
StratifiedKFold CV Recall: 0.927(±0.01)


In [15]:
#SVM
from sklearn.svm import SVC 
cv_results1 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds1, y_ds1)

cv_results2 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds2, y_ds2)

cv_results3 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds3, y_ds3)

cv_results4 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds4, y_ds4)

cv_results5 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds5, y_ds5)

cv_results6 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds6, y_ds6)

cv_results7 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds7, y_ds7)

cv_results8 = implement(SVC(max_iter = -1, random_state=42),
                     x_ds8, y_ds8)

compute_model_differences("svm", globals())

StratifiedKFold CV Balanced Accuracy: 0.935±0.01)
StratifiedKFold CV Precision: 0.924±0.01)
StratifiedKFold CV Recall: 0.923(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.921±0.00)
StratifiedKFold CV Precision: 0.913±0.00)
StratifiedKFold CV Recall: 0.911(±0.00)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.742±0.11)
StratifiedKFold CV Precision: 0.867±0.11)
StratifiedKFold CV Recall: 0.871(±0.06)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.727±0.10)
StratifiedKFold CV Precision: 0.853±0.11)
StratifiedKFold CV Recall: 0.861(±0.06)
StratifiedKFold CV Balanced Accuracy: 0.927±0.01)
StratifiedKFold CV Precision: 0.929±0.01)
StratifiedKFold CV Recall: 0.929(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.921±0.01)
StratifiedKFold CV Precision: 0.924±0.01)
StratifiedKFold CV Recall: 0.923(±0.01)


In [16]:
#Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB 

cv_results1 = implement(GaussianNB(),
                     x_ds1, y_ds1)

cv_results2 = implement(GaussianNB(),
                     x_ds2, y_ds2)

cv_results3 = implement(GaussianNB(),
                     x_ds3, y_ds3)

cv_results4 = implement(GaussianNB(),
                     x_ds4, y_ds4)

cv_results5 = implement(GaussianNB(),
                     x_ds5, y_ds5)

cv_results6 = implement(GaussianNB(),
                     x_ds6, y_ds6)

cv_results7 = implement(GaussianNB(),
                     x_ds7, y_ds7)

cv_results8 = implement(GaussianNB(),
                     x_ds8, y_ds8)

compute_model_differences("gnb", globals())

StratifiedKFold CV Balanced Accuracy: 0.902±0.01)
StratifiedKFold CV Precision: 0.894±0.01)
StratifiedKFold CV Recall: 0.891(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.891±0.01)
StratifiedKFold CV Precision: 0.885±0.01)
StratifiedKFold CV Recall: 0.881(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.970±0.05)
StratifiedKFold CV Precision: 0.973±0.04)
StratifiedKFold CV Recall: 0.971(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.982±0.04)
StratifiedKFold CV Precision: 0.985±0.03)
StratifiedKFold CV Recall: 0.982(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.778±0.09)
StratifiedKFold CV Precision: 0.840±0.06)
StratifiedKFold CV Recall: 0.732(±0.07)
StratifiedKFold CV Balanced Accuracy: 0.813±0.06)
StratifiedKFold CV Precision: 0.860±0.04)
StratifiedKFold CV Recall: 0.758(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.914±0.01)
StratifiedKFold CV Precision: 0.916±0.01)
StratifiedKFold CV Recall: 0.916(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.917±0.01)
StratifiedKFold CV Precisi

In [17]:
#Random Forests
from sklearn.ensemble import RandomForestClassifier

cv_results1 = implement(RandomForestClassifier(),
                     x_ds1, y_ds1)

cv_results2 = implement(RandomForestClassifier(),
                     x_ds2, y_ds2)

cv_results3 = implement(RandomForestClassifier(),
                     x_ds3, y_ds3)

cv_results4 = implement(RandomForestClassifier(),
                     x_ds4, y_ds4)

cv_results5 = implement(RandomForestClassifier(),
                     x_ds5, y_ds5)

cv_results6 = implement(RandomForestClassifier(),
                     x_ds6, y_ds6)

cv_results7 = implement(RandomForestClassifier(),
                     x_ds7, y_ds7)

cv_results8 = implement(RandomForestClassifier(),
                     x_ds8, y_ds8)

compute_model_differences("rf", globals())

StratifiedKFold CV Balanced Accuracy: 0.931±0.01)
StratifiedKFold CV Precision: 0.920±0.01)
StratifiedKFold CV Recall: 0.920(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.913±0.01)
StratifiedKFold CV Precision: 0.902±0.01)
StratifiedKFold CV Recall: 0.901(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.880±0.09)
StratifiedKFold CV Precision: 0.927±0.05)
StratifiedKFold CV Recall: 0.917(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.843±0.09)
StratifiedKFold CV Precision: 0.897±0.06)
StratifiedKFold CV Recall: 0.886(±0.06)
StratifiedKFold CV Balanced Accuracy: 0.906±0.01)
StratifiedKFold CV Precision: 0.908±0.01)
StratifiedKFold CV Recall: 0.908(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.918±0.01)
StratifiedKFold CV Precisi

In [18]:
#Decesion Trees
from sklearn.tree import DecisionTreeClassifier


cv_results1 = implement(DecisionTreeClassifier(),
                     x_ds1, y_ds1)

cv_results2 = implement(DecisionTreeClassifier(),
                     x_ds2, y_ds2)

cv_results3 = implement(DecisionTreeClassifier(),
                     x_ds3, y_ds3)

cv_results4 = implement(DecisionTreeClassifier(),
                     x_ds4, y_ds4)

cv_results5 = implement(DecisionTreeClassifier(),
                     x_ds5, y_ds5)

cv_results6 = implement(DecisionTreeClassifier(),
                     x_ds6, y_ds6)

cv_results7 = implement(DecisionTreeClassifier(),
                     x_ds7, y_ds7)

cv_results8 = implement(DecisionTreeClassifier(),
                     x_ds8, y_ds8)

compute_model_differences("dt", globals())

StratifiedKFold CV Balanced Accuracy: 0.909±0.01)
StratifiedKFold CV Precision: 0.895±0.01)
StratifiedKFold CV Recall: 0.894(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.904±0.01)
StratifiedKFold CV Precision: 0.892±0.01)
StratifiedKFold CV Recall: 0.892(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.971±0.06)
StratifiedKFold CV Precision: 0.972±0.05)
StratifiedKFold CV Recall: 0.971(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.960±0.05)
StratifiedKFold CV Precision: 0.962±0.05)
StratifiedKFold CV Recall: 0.959(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.873±0.11)
StratifiedKFold CV Precision: 0.919±0.04)
StratifiedKFold CV Recall: 0.907(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.867±0.10)
StratifiedKFold CV Precision: 0.918±0.04)
StratifiedKFold CV Recall: 0.902(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.891±0.01)
StratifiedKFold CV Precision: 0.892±0.01)
StratifiedKFold CV Recall: 0.891(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.915±0.01)
StratifiedKFold CV Precisi

Comparison results for futher use at LaTex table format

In [19]:
print (f"""
Beans 
& accuracy
& {knn_0_bacc}
& {ada_0_bacc}
& {svm_0_bacc}
& {gnb_0_bacc}
& {rf_0_bacc}
& {dt_0_bacc}
\\\\
Beans 
& precision 
& {knn_0_precision}
& {ada_0_precision}
& {svm_0_precision}
& {gnb_0_precision}
& {rf_0_precision}
& {dt_0_precision}
\\\\
Beans 
& recall 
& {knn_0_recall}
& {ada_0_recall}
& {svm_0_recall}
& {gnb_0_recall}
& {rf_0_recall}
& {dt_0_recall}
\\\\Divorce 
& accuracy
& {knn_1_bacc}
& {ada_1_bacc}
& {svm_1_bacc}
& {gnb_1_bacc}
& {rf_1_bacc}
& {dt_1_bacc}
\\\\
Divorce 
& precision 
& {knn_1_precision}
& {ada_1_precision}
& {svm_1_precision}
& {gnb_1_precision}
& {rf_1_precision}
& {dt_1_precision}
\\\\
Divorce
& recall 
& {knn_1_recall}
& {ada_1_recall}
& {svm_1_recall}
& {gnb_1_recall}
& {rf_1_recall}
& {dt_1_recall}
\\\\Parkinson's 
& accuracy
& {knn_2_bacc}
& {ada_2_bacc}
& {svm_2_bacc}
& {gnb_2_bacc}
& {rf_2_bacc}
& {dt_2_bacc}
\\\\
Parkinson's 
& precision 
& {knn_2_precision}
& {ada_2_precision}
& {svm_2_precision}
& {gnb_2_precision}
& {rf_2_precision}
& {dt_2_precision} 
\\\\
Parkinson's 
& recall 
& {knn_2_recall}
& {ada_2_recall}
& {svm_2_recall}
& {gnb_2_recall}
& {rf_2_recall}
& {dt_2_recall}
\\\\
Rice 
& accuracy
& {knn_3_bacc}
& {ada_3_bacc}
& {svm_3_bacc}
& {gnb_3_bacc}
& {rf_3_bacc}
& {dt_3_bacc}
\\\\
Rice 
& precision 
& {knn_3_precision}
& {ada_3_precision}
& {svm_3_precision}
& {gnb_3_precision}
& {rf_3_precision}
& {dt_3_precision}
\\\\
Rice 
& recall 
& {knn_3_recall}
& {ada_3_recall}
& {svm_3_recall}
& {gnb_3_recall}
& {rf_3_recall}
& {dt_3_recall}
\\\\
""")


Beans 
& accuracy
& -0.011 \scriptsize(\%1.2)
& -0.029 \scriptsize(\%3.8)
& -0.014 \scriptsize(\%1.5)
& -0.011 \scriptsize(\%1.2)
& -0.018 \scriptsize(\%1.9)
& -0.005 \scriptsize(\%0.6)
\\
Beans 
& precision 
& -0.010 \scriptsize(\%1.1)
& -0.004 \scriptsize(\%0.5)
& -0.011 \scriptsize(\%1.2)
& -0.009 \scriptsize(\%1.0)
& -0.018 \scriptsize(\%2.0)
& -0.002 \scriptsize(\%0.2)
\\
Beans 
& recall 
& -0.010 \scriptsize(\%1.1)
& -0.003 \scriptsize(\%0.4)
& -0.012 \scriptsize(\%1.3)
& -0.010 \scriptsize(\%1.1)
& -0.019 \scriptsize(\%2.1)
& -0.002 \scriptsize(\%0.2)
\\Divorce 
& accuracy
& 0
& +0.006 \scriptsize(\%0.6)
& 0
& +0.012 \scriptsize(\%1.2)
& 0
& -0.011 \scriptsize(\%1.1)
\\
Divorce 
& precision 
& 0
& +0.005 \scriptsize(\%0.5)
& 0
& +0.012 \scriptsize(\%1.2)
& 0
& -0.010 \scriptsize(\%1.0)
\\
Divorce
& recall 
& 0
& +0.006 \scriptsize(\%0.6)
& 0
& +0.012 \scriptsize(\%1.2)
& 0
& -0.012 \scriptsize(\%1.2)
\\Parkinson's 
& accuracy
& -0.019 \scriptsize(\%2.2)
& -0.067 \scriptsize(\%7