imports

In [79]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, balanced_accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.preprocessing import LabelEncoder

In [80]:

def preprocess(file_path,target_column_index,has_header,delete_list,extraction_list):
    """Load a comma-separated dataset and split it into features and target.

    Steps, in order: read the CSV, label-encode every object-dtype column,
    shuffle the rows with a fixed seed, then separate the target column from
    the features. No train/test split is performed here.

    Parameters
    ----------
    file_path : path of the .csv file to read.
    target_column_index : positional index of the target column.
    has_header : truthy if the first row of the file holds the column names.
    delete_list : list of column labels to drop from the features.
    extraction_list : further list of column labels to drop from the features
        (concatenated with ``delete_list``).

    Returns
    -------
    (x_ds, y_ds) : the feature DataFrame and the target Series, both in the
        shuffled row order with a fresh 0..n-1 index.
    """
    # header=0 -> first row is the header; header=None -> the file has no header row.
    frame = pd.read_csv(file_path, header=0 if has_header else None, delimiter=",")
    print(f"Dataset shape: {frame.shape}")
    if has_header:
        print(f"Column names: {list(frame.columns)}")

    # Every object-dtype column (this can include the target) is replaced by
    # integer codes from its own LabelEncoder.
    text_columns = frame.select_dtypes(include=['object']).columns
    print(text_columns)
    for name in text_columns:
        frame[name] = LabelEncoder().fit_transform(frame[name])

    # Fixed seed so the row order is reproducible between runs.
    shuffled = frame.sample(frac=1, random_state=42).reset_index(drop=True)

    target_label = frame.columns[target_column_index]
    unwanted = delete_list + extraction_list
    x_ds = shuffled.drop(target_label, axis=1).drop(columns=unwanted)
    y_ds = shuffled.iloc[:, target_column_index]

    print(f'shape of x: {x_ds.shape}')

    return x_ds, y_ds

In [81]:
def implement(model, x_ds, y_ds):
    """Evaluate ``model`` on a dataset with 10-fold stratified cross-validation.

    The estimator is passed to ``cross_validate`` together with a
    ``StratifiedKFold(n_splits=10)`` splitter; mean and standard deviation of
    each metric over the folds are printed as ``mean(±std)``.

    Note: there is no compile step, no epochs, no train-vs-test accuracy
    comparison and no ``.argmax(axis=1)`` on the predictions, so this function
    is not appropriate for perceptron-style (neural network) learners.

    Parameters
    ----------
    model : estimator object handed to ``cross_validate``.
    x_ds : feature matrix.
    y_ds : target labels used for stratification and scoring.

    Returns
    -------
    The dictionary returned by ``cross_validate`` (keys such as
    ``test_balanced_accuracy``, ``test_accuracy``, ...).
    """
    skf = StratifiedKFold(n_splits=10)
    scoring = ['balanced_accuracy', 'accuracy', 'precision_weighted', 'recall_weighted']
    cv_results = cross_validate(model, x_ds, y_ds, cv=skf, scoring=scoring)

    # Scorer name -> label used in the printed summary (same order as before).
    summary_labels = {
        'balanced_accuracy': "Balanced Accuracy",
        'accuracy': "Accuracy",
        'precision_weighted': "Precision",
        'recall_weighted': "Recall",
    }
    for scorer, label in summary_labels.items():
        fold_scores = cv_results[f"test_{scorer}"]
        # Every line now uses the balanced "mean(±std)" form; previously three
        # of the four lines were missing the opening parenthesis.
        print(f"StratifiedKFold CV {label}: {fold_scores.mean():.3f}(±{fold_scores.std():.2f})")

    return cv_results
        

Select the dataset!

Runs `preprocess` for the different datasets.

In [82]:
# Beans, all features.
x_ds1, y_ds1 = preprocess(
    file_path="beans_kmeans.csv",
    target_column_index=16,
    has_header=True,
    delete_list=[],
    extraction_list=[],
)

Dataset shape: (13611, 17)
Column names: ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2', 'ShapeFactor3', 'ShapeFactor4', 'Class']
Index(['Class'], dtype='object')
shape of x: (13611, 16)


In [83]:
# Beans, with the columns listed in extraction_list dropped as well.
x_ds2, y_ds2 = preprocess(
    file_path="beans_kmeans.csv",
    target_column_index=16,
    has_header=True,
    delete_list=[],
    extraction_list=["ShapeFactor4", "Solidity", "Extent"],
)

Dataset shape: (13611, 17)
Column names: ['Area', 'Perimeter', 'MajorAxisLength', 'MinorAxisLength', 'AspectRation', 'Eccentricity', 'ConvexArea', 'EquivDiameter', 'Extent', 'Solidity', 'roundness', 'Compactness', 'ShapeFactor1', 'ShapeFactor2', 'ShapeFactor3', 'ShapeFactor4', 'Class']
Index(['Class'], dtype='object')
shape of x: (13611, 13)


x_ds, y_ds = preprocess("diabetes_kmeans.csv", 8, True, [], [])

x_ds, y_ds = preprocess("diabetes_kmeans.csv", 8, True, [], [])

In [84]:
# Divorce, all features.
x_ds3, y_ds3 = preprocess(
    file_path="divorce.csv",
    target_column_index=54,
    has_header=True,
    delete_list=[],
    extraction_list=[],
)

Dataset shape: (170, 55)
Column names: ['Atr1', 'Atr2', 'Atr3', 'Atr4', 'Atr5', 'Atr6', 'Atr7', 'Atr8', 'Atr9', 'Atr10', 'Atr11', 'Atr12', 'Atr13', 'Atr14', 'Atr15', 'Atr16', 'Atr17', 'Atr18', 'Atr19', 'Atr20', 'Atr21', 'Atr22', 'Atr23', 'Atr24', 'Atr25', 'Atr26', 'Atr27', 'Atr28', 'Atr29', 'Atr30', 'Atr31', 'Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr37', 'Atr38', 'Atr39', 'Atr40', 'Atr41', 'Atr42', 'Atr43', 'Atr44', 'Atr45', 'Atr46', 'Atr47', 'Atr48', 'Atr49', 'Atr50', 'Atr51', 'Atr52', 'Atr53', 'Atr54', 'Class']
Index([], dtype='object')
shape of x: (170, 54)


In [85]:
# Divorce, with the columns listed in extraction_list dropped as well.
x_ds4, y_ds4 = preprocess(
    file_path="divorce.csv",
    target_column_index=54,
    has_header=True,
    delete_list=[],
    extraction_list=[
        'Atr53', 'Atr7', 'Atr47', 'Atr48', 'Atr52', 'Atr43',
        'Atr45', 'Atr6', 'Atr46', 'Atr42', 'Atr51', 'Atr49',
    ],
)

Dataset shape: (170, 55)
Column names: ['Atr1', 'Atr2', 'Atr3', 'Atr4', 'Atr5', 'Atr6', 'Atr7', 'Atr8', 'Atr9', 'Atr10', 'Atr11', 'Atr12', 'Atr13', 'Atr14', 'Atr15', 'Atr16', 'Atr17', 'Atr18', 'Atr19', 'Atr20', 'Atr21', 'Atr22', 'Atr23', 'Atr24', 'Atr25', 'Atr26', 'Atr27', 'Atr28', 'Atr29', 'Atr30', 'Atr31', 'Atr32', 'Atr33', 'Atr34', 'Atr35', 'Atr36', 'Atr37', 'Atr38', 'Atr39', 'Atr40', 'Atr41', 'Atr42', 'Atr43', 'Atr44', 'Atr45', 'Atr46', 'Atr47', 'Atr48', 'Atr49', 'Atr50', 'Atr51', 'Atr52', 'Atr53', 'Atr54', 'Class']
Index([], dtype='object')
shape of x: (170, 42)


In [86]:
# Parkinson's, all features except the 'name' column.
x_ds5, y_ds5 = preprocess(
    file_path="parkinsons_kmeans.csv",
    target_column_index=17,
    has_header=True,
    delete_list=['name'],
    extraction_list=[],
)

Dataset shape: (195, 24)
Column names: ['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']
Index(['name'], dtype='object')
shape of x: (195, 22)


In [None]:
# Parkinson's, without 'name' and with the extraction_list columns dropped as well.
x_ds6, y_ds6 = preprocess(
    file_path="parkinsons_kmeans.csv",
    target_column_index=17,
    has_header=True,
    delete_list=['name'],
    extraction_list=[
        'DFA', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'Jitter:DDP',
        'HNR', 'MDVP:Jitter(%)', 'D2', 'MDVP:PPQ',
    ],
)

Dataset shape: (195, 24)
Column names: ['name', 'MDVP:Fo(Hz)', 'MDVP:Fhi(Hz)', 'MDVP:Flo(Hz)', 'MDVP:Jitter(%)', 'MDVP:Jitter(Abs)', 'MDVP:RAP', 'MDVP:PPQ', 'Jitter:DDP', 'MDVP:Shimmer', 'MDVP:Shimmer(dB)', 'Shimmer:APQ3', 'Shimmer:APQ5', 'MDVP:APQ', 'Shimmer:DDA', 'NHR', 'HNR', 'status', 'RPDE', 'DFA', 'spread1', 'spread2', 'D2', 'PPE']
Index(['name'], dtype='object')
shape of x: (195, 13)


In [88]:
# Rice, all features.
x_ds7, y_ds7 = preprocess(
    file_path="rice_binned_kmeans.csv",
    target_column_index=7,
    has_header=True,
    delete_list=[],
    extraction_list=[],
)

Dataset shape: (3810, 8)
Column names: ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length', 'Eccentricity', 'Convex_Area', 'Extent', 'Class']
Index(['Class'], dtype='object')
shape of x: (3810, 7)


In [89]:
# Rice, with the columns listed in extraction_list dropped as well.
x_ds8, y_ds8 = preprocess(
    file_path="rice_binned_kmeans.csv",
    target_column_index=7,
    has_header=True,
    delete_list=[],
    extraction_list=['Minor_Axis_Length', 'Extent'],
)

Dataset shape: (3810, 8)
Column names: ['Area', 'Perimeter', 'Major_Axis_Length', 'Minor_Axis_Length', 'Eccentricity', 'Convex_Area', 'Extent', 'Class']
Index(['Class'], dtype='object')
shape of x: (3810, 5)


In [90]:
# WDBC, all features except the 'ID' column.
x_ds9, y_ds9 = preprocess(
    file_path="wdbc_binned_kmeans.csv",
    target_column_index=1,
    has_header=True,
    delete_list=['ID'],
    extraction_list=[],
)

Dataset shape: (569, 32)
Column names: ['ID', 'Diagnosis', 'Radius_Mean', 'Texture_Mean', 'Perimeter_Mean', 'Area_Mean', 'Smoothness_Mean', 'Compactness_Mean', 'Concavity_Mean', 'Concave_Points_Mean', 'Symmetry_Mean', 'Fractal_Dimension_Mean', 'Radius_SE', 'Texture_SE', 'Perimeter_SE', 'Area_SE', 'Smoothness_SE', 'Compactness_SE', 'Concavity_SE', 'Concave_Points_SE', 'Symmetry_SE', 'Fractal_Dimension_SE', 'Radius_Worst', 'Texture_Worst', 'Perimeter_Worst', 'Area_Worst', 'Smoothness_Worst', 'Compactness_Worst', 'Concavity_Worst', 'Concave_Points_Worst', 'Symmetry_Worst', 'Fractal_Dimension_Worst']
Index(['Diagnosis'], dtype='object')
shape of x: (569, 30)


In [91]:
# WDBC, without 'ID' and with the extraction_list columns dropped as well.
x_ds10, y_ds10 = preprocess(
    file_path="wdbc_binned_kmeans.csv",
    target_column_index=1,
    has_header=True,
    delete_list=['ID'],
    extraction_list=[
        "Texture_Mean", "Concave_Points_SE", "Concavity_SE", "Texture_Worst",
        "Smoothness_Worst", "Symmetry_Worst", "Compactness_SE", "Smoothness_Mean",
        "Fractal_Dimension_Worst", "Symmetry_Mean", "Fractal_Dimension_SE",
        "Symmetry_SE", "Smoothness_SE", "Texture_SE", "Fractal_Dimension_Mean",
    ],
)

Dataset shape: (569, 32)
Column names: ['ID', 'Diagnosis', 'Radius_Mean', 'Texture_Mean', 'Perimeter_Mean', 'Area_Mean', 'Smoothness_Mean', 'Compactness_Mean', 'Concavity_Mean', 'Concave_Points_Mean', 'Symmetry_Mean', 'Fractal_Dimension_Mean', 'Radius_SE', 'Texture_SE', 'Perimeter_SE', 'Area_SE', 'Smoothness_SE', 'Compactness_SE', 'Concavity_SE', 'Concave_Points_SE', 'Symmetry_SE', 'Fractal_Dimension_SE', 'Radius_Worst', 'Texture_Worst', 'Perimeter_Worst', 'Area_Worst', 'Smoothness_Worst', 'Compactness_Worst', 'Concavity_Worst', 'Concave_Points_Worst', 'Symmetry_Worst', 'Fractal_Dimension_Worst']
Index(['Diagnosis'], dtype='object')
shape of x: (569, 15)


Compute the model

# compute only the model
def compute_model_differences(model_name, globals_dict):
    """Store formatted mean ± std strings for cv_results1, 3, 5 and 7.

    For each of those results (numbered 0..3 in that order) and for each of
    the metrics bacc / precision / recall, the entry
    ``<model_name>_<position>_<metric>`` is written into ``globals_dict``,
    e.g. ``knn_0_bacc``. Values look like ``"0.713 \\scriptsize(±0.02)"``.

    ``globals_dict`` must already contain the ``cv_results<N>`` entries; the
    function returns nothing and communicates only through ``globals_dict``.

    Note: this notebook contains more than one definition of this name; the
    one executed last is the one in effect.
    """
    suffix_and_scorer = (
        ("bacc", "balanced_accuracy"),
        ("precision", "precision_weighted"),
        ("recall", "recall_weighted"),
    )

    for position, result_no in enumerate((1, 3, 5, 7)):
        fold_results = globals_dict[f"cv_results{result_no}"]

        for suffix, scorer in suffix_and_scorer:
            scores = fold_results[f"test_{scorer}"]
            globals_dict[f"{model_name}_{position}_{suffix}"] = (
                f"{scores.mean():.3f} \\scriptsize(±{scores.std():.2f})"
            )

Run the `implement` function for the different models

In [92]:
def compute_model_differences(model_name, globals_dict):
    """Store formatted mean ± std strings for cv_results1, 3, 5, 7 and 9.

    Creates entries such as ``knn_0_bacc``, ``knn_0_precision``,
    ``knn_0_recall`` ... ``knn_4_recall`` in ``globals_dict``; the number is
    the position of the result in the sequence 1, 3, 5, 7, 9. Each value is
    formatted as ``'0.915 \\scriptsize(±0.01)'``.

    NOTE: the ``cv_results<N>`` entries are read from ``globals_dict`` at call
    time, so run implement() for one model and call this function immediately
    afterwards, before the next model overwrites those results.
    """
    def as_latex(scores):
        # mean with 3 decimals, population std (numpy default) with 2 decimals
        return f"{scores.mean():.3f} \\scriptsize(±{scores.std():.2f})"

    scorer_for = {
        "bacc": "balanced_accuracy",
        "precision": "precision_weighted",
        "recall": "recall_weighted"
    }

    for slot, result_no in enumerate((1, 3, 5, 7, 9)):
        per_fold = globals_dict[f"cv_results{result_no}"]
        for tag in scorer_for:
            target_name = f"{model_name}_{slot}_{tag}"
            globals_dict[target_name] = as_latex(per_fold[f"test_{scorer_for[tag]}"])

def compute_model_differences(model_name, globals_dict):
    """Store the change in mean score between consecutive result pairs.

    The pairs are (cv_results1, cv_results2), (3, 4), (5, 6), (7, 8) and
    (9, 10), numbered 0..4. For every pair and every metric
    (bacc / precision / recall) the entry ``<model_name>_<pair>_<metric>`` is
    written into ``globals_dict`` (e.g. ``knn_0_bacc``).

    The value is the difference "second mean minus first mean", rounded to
    three decimals, followed by its magnitude relative to the first mean as a
    percentage with one decimal, formatted for LaTeX:
      e.g. '+0.011 \\scriptsize(\\%1.2)' or '-0.002 \\scriptsize(\\%0.2)'
    A difference that rounds to zero is stored as '0'.
    """
    scorer_for = {
        "bacc": "balanced_accuracy",
        "precision": "precision_weighted",
        "recall": "recall_weighted"
    }

    # first = 1, 3, 5, 7, 9 -> compared against first + 1
    for pair_no, first in enumerate(range(1, 11, 2)):
        baseline = globals_dict[f"cv_results{first}"]
        variant = globals_dict[f"cv_results{first + 1}"]

        for tag, scorer in scorer_for.items():
            score_key = f"test_{scorer}"
            baseline_mean = baseline[score_key].mean()
            variant_mean = variant[score_key].mean()
            delta = round(variant_mean - baseline_mean, 3)

            if delta == 0:
                cell = "0"
            else:
                percent = round(abs(delta / baseline_mean * 100), 1)
                # negative numbers already carry their own sign
                prefix = "+" if delta > 0 else ""
                cell = f"{prefix}{delta:.3f} \\scriptsize(\\%{percent})"

            globals_dict[f"{model_name}_{pair_no}_{tag}"] = cell


In [93]:
# KNN
from sklearn.neighbors import KNeighborsClassifier

# Cross-validate a fresh KNeighborsClassifier on each prepared dataset, in
# order. The results keep the names cv_results1 ... cv_results10 because
# compute_model_differences() looks them up by those names.
(
    cv_results1, cv_results2, cv_results3, cv_results4, cv_results5,
    cv_results6, cv_results7, cv_results8, cv_results9, cv_results10,
) = [
    implement(KNeighborsClassifier(), features, target)
    for features, target in [
        (x_ds1, y_ds1), (x_ds2, y_ds2),
        (x_ds3, y_ds3), (x_ds4, y_ds4),
        (x_ds5, y_ds5), (x_ds6, y_ds6),
        (x_ds7, y_ds7), (x_ds8, y_ds8),
        (x_ds9, y_ds9), (x_ds10, y_ds10),
    ]
]

compute_model_differences("knn", globals())


StratifiedKFold CV Balanced Accuracy: 0.928±0.01)
StratifiedKFold CV Accuracy: 0.915±0.01)
StratifiedKFold CV Precision: 0.916±0.01)
StratifiedKFold CV Recall: 0.915(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.916±0.01)
StratifiedKFold CV Accuracy: 0.906±0.01)
StratifiedKFold CV Precision: 0.906±0.01)
StratifiedKFold CV Recall: 0.906(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.876±0.09)
StratifiedKFold CV Accuracy: 0.923±0.04)
StratifiedKFold CV Precision: 0.931±0.04)
StratifiedKFold CV Recall: 0.923(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.807±0.08)
StratifiedKFold CV Accuracy: 0.872±0.05)
StratifiedKFold CV Precision: 0.872±0.05)
St

In [94]:
# AdaBoost
from sklearn.ensemble import AdaBoostClassifier

# Cross-validate a fresh AdaBoostClassifier on each prepared dataset, in
# order. The results keep the names cv_results1 ... cv_results10 because
# compute_model_differences() looks them up by those names.
(
    cv_results1, cv_results2, cv_results3, cv_results4, cv_results5,
    cv_results6, cv_results7, cv_results8, cv_results9, cv_results10,
) = [
    implement(AdaBoostClassifier(n_estimators=100, random_state=42), features, target)
    for features, target in [
        (x_ds1, y_ds1), (x_ds2, y_ds2),
        (x_ds3, y_ds3), (x_ds4, y_ds4),
        (x_ds5, y_ds5), (x_ds6, y_ds6),
        (x_ds7, y_ds7), (x_ds8, y_ds8),
        (x_ds9, y_ds9), (x_ds10, y_ds10),
    ]
]

compute_model_differences("ada", globals())

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.772±0.05)
StratifiedKFold CV Accuracy: 0.827±0.02)
StratifiedKFold CV Precision: 0.834±0.03)
StratifiedKFold CV Recall: 0.827(±0.02)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.743±0.05)
StratifiedKFold CV Accuracy: 0.824±0.02)
StratifiedKFold CV Precision: 0.830±0.03)
StratifiedKFold CV Recall: 0.824(±0.02)
StratifiedKFold CV Balanced Accuracy: 0.969±0.04)
StratifiedKFold CV Accuracy: 0.971±0.04)
StratifiedKFold CV Precision: 0.975±0.03)
StratifiedKFold CV Recall: 0.971(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.874±0.05)
StratifiedKFold CV Accuracy: 0.913±0.03)
StratifiedKFold CV Precision: 0.922±0.03)
StratifiedKFold CV Recall: 0.913(±0.03)
StratifiedKFold CV Balanced Accuracy: 0.788±0.06)
StratifiedKFold CV Accuracy: 0.846±0.04)
StratifiedKFold CV Precision: 0.855±0.04)
StratifiedKFold CV Recall: 0.846(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.927±0.01)
StratifiedKFold CV Accuracy: 0.927±0.01)
StratifiedKFold CV Precision: 0.928±0.01)
St

In [95]:
# SVM
from sklearn.svm import SVC

# Cross-validate a fresh SVC on each prepared dataset, in order. The results
# keep the names cv_results1 ... cv_results10 because
# compute_model_differences() looks them up by those names.
(
    cv_results1, cv_results2, cv_results3, cv_results4, cv_results5,
    cv_results6, cv_results7, cv_results8, cv_results9, cv_results10,
) = [
    implement(SVC(max_iter = -1, random_state=42), features, target)
    for features, target in [
        (x_ds1, y_ds1), (x_ds2, y_ds2),
        (x_ds3, y_ds3), (x_ds4, y_ds4),
        (x_ds5, y_ds5), (x_ds6, y_ds6),
        (x_ds7, y_ds7), (x_ds8, y_ds8),
        (x_ds9, y_ds9), (x_ds10, y_ds10),
    ]
]

compute_model_differences("svm", globals())

StratifiedKFold CV Balanced Accuracy: 0.935±0.01)
StratifiedKFold CV Accuracy: 0.923±0.01)
StratifiedKFold CV Precision: 0.924±0.01)
StratifiedKFold CV Recall: 0.923(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.921±0.00)
StratifiedKFold CV Accuracy: 0.911±0.00)
StratifiedKFold CV Precision: 0.913±0.00)
StratifiedKFold CV Recall: 0.911(±0.00)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.742±0.11)
StratifiedKFold CV Accuracy: 0.871±0.06)
StratifiedKFold CV Precision: 0.867±0.11)
StratifiedKFold CV Recall: 0.871(±0.06)


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


StratifiedKFold CV Balanced Accuracy: 0.762±0.07)
StratifiedKFold CV Accuracy: 0.851±0.06)
StratifiedKFold CV Precision: 0.861±0.06)
StratifiedKFold CV Recall: 0.851(±0.06)
StratifiedKFold CV Balanced Accuracy: 0.927±0.01)
StratifiedKFold CV Accuracy: 0.929±0.01)
StratifiedKFold CV Precision: 0.929±0.01)
StratifiedKFold CV Recall: 0.929(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.921±0.01)
StratifiedKFold CV Accuracy: 0.923±0.01)
StratifiedKFold CV Precision: 0.924±0.01)
StratifiedKFold CV Recall: 0.923(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.969±0.02)
StratifiedKFold CV Accuracy: 0.974±0.02)
StratifiedKFold CV Precision: 0.974±0.02)
StratifiedKFold CV Recall: 0.974(±0.02)
StratifiedKFold CV Balanced Accuracy: 0.943±0.03)
StratifiedKFold CV Accuracy: 0.951±0.03)
StratifiedKFold CV Precision: 0.951±0.03)
StratifiedKFold CV Recall: 0.951(±0.03)


In [96]:
# Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB

# Cross-validate a fresh GaussianNB on each prepared dataset, in order. The
# results keep the names cv_results1 ... cv_results10 because
# compute_model_differences() looks them up by those names.
(
    cv_results1, cv_results2, cv_results3, cv_results4, cv_results5,
    cv_results6, cv_results7, cv_results8, cv_results9, cv_results10,
) = [
    implement(GaussianNB(), features, target)
    for features, target in [
        (x_ds1, y_ds1), (x_ds2, y_ds2),
        (x_ds3, y_ds3), (x_ds4, y_ds4),
        (x_ds5, y_ds5), (x_ds6, y_ds6),
        (x_ds7, y_ds7), (x_ds8, y_ds8),
        (x_ds9, y_ds9), (x_ds10, y_ds10),
    ]
]

compute_model_differences("gnb", globals())

StratifiedKFold CV Balanced Accuracy: 0.902±0.01)
StratifiedKFold CV Accuracy: 0.891±0.01)
StratifiedKFold CV Precision: 0.894±0.01)
StratifiedKFold CV Recall: 0.891(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.891±0.01)
StratifiedKFold CV Accuracy: 0.881±0.01)
StratifiedKFold CV Precision: 0.885±0.01)
StratifiedKFold CV Recall: 0.881(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.970±0.05)
StratifiedKFold CV Accuracy: 0.971±0.05)
StratifiedKFold CV Precision: 0.973±0.04)
StratifiedKFold CV Recall: 0.971(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.982±0.04)
StratifiedKFold CV Accuracy: 0.982±0.04)
StratifiedKFold CV Precision: 0.985±0.03)
StratifiedKFold CV Recall: 0.982(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.778±0.09)
StratifiedKFold CV Accuracy: 0.732±0.07)
StratifiedKFold CV Precision: 0.840±0.06)
StratifiedKFold CV Recall: 0.732(±0.07)
StratifiedKFold CV Balanced Accuracy: 0.778±0.09)
StratifiedKFold CV Accuracy: 0.732±0.07)
StratifiedKFold CV Precision: 0.840±0.06)
St

In [97]:
# Random Forests
from sklearn.ensemble import RandomForestClassifier

# Cross-validate a fresh RandomForestClassifier on each prepared dataset, in
# order. The results keep the names cv_results1 ... cv_results10 because
# compute_model_differences() looks them up by those names.
(
    cv_results1, cv_results2, cv_results3, cv_results4, cv_results5,
    cv_results6, cv_results7, cv_results8, cv_results9, cv_results10,
) = [
    implement(RandomForestClassifier(random_state=42), features, target)
    for features, target in [
        (x_ds1, y_ds1), (x_ds2, y_ds2),
        (x_ds3, y_ds3), (x_ds4, y_ds4),
        (x_ds5, y_ds5), (x_ds6, y_ds6),
        (x_ds7, y_ds7), (x_ds8, y_ds8),
        (x_ds9, y_ds9), (x_ds10, y_ds10),
    ]
]

compute_model_differences("rf", globals())

StratifiedKFold CV Balanced Accuracy: 0.932±0.01)
StratifiedKFold CV Accuracy: 0.921±0.01)
StratifiedKFold CV Precision: 0.922±0.01)
StratifiedKFold CV Recall: 0.921(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.915±0.01)
StratifiedKFold CV Accuracy: 0.903±0.00)
StratifiedKFold CV Precision: 0.903±0.00)
StratifiedKFold CV Recall: 0.903(±0.00)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.976±0.04)
StratifiedKFold CV Accuracy: 0.976±0.04)
StratifiedKFold CV Precision: 0.980±0.03)
StratifiedKFold CV Recall: 0.976(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.886±0.08)
StratifiedKFold CV Accuracy: 0.928±0.04)
StratifiedKFold CV Precision: 0.935±0.04)
StratifiedKFold CV Recall: 0.928(±0.04)
StratifiedKFold CV Balanced Accuracy: 0.817±0.12)
StratifiedKFold CV Accuracy: 0.887±0.07)
StratifiedKFold CV Precision: 0.893±0.08)
St

In [98]:
# Decision Trees
from sklearn.tree import DecisionTreeClassifier

# Cross-validate a fresh DecisionTreeClassifier on each prepared dataset, in
# order. The results keep the names cv_results1 ... cv_results10 because
# compute_model_differences() looks them up by those names.
(
    cv_results1, cv_results2, cv_results3, cv_results4, cv_results5,
    cv_results6, cv_results7, cv_results8, cv_results9, cv_results10,
) = [
    implement(DecisionTreeClassifier(random_state=42), features, target)
    for features, target in [
        (x_ds1, y_ds1), (x_ds2, y_ds2),
        (x_ds3, y_ds3), (x_ds4, y_ds4),
        (x_ds5, y_ds5), (x_ds6, y_ds6),
        (x_ds7, y_ds7), (x_ds8, y_ds8),
        (x_ds9, y_ds9), (x_ds10, y_ds10),
    ]
]

compute_model_differences("dt", globals())

StratifiedKFold CV Balanced Accuracy: 0.908±0.01)
StratifiedKFold CV Accuracy: 0.894±0.01)
StratifiedKFold CV Precision: 0.894±0.01)
StratifiedKFold CV Recall: 0.894(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.904±0.01)
StratifiedKFold CV Accuracy: 0.892±0.01)
StratifiedKFold CV Precision: 0.892±0.01)
StratifiedKFold CV Recall: 0.892(±0.01)
StratifiedKFold CV Balanced Accuracy: 0.971±0.06)
StratifiedKFold CV Accuracy: 0.971±0.05)
StratifiedKFold CV Precision: 0.972±0.05)
StratifiedKFold CV Recall: 0.971(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.965±0.05)
StratifiedKFold CV Accuracy: 0.965±0.05)
StratifiedKFold CV Precision: 0.967±0.05)
StratifiedKFold CV Recall: 0.965(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.900±0.08)
StratifiedKFold CV Accuracy: 0.918±0.05)
StratifiedKFold CV Precision: 0.928±0.04)
StratifiedKFold CV Recall: 0.918(±0.05)
StratifiedKFold CV Balanced Accuracy: 0.812±0.13)
StratifiedKFold CV Accuracy: 0.846±0.07)
StratifiedKFold CV Precision: 0.853±0.09)
St

Comparison results for further use in LaTeX table format

In [99]:
# Emit the LaTeX table body: one row per (dataset, metric) and one
# "&"-separated cell per model, in the order knn, ada, svm, gnb, rf, dt.
# The <model>_<k>_<metric> names are the formatted strings that
# compute_model_differences() stored in globals(); k = 0..4 is used here for
# Beans, Divorce, Parkinson's, Rice and WDBC respectively.
# NOTE(review): the rows labelled "accuracy" are filled from the *_bacc
# (balanced accuracy) variables - confirm that this label is intended.
print (f"""
Beans 
& accuracy
& {knn_0_bacc}
& {ada_0_bacc}
& {svm_0_bacc}
& {gnb_0_bacc}
& {rf_0_bacc}
& {dt_0_bacc}
\\\\
Beans 
& precision 
& {knn_0_precision}
& {ada_0_precision}
& {svm_0_precision}
& {gnb_0_precision}
& {rf_0_precision}
& {dt_0_precision}
\\\\
Beans 
& recall 
& {knn_0_recall}
& {ada_0_recall}
& {svm_0_recall}
& {gnb_0_recall}
& {rf_0_recall}
& {dt_0_recall}
\\\\Divorce 
& accuracy
& {knn_1_bacc}
& {ada_1_bacc}
& {svm_1_bacc}
& {gnb_1_bacc}
& {rf_1_bacc}
& {dt_1_bacc}
\\\\
Divorce 
& precision 
& {knn_1_precision}
& {ada_1_precision}
& {svm_1_precision}
& {gnb_1_precision}
& {rf_1_precision}
& {dt_1_precision}
\\\\
Divorce
& recall 
& {knn_1_recall}
& {ada_1_recall}
& {svm_1_recall}
& {gnb_1_recall}
& {rf_1_recall}
& {dt_1_recall}
\\\\Parkinson's 
& accuracy
& {knn_2_bacc}
& {ada_2_bacc}
& {svm_2_bacc}
& {gnb_2_bacc}
& {rf_2_bacc}
& {dt_2_bacc}
\\\\
Parkinson's 
& precision 
& {knn_2_precision}
& {ada_2_precision}
& {svm_2_precision}
& {gnb_2_precision}
& {rf_2_precision}
& {dt_2_precision} 
\\\\
Parkinson's 
& recall 
& {knn_2_recall}
& {ada_2_recall}
& {svm_2_recall}
& {gnb_2_recall}
& {rf_2_recall}
& {dt_2_recall}
\\\\
Rice 
& accuracy
& {knn_3_bacc}
& {ada_3_bacc}
& {svm_3_bacc}
& {gnb_3_bacc}
& {rf_3_bacc}
& {dt_3_bacc}
\\\\
Rice 
& precision 
& {knn_3_precision}
& {ada_3_precision}
& {svm_3_precision}
& {gnb_3_precision}
& {rf_3_precision}
& {dt_3_precision}
\\\\
Rice 
& recall 
& {knn_3_recall}
& {ada_3_recall}
& {svm_3_recall}
& {gnb_3_recall}
& {rf_3_recall}
& {dt_3_recall}
\\\\
WDBC 
& accuracy
& {knn_4_bacc}
& {ada_4_bacc}
& {svm_4_bacc}
& {gnb_4_bacc}
& {rf_4_bacc}
& {dt_4_bacc}
\\\\
WDBC 
& precision 
& {knn_4_precision}
& {ada_4_precision}
& {svm_4_precision}
& {gnb_4_precision}
& {rf_4_precision}
& {dt_4_precision}
\\\\
WDBC 
& recall 
& {knn_4_recall}
& {ada_4_recall}
& {svm_4_recall}
& {gnb_4_recall}
& {rf_4_recall}
& {dt_4_recall}
\\\\
""")


Beans 
& accuracy
& 0.928 \scriptsize(±0.01)
& 0.772 \scriptsize(±0.05)
& 0.935 \scriptsize(±0.01)
& 0.902 \scriptsize(±0.01)
& 0.932 \scriptsize(±0.01)
& 0.908 \scriptsize(±0.01)
\\
Beans 
& precision 
& 0.916 \scriptsize(±0.01)
& 0.834 \scriptsize(±0.03)
& 0.924 \scriptsize(±0.01)
& 0.894 \scriptsize(±0.01)
& 0.922 \scriptsize(±0.01)
& 0.894 \scriptsize(±0.01)
\\
Beans 
& recall 
& 0.915 \scriptsize(±0.01)
& 0.827 \scriptsize(±0.02)
& 0.923 \scriptsize(±0.01)
& 0.891 \scriptsize(±0.01)
& 0.921 \scriptsize(±0.01)
& 0.894 \scriptsize(±0.01)
\\Divorce 
& accuracy
& 0.976 \scriptsize(±0.04)
& 0.969 \scriptsize(±0.04)
& 0.976 \scriptsize(±0.04)
& 0.970 \scriptsize(±0.05)
& 0.976 \scriptsize(±0.04)
& 0.971 \scriptsize(±0.06)
\\
Divorce 
& precision 
& 0.980 \scriptsize(±0.03)
& 0.975 \scriptsize(±0.03)
& 0.980 \scriptsize(±0.03)
& 0.973 \scriptsize(±0.04)
& 0.980 \scriptsize(±0.03)
& 0.972 \scriptsize(±0.05)
\\
Divorce
& recall 
& 0.976 \scriptsize(±0.04)
& 0.971 \scriptsize(±0.04)
& 0.97