# COGS 118A Final Project Fall 2024

## Dataset 2: [Estimation of Obesity Levels Based On Eating Habits and Physical Condition](https://archive.ics.uci.edu/dataset/544/estimation+of+obesity+levels+based+on+eating+habits+and+physical+condition)


In [1]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
wine_quality = fetch_ucirepo(id=186)

dataset = wine_quality.data.original

dataset.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,color
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red


In [3]:
dataset.shape

(6497, 13)

In [4]:
dataset.dtypes

fixed_acidity           float64
volatile_acidity        float64
citric_acid             float64
residual_sugar          float64
chlorides               float64
free_sulfur_dioxide     float64
total_sulfur_dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
color                    object
dtype: object

In [5]:
dataset.isnull().sum()

fixed_acidity           0
volatile_acidity        0
citric_acid             0
residual_sugar          0
chlorides               0
free_sulfur_dioxide     0
total_sulfur_dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
color                   0
dtype: int64

In [6]:
dataset.describe()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
count,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0
mean,7.215307,0.339666,0.318633,5.443235,0.056034,30.525319,115.744574,0.994697,3.218501,0.531268,10.491801,5.818378
std,1.296434,0.164636,0.145318,4.757804,0.035034,17.7494,56.521855,0.002999,0.160787,0.148806,1.192712,0.873255
min,3.8,0.08,0.0,0.6,0.009,1.0,6.0,0.98711,2.72,0.22,8.0,3.0
25%,6.4,0.23,0.25,1.8,0.038,17.0,77.0,0.99234,3.11,0.43,9.5,5.0
50%,7.0,0.29,0.31,3.0,0.047,29.0,118.0,0.99489,3.21,0.51,10.3,6.0
75%,7.7,0.4,0.39,8.1,0.065,41.0,156.0,0.99699,3.32,0.6,11.3,6.0
max,15.9,1.58,1.66,65.8,0.611,289.0,440.0,1.03898,4.01,2.0,14.9,9.0


In [7]:
dataset['quality'].value_counts()

quality
6    2836
5    2138
7    1079
4     216
8     193
3      30
9       5
Name: count, dtype: int64

In [8]:
mean_quality = dataset['quality'].mean()
dataset['binary_quality'] = (dataset['quality'] > mean_quality).astype(int)

dataset['binary_quality'].value_counts()

binary_quality
1    4113
0    2384
Name: count, dtype: int64

# Pre Processing


In [9]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from tqdm import tqdm

In [10]:
# Since the wine dataset doesn't have categorical columns in the sample
categorical_columns = []
numerical_columns = [
    'fixed_acidity', 'volatile_acidity', 'citric_acid',
    'residual_sugar', 'chlorides', 'free_sulfur_dioxide',
    'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol'
]

# Define the preprocessing steps
numerical_transformer = Pipeline(steps=[
    # Fill missing values with the mean
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())  # Standardize the numerical columns
])

categorical_transformer = Pipeline(steps=[
    # Fill missing values with the most frequent value
    ('imputer', SimpleImputer(strategy='most_frequent')),
    # One-hot encode categorical variables
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Combine the transformers into a single ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_columns),
        # Keeping categorical transformer in case you add categorical columns later
        ('cat', categorical_transformer, categorical_columns)
    ],
    remainder='drop'  # This will drop any columns not explicitly specified
)

# Drop the target column(s) and any irrelevant ones
X = dataset.drop(columns=['quality', 'binary_quality'])
y = dataset['binary_quality']  # Target variable for binary classification

In [11]:
def comprehensive_model_evaluation(X, y, classifier, param_grid, split_ratios=[0.2, 0.5, 0.8]):
    """
    Comprehensive model evaluation across multiple splits and configurations
    """
    all_results = []

    for test_size in tqdm(split_ratios, desc="Split Ratios"):
        for seed in tqdm(range(3), desc="Random Seeds", leave=False):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=42 + seed)

            pipeline = Pipeline([
                ('preprocessor', preprocessor),
                ('classifier', classifier)
            ])

            grid_search = GridSearchCV(
                pipeline,
                param_grid,
                cv=5,
                scoring='accuracy',
                n_jobs=-1
            )
            grid_search.fit(X_train, y_train)

            best_model = grid_search.best_estimator_
            train_pred = best_model.predict(X_train)
            test_pred = best_model.predict(X_test)

            result = {
                'Test Size': f"{int((1-test_size)*100)}-{int(test_size*100)}",
                'Random Seed': seed,
                'Best Params': str(grid_search.best_params_),
                'Best CV Score': grid_search.best_score_,
                'Train Accuracy': accuracy_score(y_train, train_pred),
                'Test Accuracy': accuracy_score(y_test, test_pred),
                'Classifier': type(classifier).__name__
            }

            report = classification_report(y_test, test_pred, output_dict=True)
            result.update({
                'Macro Precision': report['macro avg']['precision'],
                'Macro Recall': report['macro avg']['recall'],
                'Macro F1-Score': report['macro avg']['f1-score']
            })

            # Add confusion matrix
            cm = confusion_matrix(y_test, test_pred)
            result['Confusion Matrix'] = cm

            all_results.append(result)

    results_df = pd.DataFrame(all_results)
    print("Evaluation Summary:")
    print(results_df.groupby(['Test Size', 'Classifier'])[
          ['Train Accuracy', 'Test Accuracy', 'Best CV Score']].agg(['mean', 'std']))

    # Optionally, you could print confusion matrices for all splits
    for result in all_results:
        print(f"Confusion Matrix for Test Size {
              result['Test Size']} and Seed {result['Random Seed']}:")
        print(result['Confusion Matrix'])

    return results_df

## Logistic Regression


In [12]:
lr_param_grid = {
    'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'classifier__penalty': ['l1', 'l2'],
    'classifier__solver': ['liblinear']
}

# Comprehensive evaluation
lr_results = comprehensive_model_evaluation(
    X, y,
    LogisticRegression(),
    lr_param_grid
)

lr_results

Split Ratios:   0%|          | 0/3 [00:00<?, ?it/s]

Split Ratios: 100%|██████████| 3/3 [00:05<00:00,  1.89s/it]

Evaluation Summary:
                             Train Accuracy           Test Accuracy            \
                                       mean       std          mean       std   
Test Size Classifier                                                            
19-80     LogisticRegression       0.743136  0.013117      0.741888  0.001659   
50-50     LogisticRegression       0.737274  0.003699      0.742998  0.007329   
80-20     LogisticRegression       0.740235  0.002309      0.734615  0.007807   

                             Best CV Score            
                                      mean       std  
Test Size Classifier                                  
19-80     LogisticRegression      0.736738  0.012617  
50-50     LogisticRegression      0.735732  0.006921  
80-20     LogisticRegression      0.741772  0.002835  
Confusion Matrix for Test Size 80-20 and Seed 0:
[[264 187]
 [169 680]]
Confusion Matrix for Test Size 80-20 and Seed 1:
[[254 205]
 [138 703]]
Confusion Matrix fo




Unnamed: 0,Test Size,Random Seed,Best Params,Best CV Score,Train Accuracy,Test Accuracy,Classifier,Macro Precision,Macro Recall,Macro F1-Score,Confusion Matrix
0,80-20,0,"{'classifier__C': 0.1, 'classifier__penalty': ...",0.744851,0.742544,0.726154,LogisticRegression,0.697007,0.693154,0.694913,"[[264, 187], [169, 680]]"
1,80-20,1,"{'classifier__C': 0.1, 'classifier__penalty': ...",0.741195,0.740235,0.736154,LogisticRegression,0.711094,0.694643,0.700416,"[[254, 205], [138, 703]]"
2,80-20,2,"{'classifier__C': 0.1, 'classifier__penalty': ...",0.73927,0.737926,0.741538,LogisticRegression,0.727629,0.710135,0.715687,"[[286, 207], [129, 678]]"
3,50-50,0,"{'classifier__C': 0.1, 'classifier__penalty': ...",0.741689,0.741071,0.740228,LogisticRegression,0.716525,0.704989,0.709479,"[[674, 483], [361, 1731]]"
4,50-50,1,"{'classifier__C': 1, 'classifier__penalty': 'l...",0.737368,0.737069,0.737458,LogisticRegression,0.717611,0.702637,0.707869,"[[681, 508], [345, 1715]]"
5,50-50,2,"{'classifier__C': 0.1, 'classifier__penalty': ...",0.72814,0.733682,0.751308,LogisticRegression,0.732142,0.719605,0.724412,"[[713, 469], [339, 1728]]"
6,19-80,0,"{'classifier__C': 1, 'classifier__penalty': 'l...",0.750609,0.756736,0.740092,LogisticRegression,0.720432,0.705604,0.710851,"[[1097, 803], [548, 2750]]"
7,19-80,1,"{'classifier__C': 100, 'classifier__penalty': ...",0.733662,0.742109,0.743363,LogisticRegression,0.729399,0.701574,0.709097,"[[1040, 881], [453, 2824]]"
8,19-80,2,"{'classifier__C': 1, 'classifier__penalty': 'l...",0.725943,0.730562,0.742209,LogisticRegression,0.724961,0.706426,0.712458,"[[1093, 825], [515, 2765]]"


## KNN


In [13]:
from sklearn.neighbors import KNeighborsClassifier

In [14]:
knn_param_grid = {
    # K from 1 to 104 with step 4
    'classifier__n_neighbors': np.arange(1, 105, 4),
    # 'uniform' or 'distance' weights
    'classifier__weights': ['uniform', 'distance'],
    'classifier__metric': ['euclidean', 'manhattan'],  # Distance metrics
}

# Assuming 'comprehensive_model_evaluation' is a function for evaluation
knn_results = comprehensive_model_evaluation(
    X, y,
    KNeighborsClassifier(),
    knn_param_grid
)

knn_results

Split Ratios: 100%|██████████| 3/3 [00:40<00:00, 13.36s/it]

Evaluation Summary:
                               Train Accuracy      Test Accuracy            \
                                         mean  std          mean       std   
Test Size Classifier                                                         
19-80     KNeighborsClassifier            1.0  0.0      0.753559  0.010185   
50-50     KNeighborsClassifier            1.0  0.0      0.801785  0.008276   
80-20     KNeighborsClassifier            1.0  0.0      0.827692  0.008104   

                               Best CV Score            
                                        mean       std  
Test Size Classifier                                    
19-80     KNeighborsClassifier      0.758549  0.007885  
50-50     KNeighborsClassifier      0.784690  0.005385  
80-20     KNeighborsClassifier      0.814956  0.006146  
Confusion Matrix for Test Size 80-20 and Seed 0:
[[312 139]
 [ 96 753]]
Confusion Matrix for Test Size 80-20 and Seed 1:
[[314 145]
 [ 69 772]]
Confusion Matrix for Test




Unnamed: 0,Test Size,Random Seed,Best Params,Best CV Score,Train Accuracy,Test Accuracy,Classifier,Macro Precision,Macro Recall,Macro F1-Score,Confusion Matrix
0,80-20,0,"{'classifier__metric': 'manhattan', 'classifie...",0.817395,1.0,0.819231,KNeighborsClassifier,0.804438,0.789361,0.795723,"[[312, 139], [96, 753]]"
1,80-20,1,"{'classifier__metric': 'manhattan', 'classifie...",0.807965,1.0,0.835385,KNeighborsClassifier,0.83086,0.801025,0.812057,"[[314, 145], [69, 772]]"
2,80-20,2,"{'classifier__metric': 'manhattan', 'classifie...",0.819508,1.0,0.828462,KNeighborsClassifier,0.829118,0.801062,0.810622,"[[339, 154], [69, 738]]"
3,50-50,0,"{'classifier__metric': 'manhattan', 'classifie...",0.790646,1.0,0.796553,KNeighborsClassifier,0.781699,0.765724,0.772113,"[[762, 395], [266, 1826]]"
4,50-50,1,"{'classifier__metric': 'manhattan', 'classifie...",0.780167,1.0,0.797476,KNeighborsClassifier,0.791482,0.761347,0.771202,"[[745, 444], [214, 1846]]"
5,50-50,2,"{'classifier__metric': 'manhattan', 'classifie...",0.783255,1.0,0.811327,KNeighborsClassifier,0.80139,0.783256,0.790337,"[[804, 378], [235, 1832]]"
6,19-80,0,"{'classifier__metric': 'euclidean', 'classifie...",0.767556,1.0,0.741824,KNeighborsClassifier,0.723375,0.704068,0.710328,"[[1071, 829], [513, 2785]]"
7,19-80,1,"{'classifier__metric': 'manhattan', 'classifie...",0.7552,1.0,0.7601,KNeighborsClassifier,0.759156,0.709679,0.720005,"[[992, 929], [318, 2959]]"
8,19-80,2,"{'classifier__metric': 'euclidean', 'classifie...",0.75289,1.0,0.758753,KNeighborsClassifier,0.749451,0.715855,0.724792,"[[1059, 859], [395, 2885]]"


## Decision Tree


In [15]:
from sklearn.tree import DecisionTreeClassifier

In [16]:
dt_param_grid = {
    # Maximum depth of the tree
    'classifier__max_depth': [None, 5, 10, 20, 30],
    # Minimum number of samples required to split a node
    'classifier__min_samples_split': [2, 5, 10],
    # Minimum number of samples required to be at a leaf node
    'classifier__min_samples_leaf': [1, 2, 4],
    # The function to measure the quality of a split
    'classifier__criterion': ['gini', 'entropy'],
    # Strategy used to split at each node
    'classifier__splitter': ['best', 'random'],
    # The number of features to consider for the best split
    'classifier__max_features': [None, 'sqrt', 'log2']
}

# Model evaluation using comprehensive_model_evaluation
dt_results = comprehensive_model_evaluation(
    X, y,
    DecisionTreeClassifier(),
    dt_param_grid
)

dt_results

Split Ratios: 100%|██████████| 3/3 [00:51<00:00, 17.19s/it]

Evaluation Summary:
                                 Train Accuracy           Test Accuracy  \
                                           mean       std          mean   
Test Size Classifier                                                      
19-80     DecisionTreeClassifier       0.767000  0.040372      0.721944   
50-50     DecisionTreeClassifier       0.922003  0.135094      0.748435   
80-20     DecisionTreeClassifier       0.996023  0.006888      0.776923   

                                           Best CV Score            
                                       std          mean       std  
Test Size Classifier                                                
19-80     DecisionTreeClassifier  0.021535      0.735974  0.002762  
50-50     DecisionTreeClassifier  0.001455      0.736144  0.005089  
80-20     DecisionTreeClassifier  0.025697      0.769093  0.004788  
Confusion Matrix for Test Size 80-20 and Seed 0:
[[303 148]
 [178 671]]
Confusion Matrix for Test Size 80-20 and Se




Unnamed: 0,Test Size,Random Seed,Best Params,Best CV Score,Train Accuracy,Test Accuracy,Classifier,Macro Precision,Macro Recall,Macro F1-Score,Confusion Matrix
0,80-20,0,"{'classifier__criterion': 'gini', 'classifier_...",0.774289,1.0,0.749231,DecisionTreeClassifier,0.724615,0.731091,0.727385,"[[303, 148], [178, 671]]"
1,80-20,1,"{'classifier__criterion': 'entropy', 'classifi...",0.764858,0.98807,0.8,DecisionTreeClassifier,0.781099,0.781099,0.781099,"[[329, 130], [130, 711]]"
2,80-20,2,"{'classifier__criterion': 'entropy', 'classifi...",0.768134,1.0,0.781538,DecisionTreeClassifier,0.768161,0.766425,0.767259,"[[347, 146], [138, 669]]"
3,50-50,0,"{'classifier__criterion': 'entropy', 'classifi...",0.741382,1.0,0.750077,DecisionTreeClassifier,0.727939,0.730987,0.729356,"[[769, 388], [424, 1668]]"
4,50-50,1,"{'classifier__criterion': 'gini', 'classifier_...",0.731217,1.0,0.747922,DecisionTreeClassifier,0.72856,0.729915,0.729214,"[[788, 401], [418, 1642]]"
5,50-50,2,"{'classifier__criterion': 'entropy', 'classifi...",0.735834,0.76601,0.747307,DecisionTreeClassifier,0.727514,0.730587,0.728928,"[[791, 391], [430, 1637]]"
6,19-80,0,"{'classifier__criterion': 'entropy', 'classifi...",0.736745,0.720554,0.697384,DecisionTreeClassifier,0.681579,0.626657,0.628143,"[[691, 1209], [364, 2934]]"
7,19-80,1,"{'classifier__criterion': 'entropy', 'classifi...",0.732908,0.793687,0.730858,DecisionTreeClassifier,0.7116,0.713413,0.712455,"[[1242, 679], [720, 2557]]"
8,19-80,2,"{'classifier__criterion': 'entropy', 'classifi...",0.738268,0.786759,0.737591,DecisionTreeClassifier,0.718431,0.719546,0.718971,"[[1248, 670], [694, 2586]]"


## Bagging Classifier


In [17]:
from sklearn.ensemble import BaggingClassifier

In [18]:
bagging_param_grid = {
    # Number of base estimators (trees)
    'classifier__n_estimators': [10, 50, 100, 200],
    # Proportion of samples to train each base estimator
    'classifier__max_samples': [0.5, 0.7, 1.0],
    # Proportion of features to train each base estimator
    'classifier__max_features': [0.5, 0.7, 1.0],
    # Whether to use bootstrap sampling
    'classifier__bootstrap': [True, False],
    # Base estimator (Decision Tree)
    'classifier__estimator': [DecisionTreeClassifier(random_state=42)],
    'classifier__random_state': [42]  # For reproducibility
}

# Model evaluation using comprehensive_model_evaluation
bagging_results = comprehensive_model_evaluation(
    X, y,
    BaggingClassifier(),
    bagging_param_grid
)

Split Ratios: 100%|██████████| 3/3 [05:39<00:00, 113.30s/it]

Evaluation Summary:
                            Train Accuracy           Test Accuracy            \
                                      mean       std          mean       std   
Test Size Classifier                                                           
19-80     BaggingClassifier       0.988196  0.010480      0.767731  0.007132   
50-50     BaggingClassifier       0.999282  0.000641      0.816662  0.008442   
80-20     BaggingClassifier       0.999615  0.000667      0.827949  0.007392   

                            Best CV Score            
                                     mean       std  
Test Size Classifier                                 
19-80     BaggingClassifier      0.773960  0.009506  
50-50     BaggingClassifier      0.795771  0.005167  
80-20     BaggingClassifier      0.823422  0.003964  
Confusion Matrix for Test Size 80-20 and Seed 0:
[[314 137]
 [ 97 752]]
Confusion Matrix for Test Size 80-20 and Seed 1:
[[331 128]
 [ 87 754]]
Confusion Matrix for Test Size 




In [19]:
bagging_results

Unnamed: 0,Test Size,Random Seed,Best Params,Best CV Score,Train Accuracy,Test Accuracy,Classifier,Macro Precision,Macro Recall,Macro F1-Score,Confusion Matrix
0,80-20,0,"{'classifier__bootstrap': True, 'classifier__e...",0.824899,0.998845,0.82,BaggingClassifier,0.804942,0.790989,0.79695,"[[314, 137], [97, 752]]"
1,80-20,1,"{'classifier__bootstrap': True, 'classifier__e...",0.818931,1.0,0.834615,BaggingClassifier,0.823371,0.808842,0.815032,"[[331, 128], [87, 754]]"
2,80-20,2,"{'classifier__bootstrap': False, 'classifier__...",0.826434,1.0,0.829231,BaggingClassifier,0.825152,0.806812,0.813821,"[[352, 141], [81, 726]]"
3,50-50,0,"{'classifier__bootstrap': False, 'classifier__...",0.801727,1.0,0.811942,BaggingClassifier,0.795894,0.791002,0.793288,"[[831, 326], [285, 1807]]"
4,50-50,1,"{'classifier__bootstrap': True, 'classifier__e...",0.792484,0.999076,0.811634,BaggingClassifier,0.802285,0.784247,0.791262,"[[811, 378], [234, 1826]]"
5,50-50,2,"{'classifier__bootstrap': False, 'classifier__...",0.793104,0.998768,0.826408,BaggingClassifier,0.816613,0.802716,0.808522,"[[846, 336], [228, 1839]]"
6,19-80,0,"{'classifier__bootstrap': True, 'classifier__e...",0.784491,0.984604,0.76087,BaggingClassifier,0.743532,0.730901,0.735807,"[[1177, 723], [520, 2778]]"
7,19-80,1,"{'classifier__bootstrap': True, 'classifier__e...",0.766014,0.979985,0.767218,BaggingClassifier,0.75717,0.729002,0.737461,"[[1119, 802], [408, 2869]]"
8,19-80,2,"{'classifier__bootstrap': True, 'classifier__e...",0.771375,1.0,0.775106,BaggingClassifier,0.759008,0.753818,0.756157,"[[1290, 628], [541, 2739]]"


## Random Forest


In [20]:
from sklearn.ensemble import RandomForestClassifier

In [21]:
rf_param_grid = {
    'classifier__n_estimators': [1024],  # Number of trees set to 1024
    # Various options for max_features
    'classifier__max_features': [1, 2, 4, 6, 8, 12, 16, 20],
    'classifier__random_state': [42]  # For reproducibility
}

# Model evaluation using comprehensive_model_evaluation
rf_results = comprehensive_model_evaluation(
    X, y,
    RandomForestClassifier(),
    rf_param_grid
)

rf_results

Split Ratios:   0%|          | 0/3 [00:00<?, ?it/s]

Split Ratios: 100%|██████████| 3/3 [07:21<00:00, 147.22s/it]

Evaluation Summary:
                                 Train Accuracy      Test Accuracy            \
                                           mean  std          mean       std   
Test Size Classifier                                                           
19-80     RandomForestClassifier            1.0  0.0      0.773310  0.006232   
50-50     RandomForestClassifier            1.0  0.0      0.815020  0.012114   
80-20     RandomForestClassifier            1.0  0.0      0.830256  0.004700   

                                 Best CV Score            
                                          mean       std  
Test Size Classifier                                      
19-80     RandomForestClassifier      0.774973  0.011675  
50-50     RandomForestClassifier      0.796390  0.005101  
80-20     RandomForestClassifier      0.823613  0.004998  
Confusion Matrix for Test Size 80-20 and Seed 0:
[[329 122]
 [100 749]]
Confusion Matrix for Test Size 80-20 and Seed 1:
[[335 124]
 [ 90 751]]
C




Unnamed: 0,Test Size,Random Seed,Best Params,Best CV Score,Train Accuracy,Test Accuracy,Classifier,Macro Precision,Macro Recall,Macro F1-Score,Confusion Matrix
0,80-20,0,"{'classifier__max_features': 1, 'classifier__n...",0.824706,1.0,0.829231,RandomForestClassifier,0.813415,0.805852,0.809329,"[[329, 122], [100, 749]]"
1,80-20,1,"{'classifier__max_features': 4, 'classifier__n...",0.818159,1.0,0.835385,RandomForestClassifier,0.823261,0.811416,0.816605,"[[335, 124], [90, 751]]"
2,80-20,2,"{'classifier__max_features': 6, 'classifier__n...",0.827974,1.0,0.826154,RandomForestClassifier,0.818363,0.80828,0.812568,"[[362, 131], [95, 712]]"
3,50-50,0,"{'classifier__max_features': 1, 'classifier__n...",0.801729,1.0,0.806402,RandomForestClassifier,0.790992,0.781098,0.785418,"[[802, 355], [274, 1818]]"
4,50-50,1,"{'classifier__max_features': 1, 'classifier__n...",0.795876,1.0,0.809788,RandomForestClassifier,0.800992,0.781012,0.788581,"[[801, 388], [230, 1830]]"
5,50-50,2,"{'classifier__max_features': 1, 'classifier__n...",0.791565,1.0,0.82887,RandomForestClassifier,0.820209,0.804107,0.810684,"[[843, 339], [217, 1850]]"
6,19-80,0,"{'classifier__max_features': 2, 'classifier__n...",0.788334,1.0,0.769334,RandomForestClassifier,0.751642,0.746831,0.749015,"[[1260, 640], [559, 2739]]"
7,19-80,1,"{'classifier__max_features': 4, 'classifier__n...",0.769846,1.0,0.770104,RandomForestClassifier,0.760148,0.732799,0.741205,"[[1133, 788], [407, 2870]]"
8,19-80,2,"{'classifier__max_features': 6, 'classifier__n...",0.766739,1.0,0.780492,RandomForestClassifier,0.766418,0.754298,0.75916,"[[1255, 663], [478, 2802]]"


In [22]:
combined_results = pd.concat([
    lr_results,
    knn_results,
    dt_results,
    bagging_results,
    rf_results
])

# Aggregate and compare key metrics
comparison_summary = combined_results.groupby('Classifier')[
    ['Test Accuracy', 'Macro F1-Score', 'Best CV Score']
].agg(['mean', 'std'])

In [23]:
comparison_summary

Unnamed: 0_level_0,Test Accuracy,Test Accuracy,Macro F1-Score,Macro F1-Score,Best CV Score,Best CV Score
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Classifier,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
BaggingClassifier,0.804114,0.028507,0.783145,0.031657,0.797718,0.022227
DecisionTreeClassifier,0.749101,0.02913,0.724757,0.042669,0.74707,0.016939
KNeighborsClassifier,0.794346,0.033482,0.767464,0.039602,0.786065,0.025098
LogisticRegression,0.739834,0.006701,0.709465,0.008436,0.738081,0.007851
RandomForestClassifier,0.806196,0.026528,0.785841,0.029363,0.798325,0.022193


In [24]:
comparison_summary.to_excel('wine_comparison_summary.xlsx')

In [25]:
avg_performance = (
    combined_results.groupby(['Test Size', 'Classifier'])[
        ['Best CV Score', 'Train Accuracy', 'Test Accuracy', 'Macro Precision', 'Macro Recall', 'Macro F1-Score']
    ]
    .mean()
    .reset_index()
)

# Sorting results for better readability
avg_performance = avg_performance.sort_values(by=['Classifier']).reset_index(drop=True)

avg_performance

Unnamed: 0,Test Size,Classifier,Best CV Score,Train Accuracy,Test Accuracy,Macro Precision,Macro Recall,Macro F1-Score
0,19-80,BaggingClassifier,0.77396,0.988196,0.767731,0.753237,0.737907,0.743142
1,50-50,BaggingClassifier,0.795771,0.999282,0.816662,0.804931,0.792655,0.797691
2,80-20,BaggingClassifier,0.823422,0.999615,0.827949,0.817822,0.802215,0.808601
3,19-80,DecisionTreeClassifier,0.735974,0.767,0.721944,0.70387,0.686539,0.686523
4,50-50,DecisionTreeClassifier,0.736144,0.922003,0.748435,0.728004,0.730496,0.729166
5,80-20,DecisionTreeClassifier,0.769093,0.996023,0.776923,0.757958,0.759538,0.758581
6,19-80,KNeighborsClassifier,0.758549,1.0,0.753559,0.743994,0.709867,0.718375
7,50-50,KNeighborsClassifier,0.78469,1.0,0.801785,0.791524,0.770109,0.777884
8,80-20,KNeighborsClassifier,0.814956,1.0,0.827692,0.821472,0.79715,0.806134
9,19-80,LogisticRegression,0.736738,0.743136,0.741888,0.72493,0.704535,0.710802


In [26]:
avg_performance.to_excel('wine_average_performance.xlsx')