In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


from sklearn.svm import SVC


from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler


from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler


In [2]:
# Load dataset
df = pd.read_excel(r"C:\Obesity_Dataset.xlsx")



In [3]:
# Target column (update if different)
target = "Class"
X = df.drop(columns=[target])
y = df[target].astype(str)



In [4]:
# Identify categorical and numerical columns
cat_cols = X.select_dtypes(include=['object']).columns
num_cols = X.select_dtypes(include=[np.number]).columns



In [5]:
# Preprocessing
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ("num", "passthrough", num_cols)
])



In [6]:
# Model
rf = RandomForestClassifier(random_state=42)

pipeline = Pipeline([
    ("pre", preprocessor),
    ("clf", rf)
])

In [7]:
# Hyperparameters
param_grid = {
    "clf__n_estimators": [200, 300, 500],
    "clf__max_depth": [None, 10, 20]
}



In [8]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)



Fitting 3 folds for each of 9 candidates, totalling 27 fits


In [9]:
y_pred = grid.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.8726708074534162
              precision    recall  f1-score   support

           1       1.00      0.53      0.70        15
           2       0.89      0.95      0.92       132
           3       0.85      0.85      0.85       118
           4       0.84      0.84      0.84        57

    accuracy                           0.87       322
   macro avg       0.90      0.79      0.83       322
weighted avg       0.87      0.87      0.87       322

[[  8   5   1   1]
 [  0 125   7   0]
 [  0  10 100   8]
 [  0   0   9  48]]


In [10]:
# SVM

pipeline = Pipeline([
    ("pre", ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols)
    ])),
    ("clf", SVC())
])



In [11]:
param_grid = {
    "clf__C": [0.5, 1, 2],
    "clf__kernel": ["rbf"],
    "clf__gamma": ["scale", "auto"]
}




In [12]:
grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)


Fitting 3 folds for each of 6 candidates, totalling 18 fits


In [13]:
print("Best Params:", grid.best_params_)
y_pred = grid.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Best Params: {'clf__C': 2, 'clf__gamma': 'auto', 'clf__kernel': 'rbf'}
Accuracy: 0.7919254658385093
              precision    recall  f1-score   support

           1       0.75      0.60      0.67        15
           2       0.83      0.88      0.85       132
           3       0.77      0.75      0.76       118
           4       0.76      0.72      0.74        57

    accuracy                           0.79       322
   macro avg       0.78      0.74      0.75       322
weighted avg       0.79      0.79      0.79       322

[[  9   6   0   0]
 [  1 116  14   1]
 [  2  15  89  12]
 [  0   3  13  41]]


In [14]:
#KNN

pipeline = Pipeline([
    ("pre", ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", StandardScaler(), num_cols)
    ])),
    ("clf", KNeighborsClassifier())
])




In [15]:
param_grid = {
    "clf__n_neighbors": [3, 5, 7, 9],
    "clf__weights": ["uniform", "distance"]
}



In [16]:
grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)


Fitting 3 folds for each of 8 candidates, totalling 24 fits


In [17]:

print("Best Params:", grid.best_params_)
y_pred = grid.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Best Params: {'clf__n_neighbors': 3, 'clf__weights': 'distance'}
Accuracy: 0.7546583850931677
              precision    recall  f1-score   support

           1       0.73      0.53      0.62        15
           2       0.82      0.84      0.83       132
           3       0.72      0.71      0.71       118
           4       0.69      0.70      0.70        57

    accuracy                           0.75       322
   macro avg       0.74      0.70      0.71       322
weighted avg       0.75      0.75      0.75       322

[[  8   6   1   0]
 [  2 111  18   1]
 [  1  16  84  17]
 [  0   3  14  40]]


In [18]:

pipeline = Pipeline([
    ("pre", ColumnTransformer([
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", StandardScaler(), num_cols)
    ])),
    ("clf", MLPClassifier(max_iter=400, random_state=42))
])



In [19]:
param_grid = {
    "clf__hidden_layer_sizes": [(64,), (128,), (64,32)],
    "clf__alpha": [1e-4, 1e-3],
    "clf__learning_rate_init": [0.001, 0.01]
}




In [20]:
grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)


Fitting 3 folds for each of 12 candidates, totalling 36 fits


In [21]:
print("Best Params:", grid.best_params_)
y_pred = grid.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Best Params: {'clf__alpha': 0.001, 'clf__hidden_layer_sizes': (64, 32), 'clf__learning_rate_init': 0.01}
Accuracy: 0.7577639751552795
              precision    recall  f1-score   support

           1       0.59      0.87      0.70        15
           2       0.80      0.87      0.83       132
           3       0.84      0.57      0.68       118
           4       0.64      0.86      0.74        57

    accuracy                           0.76       322
   macro avg       0.72      0.79      0.74       322
weighted avg       0.78      0.76      0.75       322

[[ 13   1   1   0]
 [  7 115   8   2]
 [  2  24  67  25]
 [  0   4   4  49]]
