In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split,StratifiedShuffleSplit,cross_val_score 
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings("ignore")


In [5]:
df = pd.read_excel(r"C:\Obesity_Dataset.xlsx")

In [6]:
scaler = StandardScaler()
X = df.drop('Class', axis=1)
y = df['Class']
X_scaled = scaler.fit_transform(X)
Y_scaled=pd.DataFrame(X_scaled)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)



In [8]:
def evaluate_model(name, model,X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{name} Accuracy: {round(accuracy*100, 2)}%")
    print(classification_report(y_test, y_pred))
    return name, accuracy

In [14]:
results = []
results.append(evaluate_model("ANN", MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=500, random_state=42),X_train, y_train, X_test, y_test))
results.append(evaluate_model("SVM", SVC(kernel='rbf', probability=True, random_state=42), X_train, y_train, X_test, y_test))
results.append(evaluate_model("KNN", KNeighborsClassifier(n_neighbors=5), X_train, y_train, X_test, y_test))
results.append(evaluate_model("Random Forest", RandomForestClassifier(n_estimators=100, random_state=42), X_train, y_train, X_test, y_test))


ANN Accuracy: 76.6%
              precision    recall  f1-score   support

           1       0.59      0.85      0.69        20
           2       0.82      0.83      0.82       192
           3       0.82      0.69      0.75       193
           4       0.63      0.78      0.70        78

    accuracy                           0.77       483
   macro avg       0.71      0.79      0.74       483
weighted avg       0.78      0.77      0.77       483


SVM Accuracy: 76.19%
              precision    recall  f1-score   support

           1       0.64      0.45      0.53        20
           2       0.82      0.81      0.81       192
           3       0.78      0.74      0.76       193
           4       0.64      0.78      0.71        78

    accuracy                           0.76       483
   macro avg       0.72      0.70      0.70       483
weighted avg       0.77      0.76      0.76       483


KNN Accuracy: 70.81%
              precision    recall  f1-score   support

          

In [10]:
summary = pd.DataFrame(results, columns=["Model", "Accuracy"])
summary.sort_values(by="Accuracy", ascending=False).reset_index(drop=True)

Unnamed: 0,Model,Accuracy
0,Random Forest,0.869565
1,ANN,0.78882
2,SVM,0.776398
3,KNN,0.723602


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(y_pred) 
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy*100:.2f}%")
print(classification_report(y_test, y_pred))

[2 2 4 3 2 2 3 2 2 3 4 4 3 4 4 2 2 2 4 4 2 4 2 3 2 3 3 4 1 2 2 4 2 2 3 2 3
 4 1 3 2 3 2 3 3 2 4 2 3 4 3 4 2 1 3 2 3 2 4 2 2 3 2 2 1 2 2 2 2 2 2 3 2 3
 3 4 3 3 4 3 2 2 3 2 3 1 3 3 4 2 2 2 3 3 2 1 3 4 2 2 3 3 3 4 2 3 4 3 4 3 3
 2 4 2 3 3 4 4 2 3 3 2 2 2 3 3 2 3 3 2 2 3 3 2 1 2 4 2 3 3 4 2 3 3 3 2 3 2
 2 2 2 4 2 3 2 1 2 3 3 4 2 4 2 3 3 3 3 3 4 3 3 2 3 2 4 3 3 3 3 4 3 3 2 2 4
 3 4 2 2 4 3 3 2 3 2 4 2 2 3 2 4 2 2 4 2 2 2 3 2 2 3 4 3 2 2 3 4 3 2 2 2 3
 2 2 3 3 2 4 3 2 3 4 3 3 3 3 2 1 4 4 2 3 2 3 1 2 3 3 4 3 2 3 2 4 2 2 1 2 1
 2 2 1 2 3 4 3 2 3 2 3 2 2 3 2 4 4 3 2 3 2 3 4 2 4 3 3 3 3 2 3 3 3 3 2 2 2
 3 2 1 3 3 2 3 2 4 4 3 2 4 2 3 3 2 3 3 3 2 4 3 4 3 2 3 4 2 4 3 3 2 4 2 4 2
 1 3 2 4 3 3 3 3 3 3 4 4 1 2 3 2 3 2 4 2 2 2 4 2 2 3 3 2 2 2 2 3 3 3 1 2 3
 3 3 4 3 2 3 2 3 2 4 2 3 2 3 3 1 3 4 3 2 3 2 2 3 3 3 3 2 2 3 3 4 3 2 4 3 3
 2 3 3 3 3 3 3 2 2 2 3 2 3 4 3 2 3 4 3 2 3 2 4 3 1 2 2 1 3 2 4 4 4 2 2 2 3
 2 3 2 2 2 3 2 3 2 3 3 4 2 3 2 2 3 2 4 2 2 4 2 2 2 3 2 4 3 2 2 3 2 2 3 2 2
 2 3]
Random Forest Accur

In [12]:
model = RandomForestClassifier()
scores = cross_val_score(model, X_scaled, y, cv=5, scoring='accuracy')
print("Accuracy for each fold:", scores)


Accuracy for each fold: [0.88509317 0.89440994 0.83229814 0.90993789 0.88819876]
