In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
df1 = pd.read_csv('/content/sample_data/train.csv')

In [3]:
df1.head()

Unnamed: 0,id,Gender,Age,Height,Weight,family_history_with_overweight,FAVC,FCVC,NCP,CAEC,SMOKE,CH2O,SCC,FAF,TUE,CALC,MTRANS,NObeyesdad
0,0,Male,24.443011,1.699998,81.66995,yes,yes,2.0,2.983297,Sometimes,no,2.763573,no,0.0,0.976473,Sometimes,Public_Transportation,Overweight_Level_II
1,1,Female,18.0,1.56,57.0,yes,yes,2.0,3.0,Frequently,no,2.0,no,1.0,1.0,no,Automobile,Normal_Weight
2,2,Female,18.0,1.71146,50.165754,yes,yes,1.880534,1.411685,Sometimes,no,1.910378,no,0.866045,1.673584,no,Public_Transportation,Insufficient_Weight
3,3,Female,20.952737,1.71073,131.274851,yes,yes,3.0,3.0,Sometimes,no,1.674061,no,1.467863,0.780199,Sometimes,Public_Transportation,Obesity_Type_III
4,4,Male,31.641081,1.914186,93.798055,yes,yes,2.679664,1.971472,Sometimes,no,1.979848,no,1.967973,0.931721,Sometimes,Public_Transportation,Overweight_Level_II


In [4]:
df1['BMI'] = df1['Weight'] / (df1['Height'])**2

In [5]:

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from sklearn.compose import ColumnTransformer

In [6]:
df1 = df1.drop(columns=['id']).dropna()

In [7]:
age_bins = [0, 12, 19, 35, 60, 100]
age_labels = ['Child', 'Teen', 'Young Adult', 'Middle-Aged Adult', 'Senior']

df1['Age'] = pd.cut(df1['Age'], bins=age_bins, labels=age_labels, right=False)
df1['Age'] = df1['Age'].astype(object)

In [8]:
numeric_features = ['Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE', 'BMI']
categorical_features = ['Gender', 'Age', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS']
features = numeric_features + categorical_features

In [9]:
St = StandardScaler()
for col in df1:
    if df1[col].dtype == 'int64' or df1[col].dtype == 'float64':
        df1[col] = St.fit_transform(df1[col].values.reshape(-1, 1))

In [10]:
le = LabelEncoder()
for col in df1:
    if df1[col].dtype == 'object':
        df1[col] = le.fit_transform(df1[col])

In [11]:
X = df1[features]
y = df1['NObeyesdad']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
RM = RandomForestClassifier()
RM.fit(X_train, y_train)
y_pred12 = RM.predict(X_train)
y_pred1 = RM.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred12))
print("Accuracy:", accuracy_score(y_test, y_pred1))
print("\nClassification Report:")
print(classification_report(y_test, y_pred1))

Accuracy: 0.9993375888233169
Accuracy: 0.8954720616570327

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.93      0.94       524
           1       0.87      0.88      0.87       626
           2       0.86      0.86      0.86       543
           3       0.97      0.96      0.97       657
           4       1.00      1.00      1.00       804
           5       0.77      0.76      0.76       484
           6       0.79      0.80      0.79       514

    accuracy                           0.90      4152
   macro avg       0.88      0.88      0.88      4152
weighted avg       0.90      0.90      0.90      4152



In [14]:
LR = LogisticRegression()
LR.fit(X_train, y_train)
y_pred22 = LR.predict(X_train)
y_pred2 = LR.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred22))
print("Accuracy:", accuracy_score(y_test, y_pred2))
print("\nClassification Report:")
print(classification_report(y_test, y_pred2))

Accuracy: 0.8584246657834518
Accuracy: 0.8566955684007708

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.93      0.90       524
           1       0.84      0.81      0.82       626
           2       0.81      0.80      0.81       543
           3       0.94      0.96      0.95       657
           4       0.99      1.00      1.00       804
           5       0.72      0.68      0.70       484
           6       0.70      0.71      0.70       514

    accuracy                           0.86      4152
   macro avg       0.84      0.84      0.84      4152
weighted avg       0.86      0.86      0.86      4152



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
NB = GaussianNB()
NB.fit(X_train, y_train)
y_pred32 = NB.predict(X_train)
y_pred3 = NB.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred32))
print("Accuracy:", accuracy_score(y_test, y_pred3))
print("\nClassification Report:")
print(classification_report(y_test, y_pred3))

Accuracy: 0.7776707214259906
Accuracy: 0.773121387283237

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.94      0.87       524
           1       0.84      0.65      0.73       626
           2       0.62      0.62      0.62       543
           3       0.80      0.94      0.87       657
           4       0.96      1.00      0.98       804
           5       0.66      0.51      0.58       484
           6       0.58      0.60      0.59       514

    accuracy                           0.77      4152
   macro avg       0.75      0.75      0.75      4152
weighted avg       0.77      0.77      0.77      4152



In [16]:
DT = DecisionTreeClassifier()
DT.fit(X_train, y_train)
y_pred42 = DT.predict(X_train)
y_pred4 = DT.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred42))
print("Accuracy:", accuracy_score(y_test, y_pred4))
print("\nClassification Report:")
print(classification_report(y_test, y_pred4))

Accuracy: 0.9993978080211972
Accuracy: 0.8383911368015414

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.87      0.87       524
           1       0.79      0.80      0.79       626
           2       0.79      0.80      0.80       543
           3       0.96      0.94      0.95       657
           4       0.99      1.00      0.99       804
           5       0.64      0.66      0.65       484
           6       0.70      0.69      0.70       514

    accuracy                           0.84      4152
   macro avg       0.82      0.82      0.82      4152
weighted avg       0.84      0.84      0.84      4152



In [17]:
SVM = SVC()
SVM.fit(X_train, y_train)
y_pred52 = SVM.predict(X_train)
y_pred5 = SVM.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred52))
print("Accuracy:", accuracy_score(y_test, y_pred5))
print("\nClassification Report:")
print(classification_report(y_test, y_pred5))

Accuracy: 0.88769119595327
Accuracy: 0.8783718689788054

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.94      0.92       524
           1       0.87      0.83      0.85       626
           2       0.85      0.85      0.85       543
           3       0.96      0.96      0.96       657
           4       1.00      1.00      1.00       804
           5       0.73      0.73      0.73       484
           6       0.74      0.75      0.75       514

    accuracy                           0.88      4152
   macro avg       0.86      0.87      0.87      4152
weighted avg       0.88      0.88      0.88      4152



In [18]:
XGB = XGBClassifier()
XGB.fit(X_train, y_train)
y_pred62 = XGB.predict(X_train)
y_pred6 = XGB.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred62))
print("Accuracy:", accuracy_score(y_test, y_pred6))
print("\nClassification Report:")
print(classification_report(y_test, y_pred6))

Accuracy: 0.984945200529929
Accuracy: 0.9019749518304432

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94       524
           1       0.88      0.88      0.88       626
           2       0.88      0.87      0.88       543
           3       0.98      0.96      0.97       657
           4       0.99      1.00      1.00       804
           5       0.77      0.79      0.78       484
           6       0.80      0.80      0.80       514

    accuracy                           0.90      4152
   macro avg       0.89      0.89      0.89      4152
weighted avg       0.90      0.90      0.90      4152



In [19]:
ABC = AdaBoostClassifier()
ABC.fit(X_train, y_train)
y_pred72 = ABC.predict(X_train)
y_pred7 = ABC.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred72))
print("Accuracy:", accuracy_score(y_test, y_pred7))
print("\nClassification Report:")
print(classification_report(y_test, y_pred7))



Accuracy: 0.5313742020956281
Accuracy: 0.5358863198458574

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.24      0.36       524
           1       0.52      0.73      0.60       626
           2       0.86      0.52      0.65       543
           3       0.39      0.98      0.56       657
           4       0.00      0.00      0.00       804
           5       0.65      0.62      0.63       484
           6       0.64      0.81      0.71       514

    accuracy                           0.54      4152
   macro avg       0.53      0.56      0.50      4152
weighted avg       0.49      0.54      0.47      4152



In [20]:
GBC = GradientBoostingClassifier()
GBC.fit(X_train, y_train)
y_pred82 = GBC.predict(X_train)
y_pred8 = GBC.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred82))
print("Accuracy:", accuracy_score(y_test, y_pred8))
print("\nClassification Report:")
print(classification_report(y_test, y_pred8))

Accuracy: 0.9158135613633627
Accuracy: 0.8976396917148363

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94       524
           1       0.87      0.89      0.88       626
           2       0.88      0.87      0.87       543
           3       0.98      0.96      0.97       657
           4       1.00      1.00      1.00       804
           5       0.77      0.77      0.77       484
           6       0.78      0.78      0.78       514

    accuracy                           0.90      4152
   macro avg       0.89      0.89      0.89      4152
weighted avg       0.90      0.90      0.90      4152



In [21]:
KNN = KNeighborsClassifier()
KNN.fit(X_train, y_train)
y_pred92 = KNN.predict(X_train)
y_pred9 = KNN.predict(X_test)

print("Accuracy:", accuracy_score(y_train, y_pred92))
print("Accuracy:", accuracy_score(y_test, y_pred9))
print("\nClassification Report:")
print(classification_report(y_test, y_pred9))

Accuracy: 0.8577020354088883
Accuracy: 0.7834778420038536

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.85      0.84       524
           1       0.65      0.71      0.68       626
           2       0.72      0.79      0.75       543
           3       0.94      0.95      0.95       657
           4       0.99      1.00      0.99       804
           5       0.53      0.47      0.50       484
           6       0.64      0.55      0.59       514

    accuracy                           0.78      4152
   macro avg       0.76      0.76      0.76      4152
weighted avg       0.78      0.78      0.78      4152

