In [11]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import warnings

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Read the dataset
data = pd.read_csv('ObesityDataSet.csv')

# Keep only the desired columns
desired_columns = ['Height', 'Weight', 'eat_vegetables_frequency', 'exercise_frequency','water_day','Age','device_time', 'Gender',
                   'family_history_with_overweight', 'eat_high_caloric_food', 'eat_between_meals', 'smoke', 'monitor_calories', 'transport_mode']
data = data[desired_columns + ['obese_category']]  # Add 'obese_category' to keep it as well

# Apply label encoding to 'obese_category'
label_encoder = LabelEncoder()
data['obese_category'] = label_encoder.fit_transform(data['obese_category'])

# Extract features and target variable
tr_features = data.drop(columns=['obese_category'])  # Features
tr_labels = data['obese_category']  # Target variable

# Define the columns to one-hot encode
nominal_columns = ['Gender', 'family_history_with_overweight', 'eat_high_caloric_food', 
                   'eat_between_meals', 'smoke', 'monitor_calories', 'transport_mode']

# Create a pipeline for preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(), nominal_columns)
    ],
    remainder='passthrough'
)

# Define the MLP model
mlp = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', MLPClassifier())
])

# Define the parameters for grid search
parameters = {
    'classifier__hidden_layer_sizes': [(10,), (50, 10), (100,)],
    'classifier__activation': ['relu', 'tanh', 'logistic'],
    'classifier__learning_rate': ['constant'],
    'classifier__learning_rate_init': [0.01, 0.5, 0.1, 0.2],
    'classifier__max_iter': [7000],
    'classifier__solver': ['adam']
}

# Perform grid search
cv = GridSearchCV(mlp, parameters, cv=5)
cv.fit(tr_features, tr_labels)

# Print results
def print_results(results):
    print('BEST PARAMS: {}\n'.format(results.best_params_))
    
    mean = results.cv_results_['mean_test_score']
    stds = results.cv_results_['std_test_score']
    for mean, std, params in zip(mean, stds, results.cv_results_['params']):
        print('{} (+/- {}) for {}'.format(round(mean, 3), round(std * 2, 3), params))

print_results(cv)


BEST PARAMS: {'classifier__activation': 'tanh', 'classifier__hidden_layer_sizes': (100,), 'classifier__learning_rate': 'constant', 'classifier__learning_rate_init': 0.01, 'classifier__max_iter': 7000, 'classifier__solver': 'adam'}

0.792 (+/- 0.13) for {'classifier__activation': 'relu', 'classifier__hidden_layer_sizes': (10,), 'classifier__learning_rate': 'constant', 'classifier__learning_rate_init': 0.01, 'classifier__max_iter': 7000, 'classifier__solver': 'adam'}
0.158 (+/- 0.012) for {'classifier__activation': 'relu', 'classifier__hidden_layer_sizes': (10,), 'classifier__learning_rate': 'constant', 'classifier__learning_rate_init': 0.5, 'classifier__max_iter': 7000, 'classifier__solver': 'adam'}
0.452 (+/- 0.47) for {'classifier__activation': 'relu', 'classifier__hidden_layer_sizes': (10,), 'classifier__learning_rate': 'constant', 'classifier__learning_rate_init': 0.1, 'classifier__max_iter': 7000, 'classifier__solver': 'adam'}
0.164 (+/- 0.01) for {'classifier__activation': 'relu',