In [None]:
import math
import pandas as pd 
from colorthief import ColorThief
import os 
import uuid
from joblib import Parallel, delayed 
from sklearn.model_selection import train_test_split 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier

from Hyprparam_optimized.searcher import GridSearch, RandomSearch
from Hyprparam_optimized.parallel_process import ParallelExecutor
from Hyprparam_optimized.logger import log_to_csv, log_to_json

In [2]:
df = pd.read_csv(r"fast_food_consumption_health_impact_dataset.csv")

df.tail()

Unnamed: 0,Age,Gender,Fast_Food_Meals_Per_Week,Average_Daily_Calories,BMI,Physical_Activity_Hours_Per_Week,Sleep_Hours_Per_Day,Energy_Level_Score,Digestive_Issues,Doctor_Visits_Per_Year,Overall_Health_Score
795,40,Male,6,3244,29.6,9.3,6.0,1,Yes,1,4
796,31,Other,0,2352,24.9,9.2,4.7,7,Yes,0,5
797,41,Other,11,2466,18.3,6.4,4.1,6,No,0,5
798,19,Male,8,3047,27.9,6.3,6.7,2,Yes,8,5
799,43,Female,14,1741,22.3,4.0,8.9,2,Yes,3,9


In [3]:
df['Overall_Health_Score'].value_counts()

Overall_Health_Score
1    110
9    109
4     94
3     86
6     85
2     84
5     81
7     78
8     73
Name: count, dtype: int64

In [4]:
mapped = {'Male':0,'Female':1}
mapped2 = {'Yes':1,'No':0}
df['Gender'] = df['Gender'].map(mapped)
df['Digestive_Issues'] = df['Digestive_Issues'].map(mapped2)
df

Unnamed: 0,Age,Gender,Fast_Food_Meals_Per_Week,Average_Daily_Calories,BMI,Physical_Activity_Hours_Per_Week,Sleep_Hours_Per_Day,Energy_Level_Score,Digestive_Issues,Doctor_Visits_Per_Year,Overall_Health_Score
0,56,0.0,1,3153,28.4,4.5,7.8,9,0,7,5
1,46,0.0,12,1748,22.8,9.6,6.7,2,0,4,3
2,32,1.0,8,3020,21.5,4.0,7.4,2,1,7,4
3,25,1.0,6,2621,26.8,8.4,6.1,6,0,7,8
4,38,1.0,14,2260,18.1,1.9,7.7,7,0,5,4
...,...,...,...,...,...,...,...,...,...,...,...
795,40,0.0,6,3244,29.6,9.3,6.0,1,1,1,4
796,31,,0,2352,24.9,9.2,4.7,7,1,0,5
797,41,,11,2466,18.3,6.4,4.1,6,0,0,5
798,19,0.0,8,3047,27.9,6.3,6.7,2,1,8,5


In [5]:
df = df.drop_duplicates()
df = df.dropna()

In [6]:
df['Overall_Health_Score'].min()

np.int64(1)

In [7]:
y = df['Overall_Health_Score']
x = df.drop(columns='Overall_Health_Score')
x

Unnamed: 0,Age,Gender,Fast_Food_Meals_Per_Week,Average_Daily_Calories,BMI,Physical_Activity_Hours_Per_Week,Sleep_Hours_Per_Day,Energy_Level_Score,Digestive_Issues,Doctor_Visits_Per_Year
0,56,0.0,1,3153,28.4,4.5,7.8,9,0,7
1,46,0.0,12,1748,22.8,9.6,6.7,2,0,4
2,32,1.0,8,3020,21.5,4.0,7.4,2,1,7
3,25,1.0,6,2621,26.8,8.4,6.1,6,0,7
4,38,1.0,14,2260,18.1,1.9,7.7,7,0,5
...,...,...,...,...,...,...,...,...,...,...
793,43,1.0,13,2040,28.0,2.2,5.5,5,1,8
794,45,0.0,6,1692,29.9,8.9,7.9,1,0,5
795,40,0.0,6,3244,29.6,9.3,6.0,1,1,1
798,19,0.0,8,3047,27.9,6.3,6.7,2,1,8


In [8]:
x

Unnamed: 0,Age,Gender,Fast_Food_Meals_Per_Week,Average_Daily_Calories,BMI,Physical_Activity_Hours_Per_Week,Sleep_Hours_Per_Day,Energy_Level_Score,Digestive_Issues,Doctor_Visits_Per_Year
0,56,0.0,1,3153,28.4,4.5,7.8,9,0,7
1,46,0.0,12,1748,22.8,9.6,6.7,2,0,4
2,32,1.0,8,3020,21.5,4.0,7.4,2,1,7
3,25,1.0,6,2621,26.8,8.4,6.1,6,0,7
4,38,1.0,14,2260,18.1,1.9,7.7,7,0,5
...,...,...,...,...,...,...,...,...,...,...
793,43,1.0,13,2040,28.0,2.2,5.5,5,1,8
794,45,0.0,6,1692,29.9,8.9,7.9,1,0,5
795,40,0.0,6,3244,29.6,9.3,6.0,1,1,1
798,19,0.0,8,3047,27.9,6.3,6.7,2,1,8


In [None]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [None]:
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", GradientBoostingClassifier())
])

In [None]:
param_space = {
    "clf__n_estimators": [300, 500],
    "clf__max_depth": [None, 20, 40],
    "clf__min_samples_split": [2, 5],
    "clf__min_samples_leaf": [1, 2],
    "clf__max_features": ["sqrt", "log2"], 
}

In [None]:
grid = GridSearch(param_space)
grid_params = grid.generate()

executor = ParallelExecutor(n_jobs=-1)
grid_results = executor.run(
    estimator=pipeline,
    X=x_train,
    y=y_train,
    param_list=grid_params,
    cv=5,
    scoring="accuracy"
)

log_to_csv(grid_results, "sample_logs/grid_results.csv")


In [None]:
random_search = RandomSearch(param_space, n_iter=10, random_state=42)
random_params = random_search.generate()

random_results = executor.run(
    estimator=pipeline,
    X=x_train,
    y=y_train,
    param_list=random_params,
    cv=5,
    scoring="accuracy"
)

log_to_json(random_results, "sample_logs/random_results.json")
