<a href="https://colab.research.google.com/github/maralhi/Sensors_models/blob/main/04_Hyperparameter_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

d) Hyperparameter optimization (not needed):

In [None]:
from google.colab import drive
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

# Mount Google Drive so the dataset stored there can be read.
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/Projects_ML/Project_Sensors/df_cl.csv'
df = pd.read_csv(file_path)

# Model inputs are three columns of the frame; the target is the `fail` column.
feature_columns = ['VOC', 'AQ', 'footfall_tempMode_interaction']
X = df[feature_columns]
y = df['fail']

# 60 % of the rows go to training; the held-out 40 % is then halved into
# validation and test (20 % each). Both splits use the same fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to every split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Base XGBoost classifier for the search. `use_label_encoder` is intentionally
# not passed: the installed XGBoost reports it as an unused parameter, so it
# only produced a warning on every fit.
xgb_model = XGBClassifier(
    eval_metric='logloss',
    random_state=42
)

# Exhaustive grid: 3 * 3 * 2 * 2 * 2 = 72 candidate settings.
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'n_estimators': [100, 200],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# 5-fold cross-validation per candidate, scored by macro-averaged F1,
# using all available cores.
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
    verbose=1,
    n_jobs=-1
)

# The search runs on the standardized training features.
grid_search.fit(X_train_scaled, y_train)

print("Best found XGBoost hyperparameters:")
print(grid_search.best_params_)

print("Best F1 macro score:")
print(grid_search.best_score_)


Mounted at /content/drive
Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best found XGBoost hyperparameters:
{'colsample_bytree': 0.8, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 200, 'subsample': 0.8}
Best F1 macro score:
0.9121652260595127


Parameters: { "use_label_encoder" } are not used.



In [None]:
from google.colab import drive
import pandas as pd

# Make sure Drive is mounted (a no-op message is printed if it already is),
# then (re)load the dataset from its CSV file.
drive.mount('/content/drive')

file_path = '/content/drive/MyDrive/Projects_ML/Project_Sensors/df_cl.csv'
df = pd.read_csv(file_path)

# Preview the first rows as the cell's displayed result.
df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,footfall,tempMode,AQ,USS,CS,VOC,RP,IP,Temperature,fail,is_outlier_footfall,is_outlier_CS,is_outlier_Temperature,footfall_tempMode_interaction
0,0,7,7,1,6,6,36,3,1,1,False,False,True,0
1,190,1,3,3,5,1,20,4,1,0,False,False,True,190
2,31,7,2,2,6,1,24,6,1,0,False,False,True,217
3,83,4,3,4,5,1,28,6,1,0,False,False,True,332
4,640,7,5,6,4,0,68,6,1,0,True,False,True,4480


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

# Search space for logistic regression: five regularization strengths crossed
# with L1 / L2 penalties, all solved with liblinear.
# NOTE: this rebinds `param_grid` and `grid_search`, which previously held the
# XGBoost grid and its fitted search.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 1, 10],
    penalty=['l1', 'l2'],
    solver=['liblinear'],
)

lr = LogisticRegression(max_iter=1000)

# 5-fold cross-validated grid search scored by macro-averaged F1, fitted on
# the unscaled training features.
grid_search = GridSearchCV(
    estimator=lr,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=5,
)
grid_search.fit(X_train, y_train)

print("Best params:", grid_search.best_params_)


Best params: {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}


In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from scipy.stats import uniform

# Sampling space for the SVC search. scipy's `uniform(loc, scale)` draws from
# [loc, loc + scale], so C is sampled from [0.1, 10.1] and gamma from
# [0.001, 1.001]. `degree` only matters when the sampled kernel is 'poly'.
param_distributions = dict(
    C=uniform(loc=0.1, scale=10),
    gamma=uniform(loc=0.001, scale=1),
    kernel=['rbf', 'poly', 'sigmoid'],
    degree=[2, 3, 4],
)

svm = SVC()

# 50 random draws, each evaluated with 5-fold cross-validation on macro F1;
# the fixed seed makes the sampled candidates repeatable.
random_search = RandomizedSearchCV(
    estimator=svm,
    param_distributions=param_distributions,
    n_iter=50,
    scoring='f1_macro',
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

print("Best params:", random_search.best_params_)

e) Retraining with optimized hyperparameters


In [None]:
from xgboost import XGBClassifier

# `grid_search` is rebound by the logistic-regression search that runs between
# the XGBoost search and this cell, so reading `grid_search.best_params_`
# unconditionally would splat C / penalty / solver into XGBClassifier, which
# does not use them, and silently train a default model. Only trust the search
# object if it still wraps an XGBClassifier; otherwise fall back to the best
# parameters reported by the XGBoost grid search in this notebook.
if isinstance(grid_search.estimator, XGBClassifier):
    best_params_xgb = grid_search.best_params_
else:
    # Values recorded from the XGBoost grid-search output; refresh them
    # whenever that search is re-run on different data or a different grid.
    best_params_xgb = {
        'colsample_bytree': 0.8,
        'learning_rate': 0.01,
        'max_depth': 3,
        'n_estimators': 200,
        'subsample': 0.8,
    }
    print("grid_search no longer holds the XGBoost search; "
          "using the recorded XGBoost best parameters:", best_params_xgb)

# `use_label_encoder` is omitted: the installed XGBoost reports it as unused.
final_xgb_model = XGBClassifier(
    eval_metric='logloss',
    random_state=42,
    **best_params_xgb
)

# NOTE: the XGBoost search was fitted on X_train_scaled, while this refit uses
# the unscaled X_train.
final_xgb_model.fit(X_train, y_train)



In [None]:
from sklearn.linear_model import LogisticRegression

# Take the winning setting from `grid_search`, which at this point in run
# order was last bound by the logistic-regression search.
best_params_lr = grid_search.best_params_

# Refit a fresh logistic regression on the training split with those settings;
# `fit` returns the estimator itself, so the fitted model is bound directly.
final_lr_model = LogisticRegression(max_iter=1000, **best_params_lr).fit(X_train, y_train)
final_lr_model


In [None]:
from sklearn.svm import SVC

# Take the winning setting from the randomized SVC search.
best_params_svm = random_search.best_params_

# Refit a fresh SVC on the training split with those settings; `fit` returns
# the estimator itself, so the fitted model is bound directly.
final_svm_model = SVC(**best_params_svm).fit(X_train, y_train)
final_svm_model
