In [6]:
!pip install flaml

Collecting flaml
  Downloading FLAML-2.3.6-py3-none-any.whl.metadata (16 kB)
Downloading FLAML-2.3.6-py3-none-any.whl (322 kB)
Installing collected packages: flaml
Successfully installed flaml-2.3.6


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split

# --- Split configuration: the only values a reader should need to tune -------
# NOTE(review): absolute, machine-specific path; point it at your local copy.
DATA_PATH = r"C:\Users\Kaveri\Downloads\CHASE datasets\c3f.csv"
SPLIT_SEED = 42               # fixed seed so the partition is reproducible
HOLDOUT_FRACTION = 0.30       # rows held out of training (validation + test)
TEST_SHARE_OF_HOLDOUT = 0.50  # half of the hold-out becomes test -> 70/15/15

# Load your IoT dataset
c3f = pd.read_csv(DATA_PATH)

# Encode the string label as integer category codes; used below to stratify.
c3f['Label_num'] = c3f['Label'].astype('category').cat.codes

# Stage 1: train vs. hold-out, preserving the label distribution.
train_df, temp_df = train_test_split(
    c3f,
    test_size=HOLDOUT_FRACTION,
    stratify=c3f['Label_num'],
    random_state=SPLIT_SEED,
)

# Stage 2: divide the hold-out into validation and test, again stratified.
val_df, test_df = train_test_split(
    temp_df,
    test_size=TEST_SHARE_OF_HOLDOUT,
    stratify=temp_df['Label_num'],
    random_state=SPLIT_SEED,
)

# Sanity checks: the three splits must cover every row and share none.
assert len(train_df) + len(val_df) + len(test_df) == len(c3f)
assert train_df.index.intersection(val_df.index).empty
assert train_df.index.intersection(test_df.index).empty
assert val_df.index.intersection(test_df.index).empty

print(f"Train shape: {train_df.shape}, Val shape: {val_df.shape}, Test shape: {test_df.shape}")


Train shape: (204924, 39), Val shape: (43912, 39), Test shape: (43913, 39)


In [12]:
import numpy as np
import pandas as pd

def clean_for_flaml(X, fill_values=None):
    """Return a copy of ``X`` with +/-inf and NaN replaced by finite fill values.

    Infinite entries are first converted to NaN, then every NaN is filled.
    The input frame is never modified.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame to clean. Non-numeric columns are allowed; they are
        ignored when medians are computed.
    fill_values : scalar, dict or pandas.Series, optional
        Values used to fill NaN, in any form ``DataFrame.fillna`` accepts
        (for a dict/Series, keyed by column name). When omitted, the medians
        of ``X``'s own numeric columns are used, which is the original
        behaviour. Pass medians computed on the training frame to impute
        validation/test frames with the same statistics.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same shape, index and columns as ``X``. A column
        with no finite values has a NaN median and therefore stays NaN unless
        ``fill_values`` supplies a value for it.
    """
    # `replace` returns a new object, so no explicit copy is needed.
    X = X.replace([np.inf, -np.inf], np.nan)
    if fill_values is None:
        # numeric_only=True: without it, pandas >= 2.0 raises TypeError as
        # soon as the frame contains a non-numeric column.
        fill_values = X.median(numeric_only=True)
    return X.fillna(fill_values)


In [13]:
# NOTE(review): X_trainval and X_test are not defined in any cell visible in
# this notebook, so this cell relies on leftover kernel state and would raise
# NameError on a fresh "Restart & Run All" unless they are defined elsewhere.
# Each name is rebound to its cleaned version (inf -> NaN, then NaN -> that
# frame's own column medians, as implemented by clean_for_flaml).
X_trainval = clean_for_flaml(X_trainval)
X_test = clean_for_flaml(X_test)

In [15]:
from flaml import AutoML
from sklearn.metrics import accuracy_score

# --- AutoML configuration -----------------------------------------------------
targets = ["Cat", "Sub_Cat"]
# Every label column is dropped from the features so that no target (at any
# level of the label hierarchy) leaks into X.
NON_FEATURE_COLS = ["Cat", "Sub_Cat", "Label", "Label_num"]
TIME_BUDGET_S = 300   # 5 minutes of search per target
AUTOML_SEED = 42      # fixed seed for a repeatable search
results = {}

# The feature matrices do not depend on the target, so build and clean them
# once instead of repeating the work on every loop iteration.
X_train = clean_for_flaml(train_df.drop(columns=NON_FEATURE_COLS))
# NOTE(review): clean_for_flaml imputes with medians of the frame it is given,
# so validation NaNs are filled with validation-split medians; consider
# reusing the training medians for strictly consistent preprocessing.
X_val = clean_for_flaml(val_df.drop(columns=NON_FEATURE_COLS))

for target in targets:
    print(f"\n================ FLAML AutoML for {target} ================")

    # Labels for the current hierarchy level
    y_train = train_df[target]
    y_val = val_df[target]

    # Run FLAML on the training split only; val_df is never passed to fit(),
    # so the score below is computed on rows the search has not seen.
    automl = AutoML()
    automl.fit(
        X_train, y_train,
        task="classification",
        time_budget=TIME_BUDGET_S,
        metric="accuracy",
        seed=AUTOML_SEED,
    )

    # Evaluate on VALIDATION (not test)
    y_pred = automl.predict(X_val)
    acc = accuracy_score(y_val, y_pred)

    results[target] = {
        "best_model": automl.best_estimator,
        "val_accuracy": acc,
    }

print("\n📊 FLAML Results (Validation Accuracy):")
for target, res in results.items():
    print(f"{target}: {res['best_model']} → Val Acc = {res['val_accuracy']:.3f}")



[flaml.automl.logger: 09-02 07:23:32] {1752} INFO - task = classification
[flaml.automl.logger: 09-02 07:23:32] {1763} INFO - Evaluation method: holdout
[flaml.automl.logger: 09-02 07:23:33] {1862} INFO - Minimizing error metric: 1-accuracy
[flaml.automl.logger: 09-02 07:23:33] {1979} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'sgd', 'lrl1']
[flaml.automl.logger: 09-02 07:23:33] {2282} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 09-02 07:23:33] {2417} INFO - Estimated sufficient time budget=19197s. Estimated necessary time budget=444s.
[flaml.automl.logger: 09-02 07:23:33] {2466} INFO -  at 2.9s,	estimator lgbm's best error=0.1515,	best estimator lgbm's best error=0.1515
[flaml.automl.logger: 09-02 07:23:33] {2282} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 09-02 07:23:33] {2466} INFO -  at 3.0s,	estimator lgbm's best error=0.1515,	best estimator lgbm's best error=0.1515
[flaml.automl.logge

## Result comparison

### 📊 Model Selection Results (Validation Accuracy)

| Level    | Handcoded Rules (Model + Val Acc)              | RL Agent (Model + Val Acc)                         | FLAML AutoML (Model + Val Acc) |
|----------|-----------------------------------------------|---------------------------------------------------|--------------------------------|
| Cat      | XGB+LGB Soft Voting → **0.982**               | Mixed (LGB+RF, XGB+LGB+RF, etc.) → **0.975**          | xgb_limitdepth → **0.979**         |
| Sub_Cat  | Stacking XGB+LGB+RF (Meta=Logistic) → **0.903** | Mostly Stacking (XGB+LGB+RF+Meta) → **0.858**          | xgboost → **0.847**                |
| Label    | Deterministic                                 | Deterministic                                     | –                              |
