In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

ModuleNotFoundError: No module named 'xgboost'

In [2]:
pip install xgboost


Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.4-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 1.0/124.9 MB 5.6 MB/s eta 0:00:23
    --------------------------------------- 1.8/124.9 MB 4.4 MB/s eta 0:00:29
    --------------------------------------- 2.4/124.9 MB 4.2 MB/s eta 0:00:30
   - -------------------------------------- 3.1/124.9 MB 4.0 MB/s eta 0:00:31
   - -------------------------------------- 3.9/124.9 MB 3.9 MB/s eta 0:00:32
   - -------------------------------------- 4.7/124.9 MB 3.8 MB/s eta 0:00:33
   - -------------------------------------- 5.5/124.9 MB 3.8 MB/s eta 0:00:32
   - -------------------------------------- 6.0/124.9 MB 3.7 MB/s eta 0:00:32
   -- ------------------------------------- 6.8/124.9 MB 3.7 MB/s eta 0:00:33
   -- ------------------------------------- 7.6/124.9 MB 3.6 MB/s eta 0:00:33
 

In [None]:
# -------------------------
# 1. Data Loading & Preprocessing
# -------------------------
# Load your dataset (ensure you have preprocessed features for drowsiness detection)
data_path = 'data.csv'  # update with your actual file path if different
df = pd.read_csv(data_path)

# Assuming the dataset has a "label" column indicating drowsiness (1 for drowsy, 0 for alert)
X = df.drop('label', axis=1)
y = df['label']

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows leak into preprocessing and make the test
# score optimistic. The split is stratified on the label so both partitions
# keep the same class proportions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling statistics from the training rows only, then apply that
# same (frozen) transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler. Kept only so any
# later cell that still refers to `X_scaled` keeps working; it is not used for
# fitting or evaluation below.
X_scaled = scaler.transform(X)

# -------------------------
# 2. Define Base Models
# -------------------------
# Create individual models with fixed random states for reproducibility.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# `use_label_encoder` is intentionally not passed: it is a deprecated argument
# that recent XGBoost releases ignore, so supplying it only produces an
# "unused parameter" warning on every fit.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)

# probability=True makes the SVM expose predict_proba, which the soft-voting
# ensemble defined later in this notebook relies on.
svm = SVC(probability=True, kernel='rbf', random_state=42)

# -------------------------
# 3. Create an Ensemble Using VotingClassifier
# -------------------------
# Each member is registered under a short name; those names are the prefixes
# used when addressing the members' hyperparameters (e.g. 'rf__n_estimators').
base_estimators = [
    ('rf', rf),
    ('xgb', xgb),
    ('svm', svm),
]

# With voting='soft' the final decision is based on the members' predicted
# class probabilities rather than on their hard class votes.
ensemble_clf = VotingClassifier(estimators=base_estimators, voting='soft')

# -------------------------
# 4. Hyperparameter Tuning with Grid Search
# -------------------------
# Keys use the "<estimator name>__<parameter>" form so each value list is
# routed to the matching member of the voting ensemble.
# 2 x 2 x 3 = 12 candidate combinations in total.
param_grid = dict(
    rf__n_estimators=[100, 200],
    xgb__n_estimators=[50, 100],
    svm__C=[0.1, 1, 10],
)

# Score every combination by 5-fold cross-validated accuracy, using all
# available CPU cores (n_jobs=-1).
grid_search = GridSearchCV(
    ensemble_clf,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

# Run the search on the training partition only and report the winning combination.
grid_search.fit(X_train, y_train)
print("Best parameters found:", grid_search.best_params_)

# -------------------------
# 5. Evaluation on the Test Set
# -------------------------
# Calling predict on the fitted search object uses the best model it found.
y_pred = grid_search.predict(X_test)

# Compute both summaries first, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")
print("\nClassification Report:\n", report)