In [2]:
# Attach Google Drive to the Colab VM so files stored there are readable
# through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'  # directory where Drive is mounted
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
import pandas as pd
import xgboost as xgb  # XGBoost library
from imblearn.over_sampling import SMOTE  # For class balancing
from imblearn.pipeline import Pipeline  # Sampler-aware pipeline: resamples during fit only
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

# Train and evaluate an XGBoost classifier for 'Engine Condition'.
#
# Order of operations matters here: the test set is held out BEFORE any
# oversampling, and SMOTE runs inside the cross-validation pipeline so that
# synthetic samples are generated from the fitting folds only. Resampling the
# full dataset before splitting leaks test/validation information into
# training and puts synthetic rows in the test set, which inflates every
# reported metric.

RANDOM_STATE = 42  # shared seed for the split, SMOTE and XGBoost
TEST_SIZE = 0.2  # 80% train / 20% test
TARGET_COLUMN = 'Engine Condition'

# Load the dataset
# NOTE(review): this notebook mounts Google Drive at /content/drive, but the
# path below points at /mnt/data -- confirm where the CSV actually lives.
file_path = '/mnt/data/engine_data.csv'
data = pd.read_csv(file_path)

# Separate features (X) and target (y)
X = data.drop(columns=TARGET_COLUMN)
y = data[TARGET_COLUMN]

# Hold out the test set first; stratify so both splits keep the original
# class proportions. The test set contains real rows only.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y,
)

# Scale the features: statistics are learned from the training split only and
# then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SMOTE + XGBoost as one estimator. The imblearn Pipeline applies the sampler
# during fit only, so within GridSearchCV each validation fold (and any later
# predict call) sees un-resampled data.
smote = SMOTE(random_state=RANDOM_STATE)
xgb_model = xgb.XGBClassifier(random_state=RANDOM_STATE)
model_pipeline = Pipeline(steps=[('smote', smote), ('xgb', xgb_model)])

# Hyperparameter grid (kept small to limit runtime); keys are prefixed with
# the pipeline step name so they reach the XGBoost estimator.
param_grid = {
    'xgb__n_estimators': [100, 200],
    'xgb__learning_rate': [0.01, 0.1],
    'xgb__max_depth': [3, 5],
    'xgb__min_child_weight': [1, 3],
    'xgb__subsample': [0.8],
    'xgb__colsample_bytree': [0.8],
}

# Perform Grid Search. Validation folds now keep the original class balance,
# so candidates are ranked by macro-averaged F1 (each class weighted equally)
# rather than plain accuracy, which can favour the majority class.
grid_search = GridSearchCV(
    estimator=model_pipeline,
    param_grid=param_grid,
    scoring='f1_macro',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train_scaled, y_train)

# GridSearchCV refits the best pipeline on the whole training split; pull out
# the fitted XGBoost step so `best_xgb` is still a plain XGBClassifier.
best_xgb = grid_search.best_estimator_.named_steps['xgb']

# Model evaluation on the untouched test set
y_pred = best_xgb.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Output results (printed so the multi-line report renders readably instead of
# as an escaped string inside a tuple repr)
print(f"Best parameters: {grid_search.best_params_}")
print(f"Test accuracy: {accuracy:.4f}")
print("Confusion matrix:")
print(conf_matrix)
print("Classification report:")
print(class_report)

