
# Supervised Learning - XGBoost Example

This p4n Jupyter notebook provides a step-by-step example of using XGBoost for supervised learning on the `p4n_employee` dataset (loaded from `p4n_employee_extended.csv`).


In [1]:

import pandas as pd

# Read the employee records from disk into a DataFrame.
data = pd.read_csv(
    'p4n_employee_extended.csv',
)

# Preview the first rows as the cell output to sanity-check the load.
data.head()


ModuleNotFoundError: No module named 'pandas'

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode categorical variables as integer codes.
# Each column gets its own encoder so that every fitted mapping (`classes_`)
# stays available for `inverse_transform`; refitting a single shared encoder
# on a second column would discard the mapping learned for the first one.
# NOTE(review): assumes Department is the only non-numeric feature column —
# confirm against the CSV schema.
department_encoder = LabelEncoder()
data['Department'] = department_encoder.fit_transform(data['Department'])

# `label_encoder` holds the mapping for the target column (Attrition).
label_encoder = LabelEncoder()
data['Attrition'] = label_encoder.fit_transform(data['Attrition'])

# Split the data into features (every column except the target) and target
X = data.drop(columns='Attrition')
y = data['Attrition']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:

import xgboost as xgb

# Create a DMatrix for each split, attaching the labels to the features
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Set parameters for the native `xgb.train` API.
# The number of trees is deliberately NOT set here: `n_estimators` is an
# argument of the scikit-learn wrapper (e.g. XGBClassifier), not a native
# booster parameter. With `xgb.train` the number of boosting rounds is
# controlled by `num_boost_round` below.
params = {
    'objective': 'binary:logistic',  # binary classification objective
    'max_depth': 3,
    'learning_rate': 0.1,
    'eval_metric': 'logloss'
}

# Train the model for 100 boosting rounds
bst = xgb.train(params, dtrain, num_boost_round=100)


In [None]:

from sklearn.metrics import accuracy_score, confusion_matrix

# Score the held-out rows with the trained booster.
y_pred = bst.predict(dtest)

# Turn each raw score into a hard 0/1 label: strictly above 0.5 -> 1, else 0.
y_pred = [int(score > 0.5) for score in y_pred]

# Compare the predicted labels against the true test labels.
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the results.
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{conf_matrix}')
