In [1]:
import pandas as pd
import numpy as np

# For splitting data
from sklearn.model_selection import train_test_split

# For evaluation
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# XGBoost model
from xgboost import XGBClassifier

In [2]:
# Load the Excel file (replace with your file name if needed)
df = pd.read_excel(r"C:\Users\Tilak Tyagi\Desktop\Kaggle\MLFOOD.xlsx")

# View the first 5 rows
df.head()

Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,...,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
0,36,0,2,329,2,2,3,2,4,1,...,3,2,1,6,3,3,2,2,2,2
1,46,0,1,1144,2,7,4,4,3,1,...,3,3,0,7,2,4,1,0,0,0
2,46,1,2,377,3,9,3,3,1,0,...,3,1,1,28,1,4,7,7,4,3
3,45,0,2,252,2,2,3,2,2,1,...,3,4,0,1,3,3,1,0,0,0
4,43,0,3,775,3,15,3,2,4,0,...,3,3,1,7,5,3,2,2,2,2


# **Feature and Target Split**

In [3]:
# X = all columns except the target ('Attrition')
X = df.drop('Attrition', axis=1)

# y = target column we want to predict
y = df['Attrition']


# **Split into Train and Validation Sets**

In [4]:
# Split data: 80% train, 20% validation
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# **Create and Train the XGBoost Mode**

In [6]:
# Create an XGBoost classifier
# n_estimators: number of trees
# learning_rate: how fast the model learns
# use_label_encoder=False: required to suppress warnings
# eval_metric: performance metric used during training
model = XGBClassifier(n_estimators=100, learning_rate=0.1, eval_metric='logloss')

# Train the model
model.fit(X_train, y_train)


# **Making Prediction And Evaluation**

In [7]:
# Predict on the validation set
y_pred = model.predict(X_valid)

# Accuracy score
print("Accuracy:", accuracy_score(y_valid, y_pred))

# Confusion Matrix
print("Confusion Matrix:\n", confusion_matrix(y_valid, y_pred))

# Precision, Recall, F1-Score
print("Classification Report:\n", classification_report(y_valid, y_pred))

Accuracy: 0.847457627118644
Confusion Matrix:
 [[188   8]
 [ 28  12]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.96      0.91       196
           1       0.60      0.30      0.40        40

    accuracy                           0.85       236
   macro avg       0.74      0.63      0.66       236
weighted avg       0.82      0.85      0.83       236



# **Use Model**

In [10]:
# Load test file (Excel or CSV)
test_df = pd.read_excel(r"C:\Users\Tilak Tyagi\Desktop\Kaggle\MLFOODASTEST.xlsx")  # change filename if needed


In [9]:
# Predict using trained XGBoost model
test_predictions = model.predict(test_df)

# Create output DataFrame with EmployeeNumber and predictions
output_df = pd.DataFrame({
    'Attrition': test_predictions
})

# Save to Excel file
output_df.to_excel("XGBoost_Attrition_Predictions.xlsx", index=False)


# **Giving Final File To Do Prediction**

In [17]:
test_final = pd.read_excel(r"C:\Users\Tilak Tyagi\Desktop\Kaggle\TestML.xlsx")  # change filename if needed
test_final = test_final.reindex(columns=X.columns, fill_value=0)

In [19]:
# Predict using trained XGBoost model
test_predictions = model.predict(test_final)

# Create output DataFrame with EmployeeNumber and predictions
output_df = pd.DataFrame({
    'Attrition': test_predictions
})

# Save to Excel file
output_df.to_excel("XGBoost_Attrition_Predictions_Result.xlsx", index=True)


### **Prediction is complete, now we have a excel file named "XGBoost_Attrition_Predictions_Result"  in the folder we are working on** <br><br> 
# **ThankYou**