# Employee Attrition Prediction
### Models: Logistic Regression, Decision Tree, Random Forest, Neural Network (MLP)

This notebook applies multiple machine learning models to predict employee attrition.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

## Data Loading and Exploration

In [None]:
# Load the raw HR records; the path is relative to the current working directory.
df0 = pd.read_csv(filepath_or_buffer='HR_capstone_dataset.csv')
# Preview the first five rows (the same count head() uses by default).
df0.head(n=5)

In [None]:
# info() prints the column dtypes and non-null counts as a side effect.
df0.info()
# The summary table is the cell's last expression, so the notebook renders it.
summary_stats = df0.describe()
summary_stats

## Column Name Standardization

In [None]:
# Corrected spellings / consistent formatting for three of the raw column names.
column_renames = {
    'average_montly_hours': 'average_monthly_hours',
    'time_spend_company': 'time_spent_company',
    'promotion_last_5years': 'promotion_last_5_years',
}
df0 = df0.rename(columns=column_renames)
# Show the resulting column index to confirm the new names.
df0.columns

## Missing Values, Duplicates, and Outliers

In [None]:
# Number of missing entries in each column (isna is the alias of isnull).
df0.isna().sum()

In [None]:
# Remove exact duplicate rows. The explicit .copy() makes `df` an independent
# DataFrame: columns of `df` are overwritten in place later in the notebook,
# and without the copy pandas (before copy-on-write) can emit
# SettingWithCopyWarning because the result is tracked as derived from `df0`.
df = df0.drop_duplicates().copy()
# (rows, columns) remaining after de-duplication.
df.shape

In [None]:
# Box plot of tenure to eyeball potential outliers, drawn via the Axes API.
fig, ax = plt.subplots()
ax.boxplot(df['time_spent_company'])
ax.set_xlabel('Tenure')
ax.set_ylabel('Years Spent')
ax.set_title('Time Spent in the Company')
plt.show()

In [None]:
# Flag tenure outliers with the 1.5 * IQR rule.
tenure = df['time_spent_company']

# First and third quartiles, fetched in a single quantile() call.
Q1, Q3 = tenure.quantile([0.25, 0.75])
IQR = Q3 - Q1

# Fences: anything strictly outside [lower_bound, upper_bound] is an outlier.
upper_bound = Q3 + 1.5 * IQR
lower_bound = Q1 - 1.5 * IQR

is_outlier = (tenure < lower_bound) | (tenure > upper_bound)
outliers = df[is_outlier]
# Number of rows whose tenure falls outside the fences.
len(outliers)

## Encoding Categorical Variables

In [None]:
# Integer-encode the two categorical columns, one fitted encoder per column.
# Reusing a single LabelEncoder would overwrite its fitted classes on the
# second fit, so the first column's mapping could no longer be inspected or
# inverted; keeping them in a dict preserves both (encoders[col].classes_).
# NOTE(review): LabelEncoder numbers classes in sorted order, so if the salary
# levels have a natural ranking the codes will not follow it -- consider an
# explicit ordered mapping if that matters for the linear / neural models.
encoders = {}
for column in ('Department', 'salary'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Keep `le` bound to the salary encoder, as it was after the original cell ran.
le = encoders['salary']
df.head()

## Train-Test Split, Feature Scaling, and Model Training

In [None]:
# Target is the 'left' column; every other column is used as a feature.
y = df['left']
X = df.drop(columns='left')

# 80/20 split, stratified on the target so both parts keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit the scaler on the training features only, then apply it to both parts
# so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Logistic Regression -- trained and evaluated on the standardized features.
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train_scaled, y_train)

y_pred_lr = log_reg.predict(X_test_scaled)
lr_accuracy = accuracy_score(y_test, y_pred_lr)
lr_report = classification_report(y_test, y_pred_lr)

print("Logistic Regression Accuracy:", lr_accuracy)
print(lr_report)

In [None]:
# Decision Tree -- fitted on the unscaled features.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(y_test, y_pred_dt)

print("Decision Tree Accuracy:", dt_accuracy)
print(dt_report)

In [None]:
# Random Forest -- 100 trees, fitted on the unscaled features.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred_rf = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

print("Random Forest Accuracy:", rf_accuracy)
print(rf_report)

In [None]:
# Neural Network (MLP) -- two hidden layers, trained on the standardized features.
# max_iter is the epoch cap for the default stochastic solver. The previous
# cap of 100 was half the library default (200) and makes it likely that
# training is cut off before convergence (ConvergenceWarning). A higher cap
# lets the built-in tolerance check decide when to stop instead.
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation='relu',
    max_iter=500,
    random_state=42,
)

mlp.fit(X_train_scaled, y_train)

y_pred_mlp = mlp.predict(X_test_scaled)

print("Neural Network Accuracy:", accuracy_score(y_test, y_pred_mlp))
print(classification_report(y_test, y_pred_mlp))