# End-to-End HR Attrition ML Project

## Load Libraries

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


## Load Dataset

In [None]:

# Read the raw HR export into a DataFrame; the path is resolved relative to
# the current working directory.
DATA_PATH = "HumanResources.csv"
df = pd.read_csv(DATA_PATH)

# Bare trailing expression: previews the first rows when run as a notebook cell.
df.head()


## Create Target Column (Attrition)

In [None]:

# Binary target: 1 where a termination date is present, 0 where it is missing.
has_termination_date = df['termination_date'].notna()
df['Attrition'] = has_termination_date.astype(int)


## Drop Unnecessary Columns

In [None]:

# Per-employee identifier/name columns that are removed before modelling.
identifier_columns = ['employee_id', 'first_name', 'last_name']

# 'termination_date' is the column the Attrition target was derived from, so
# it is removed together with the identifiers (in place, on the same object).
df.drop(columns=identifier_columns + ['termination_date'], inplace=True)


## Date Feature Engineering

In [None]:

def _completed_years(start, as_of):
    """Return the whole calendar years elapsed from each date in *start* to *as_of*.

    Parameters:
        start: datetime64 Series of start dates (birth date, hire date, ...).
        as_of: a single ``pd.Timestamp`` the elapsed time is measured against.

    Returns:
        A Series aligned with *start*. Missing dates (NaT) yield NaN.
    """
    # True where this year's anniversary has not been reached yet on `as_of`.
    anniversary_pending = (start.dt.month > as_of.month) | (
        (start.dt.month == as_of.month) & (start.dt.day > as_of.day)
    )
    return as_of.year - start.dt.year - anniversary_pending.astype(int)


# Parse the raw date columns so the .dt accessor is available.
df['hire_date'] = pd.to_datetime(df['hire_date'])
df['birth_date'] = pd.to_datetime(df['birth_date'])

# Sample "today" once so both features are measured against the same instant.
reference_date = pd.Timestamp.today()

# Exact calendar years rather than `days // 365`: dividing by 365 ignores leap
# days and rolls the value over several days before the real anniversary.
df['age'] = _completed_years(df['birth_date'], reference_date)
df['experience_years'] = _completed_years(df['hire_date'], reference_date)

# NOTE(review): experience is measured up to today for every row, including
# rows whose Attrition flag is 1 — confirm whether tenure for leavers should
# instead end at their termination date (that column is not available here).

# The raw dates are replaced by the derived numeric features.
df.drop(columns=['hire_date','birth_date'], inplace=True)


## Encode Binary & Ordinal Features

In [None]:

# Ordinal scales: a larger code means a higher level / better rating.
education_map = {'High School':1,'Bachelor':2,'Master':3,'PhD':4}
performance_map = {
    'Needs Improvement':1,
    'Satisfactory':2,
    'Good':3,
    'Excellent':4
}

# Column -> {label: numeric code}. Applied in this order, one column at a time.
label_codes = {
    'gender': {'Male':1,'Female':0},
    'overtime': {'Yes':1,'No':0},
    'education_level': education_map,
    'performance_rating': performance_map,
}

# Series.map leaves NaN for any label that is not a key of the mapping.
for column_name, codes in label_codes.items():
    df[column_name] = df[column_name].map(codes)


## One-Hot Encode Nominal Features

In [None]:

# Unordered categorical columns that are expanded into indicator columns.
nominal_columns = ['state','city','department','job_title']

# drop_first=True omits the first level of each column, so that level is
# represented by all of the column's indicators being 0.
df = pd.get_dummies(df, columns=nominal_columns, drop_first=True)


## Split Data

In [None]:

# Separate the target from the feature matrix.
target_column = 'Attrition'
y = df[target_column]
X = df.drop(columns=target_column)

# Hold out 20% of the rows for testing; stratify on the target and fix the
# seed so the split is reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts


## Feature Scaling

In [None]:

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


## Train Model

In [None]:

# Random forest with a fixed seed; every other hyperparameter is left at the
# library default.
forest_settings = {'random_state': 42}
model = RandomForestClassifier(**forest_settings)

# Fit on the scaled training split.
model.fit(X_train, y_train)


## Evaluation

In [None]:

# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Overall accuracy first, then the per-class report.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

per_class_report = classification_report(y_test, y_pred)
print(per_class_report)


## Exploratory Data Analysis (EDA) – Visualizations

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns


### 1. Attrition Distribution

In [None]:

# Bar chart of the number of rows in each Attrition class.
attrition_ax = sns.countplot(data=df, x='Attrition')
attrition_ax.set_title("Attrition Distribution")
plt.show()


### 2. Gender vs Attrition

In [None]:

# Row counts per gender value, split into one bar per Attrition class.
gender_ax = sns.countplot(data=df, x='gender', hue='Attrition')
gender_ax.set_title("Gender vs Attrition")
plt.show()


### 3. Department-wise Attrition

In [None]:

# The 'department' column was one-hot encoded earlier, so rebuild one
# department label per row from the indicator columns. Grouping on the single
# 'department_Sales' indicator would only compare Sales against everyone else.
dept_prefix = 'department_'
dept_columns = [
    col for col in df.columns
    if isinstance(col, str) and col.startswith(dept_prefix)
]
if not dept_columns:
    raise KeyError(f"no '{dept_prefix}*' indicator columns found in df")

# Cast to int so the indicators behave the same whether they are stored as
# bool or as integer dtype.
dept_flags = df[dept_columns].astype(int)

# The column holding the 1 names the department; strip the shared prefix.
department = dept_flags.idxmax(axis=1).str[len(dept_prefix):]

# Rows with no indicator set belong to the level omitted by drop_first=True
# (or had no department recorded); idxmax would otherwise mislabel them.
department = department.where(dept_flags.any(axis=1), '(baseline)')
department = department.rename('department')

# Mean of the 0/1 Attrition flag per department = attrition rate.
dept_attr = df['Attrition'].groupby(department).mean()
dept_attr.plot(kind='bar')
plt.title("Department-wise Attrition Rate (Sample)")
plt.ylabel("Attrition Rate")
plt.show()


### 4. Salary Distribution

In [None]:

# Histogram of salaries with a kernel density estimate overlaid.
salaries = df['salary']
salary_ax = sns.histplot(salaries, kde=True)
salary_ax.set_title("Salary Distribution")
plt.show()


### 5. Salary vs Attrition

In [None]:

# Box plot comparing the salary spread of the two Attrition classes.
salary_box_ax = sns.boxplot(data=df, x='Attrition', y='salary')
salary_box_ax.set_title("Salary vs Attrition")
plt.show()


### 6. Age Distribution

In [None]:

# Histogram of the derived age feature (20 bins) with a density estimate.
ages = df['age']
age_ax = sns.histplot(ages, bins=20, kde=True)
age_ax.set_title("Age Distribution")
plt.show()


### 7. Experience vs Attrition

In [None]:

# Box plot comparing years of experience across the two Attrition classes.
experience_ax = sns.boxplot(data=df, x='Attrition', y='experience_years')
experience_ax.set_title("Experience vs Attrition")
plt.show()


### 8. Correlation Heatmap

In [None]:

# Open a wide figure first so the heatmap is drawn onto it.
plt.figure(figsize=(12, 8))

# Pairwise correlations between all columns of the encoded frame.
correlation_matrix = df.corr()
heatmap_ax = sns.heatmap(correlation_matrix, cmap='coolwarm')
heatmap_ax.set_title("Correlation Heatmap")
plt.show()
