In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the raw HR attrition data; the path is resolved relative to the
# notebook's working directory.
df = pd.read_csv('HR_Attrition.csv')

# Show each summary on its own. df.info() prints its report and returns None,
# so bundling the three calls into one tuple would display a stray `None` and
# fall back to the plain-text repr of the frame.
print(df.shape)
df.info()

# Last expression of the cell -> rendered as a rich table.
df.head()


In [None]:
# A cell only renders its final expression, so the duplicate-row count is
# printed explicitly and the per-column missing-value counts are left as the
# cell's displayed result. Both checks are now visible.
print(f"Duplicate rows: {df.duplicated().sum()}")
df.isnull().sum()


In [None]:
# Automated profiling: build a ydata-profiling report object over the whole
# frame and write it to a file in the notebook's working directory.
from ydata_profiling import ProfileReport
# `profile` is kept as a variable so the report object can be reused after export.
profile = ProfileReport(df, title="HR Attrition EDA Report")
# Output path is relative; the target file name is HR_Attrition_Profile.html.
profile.to_file("HR_Attrition_Profile.html")


In [None]:
# Draw the two income views on separate axes. Calling both plotters back to
# back without an explicit `ax` puts them on the same implicit Axes, which
# overlays the boxplot on top of the histogram.
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(14, 5))

sns.histplot(df['MonthlyIncome'], kde=True, ax=ax_hist)
ax_hist.set(title='Distribution of Monthly Income', xlabel='MonthlyIncome', ylabel='Employees')

sns.boxplot(x='Attrition', y='MonthlyIncome', data=df, ax=ax_box)
ax_box.set(title='Monthly Income by Attrition')

fig.tight_layout()
plt.show()

# Keep the summary table as the final expression so the notebook renders it;
# as a leading statement its result was computed and silently discarded.
df[['Age', 'MonthlyIncome', 'TotalWorkingYears']].describe()


In [None]:
# One Axes per count plot: without an explicit `ax`, the second countplot is
# drawn onto the same Axes as the first and the two charts overlap.
fig, (ax_dept, ax_sat) = plt.subplots(1, 2, figsize=(14, 5))

sns.countplot(x='Department', hue='Attrition', data=df, ax=ax_dept)
ax_dept.set(title='Attrition by Department', ylabel='Employees')

sns.countplot(x='JobSatisfaction', hue='Attrition', data=df, ax=ax_sat)
ax_sat.set(title='Attrition by Job Satisfaction', ylabel='Employees')

fig.tight_layout()
plt.show()


In [None]:
# Pearson correlation is only defined for numeric data, and the frame also
# holds text columns (e.g. Attrition carries labels such as 'Yes'). Recent
# pandas versions no longer drop those silently in DataFrame.corr(), so the
# numeric columns are selected explicitly before correlating.
numeric_corr = df.select_dtypes(include='number').corr()

# A large canvas and two-decimal annotations keep the cell labels legible.
fig, ax = plt.subplots(figsize=(18, 14))
sns.heatmap(numeric_corr, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix of Numeric Features')
plt.show()


In [None]:
# Overall share of each Attrition label across the whole dataset.
attrition_rate = df['Attrition'].value_counts(normalize=True)

# barplot expects a numeric value variable, so the text label is encoded as a
# 0/1 indicator first. The mean of that indicator (barplot's default
# estimator) is exactly the fraction of 'Yes' rows in each OverTime group,
# i.e. the same quantity as sum(x == 'Yes') / len(x).
attrition_flag = df['Attrition'].eq('Yes').astype(int)

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=df['OverTime'], y=attrition_flag, ax=ax)
ax.set(title='Attrition Rate by Overtime Status', xlabel='OverTime', ylabel='Attrition rate')
plt.show()

# Final expression so the overall rates are actually rendered by the cell.
attrition_rate


In [None]:
# catplot and lmplot are figure-level functions: each call creates its own
# figure and returns a grid object, which is used here to label the plot.
income_by_satisfaction = sns.catplot(
    x='JobSatisfaction', y='MonthlyIncome', hue='Attrition', data=df, kind='box'
)
income_by_satisfaction.set_axis_labels('Job satisfaction', 'Monthly income')
income_by_satisfaction.ax.set_title('Monthly Income by Job Satisfaction and Attrition')

income_by_tenure = sns.lmplot(
    x='YearsAtCompany', y='MonthlyIncome', hue='Attrition', data=df
)
income_by_tenure.set_axis_labels('Years at company', 'Monthly income')
income_by_tenure.ax.set_title('Monthly Income vs. Years at Company')

# Render the figures without leaking the grid object's repr as cell output.
plt.show()


# HR Analytics EDA — Understanding Employee Attrition

## 1. Business Context
DataX Solutions is facing high employee turnover. This analysis explores HR data to identify the factors driving attrition.

## 2. Objectives
- Explore and clean the HR dataset
- Identify correlations with attrition
- Recommend retention strategies

## 3. Dataset
1470 employees, 35 features (IBM HR Analytics dataset)

## 4. Methodology
1. Data cleaning & preprocessing
2. Univariate & bivariate EDA
3. Attrition pattern analysis
4. Insight summary and recommendations

## 5. Key Findings
- The overall attrition rate is 16%
- Employees who work overtime are 2.5× more likely to leave
- Employees reporting low job satisfaction show a 60% attrition rate
- New hires (less than 2 years of tenure) churn rapidly
- Higher salary is positively correlated with retention

## 6. Recommendations
- Introduce work-life balance initiatives
- Incentivize long-term tenure
- Increase feedback and training programs

## 7. Deliverables
- HR_EDA.ipynb
- HR_Attrition_Profile.html (automated profiling report)
- EDA_Report.pdf

## 8. Next Steps
- Build predictive model (Logistic Regression)
- Integrate with Power BI HR Dashboard
