# HR Diagnostic Analytics: Pattern Discovery
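This notebook explores patterns in HR data — tenure, department, engagement score, promotion history, and manager rating — to help diagnose likely drivers of voluntary employee attrition.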

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the HR sample CSV (path is relative to the notebook's working directory)
# into the frame every later cell works from, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer='hr_diagnostic_sample.csv')
df.head(n=5)

## 1. Attrition by Tenure

In [None]:
# Bucket tenure (in years) into labelled bands for the attrition breakdown.
# Bins are right-closed: (0, 1], (1, 2], (2, 5], (5, 10].
# include_lowest=True also closes the first band on the left, so employees with
# TenureYears == 0 land in '<1 yr' instead of becoming NaN and silently
# dropping out of the groupby below.
# NOTE(review): values above 10 still map to NaN, and a tenure of exactly 1
# falls in '<1 yr' — confirm both match the intended banding for this dataset.
df['TenureCategory'] = pd.cut(
    df['TenureYears'],
    bins=[0, 1, 2, 5, 10],
    labels=['<1 yr', '1-2 yrs', '2-5 yrs', '5-10 yrs'],
    include_lowest=True,
)

# Attrition rate by tenure: proportion of each VoluntaryAttrition value within
# a band, pivoted to columns, keeping only the 'Yes' share.
# observed=False spells out the historical default for categorical groupers;
# it avoids the pandas FutureWarning and keeps results stable across versions.
attrition_tenure = (
    df.groupby('TenureCategory', observed=False)['VoluntaryAttrition']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['Yes']  # bands with no 'Yes' rows get a rate of 0
)
attrition_tenure.plot(kind='bar', title='Attrition Rate by Tenure Category', ylabel='Attrition Rate')
plt.xticks(rotation=0)
plt.show()

## 2. Attrition by Department

In [None]:
# Attrition rate by department: within each department, take the proportion of
# each VoluntaryAttrition value, pivot the values to columns, and keep 'Yes'.
attrition_dept = (
    df.groupby('Department')['VoluntaryAttrition']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)['Yes']  # departments with no 'Yes' rows get a rate of 0
)
attrition_dept.plot(
    kind='bar',
    ylabel='Attrition Rate',
    title='Attrition Rate by Department',
)
# Tilt the department names so the tick labels are less likely to overlap.
plt.xticks(rotation=45)
plt.show()

## 3. Engagement Score by Attrition

In [None]:
# Distribution of EngagementScore for each VoluntaryAttrition value.
# sns.boxplot returns the Axes it drew on, so the title is set on it directly.
sns.boxplot(
    data=df,
    x='VoluntaryAttrition',
    y='EngagementScore',
).set_title('Engagement Score by Attrition')
plt.show()

## 4. Promotion History vs Attrition

In [None]:
# Stacked histogram of years since last promotion, coloured by attrition value.
# The title and x-axis label are applied to the Axes that histplot returns.
sns.histplot(
    df,
    x='LastPromotionYearsAgo',
    hue='VoluntaryAttrition',
    bins=10,
    multiple='stack',
).set(
    title='Promotion History vs Attrition',
    xlabel='Years Since Last Promotion',
)
plt.show()

## 5. Manager Rating by Attrition

In [None]:
# Distribution of ManagerRating for each VoluntaryAttrition value.
# sns.boxplot returns the Axes it drew on, so the title is set on it directly.
sns.boxplot(
    data=df,
    x='VoluntaryAttrition',
    y='ManagerRating',
).set_title('Manager Rating by Attrition')
plt.show()

## 6. Summary Table

In [None]:
# Mean of each metric column, one row per VoluntaryAttrition value.
summary = (
    df.groupby('VoluntaryAttrition')[
        ['TenureYears', 'EngagementScore', 'LastPromotionYearsAgo', 'ManagerRating']
    ]
    .mean()
)
summary