# 👥 HR Attrition Analysis (SQL + Python)

This notebook analyzes the IBM HR Analytics dataset using SQL queries and Python EDA. 
It explores attrition trends by department, role, age, salary, and tenure.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3

# Open the HR database (the queries below expect an hr_data table in hr.sqlite).
# The file is opened through a read-only URI on purpose: a plain
# sqlite3.connect('hr.sqlite') silently creates an empty database when the file
# is missing, and the problem only surfaces later as "no such table: hr_data".
# With mode=ro a missing file raises sqlite3.OperationalError right here, and
# the analysis cannot modify the source data by accident.
DB_PATH = 'hr.sqlite'
conn = sqlite3.connect(f'file:{DB_PATH}?mode=ro', uri=True)

## 1. Attrition Rate by Department

In [None]:
# Per-department attrition: rows with Attrition = 'Yes' divided by all rows in
# the department, expressed as a percentage and sorted from highest to lowest.
query = '''
SELECT Department,
       COUNT(*) FILTER (WHERE Attrition = 'Yes') * 1.0 / COUNT(*) * 100 AS attrition_rate_pct
FROM hr_data
GROUP BY Department
ORDER BY attrition_rate_pct DESC;
'''
df_dept = pd.read_sql(sql=query, con=conn)
df_dept

In [None]:
# Horizontal bar chart: one bar per department, bar length = attrition rate (%).
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    data=df_dept,
    x='attrition_rate_pct',
    y='Department',
    palette='Reds_r',
    ax=ax,
)
ax.set_title('Attrition Rate by Department')
ax.set_xlabel('Attrition Rate (%)')
ax.set_ylabel('Department')
plt.show()

## 2. Attrition by Age Band

In [None]:
# Attrition rate (%) per age band.
#
# Bands are half-open and non-overlapping: CASE branches are evaluated top to
# bottom, so "Age < 40" only sees ages 30-39 and "Age < 50" only sees 40-49.
# (Writing the bands as BETWEEN 30 AND 40 / BETWEEN 40 AND 50 would overlap at
# 40 and put age 50 below the '50+' band.)
#
# Rows are ordered by the youngest age in each band rather than by the label
# text, because a text sort places '<30' after the numeric labels and the bar
# chart draws categories in row order.
query = '''
SELECT CASE
         WHEN Age < 30 THEN '<30'
         WHEN Age < 40 THEN '30-39'
         WHEN Age < 50 THEN '40-49'
         ELSE '50+'
       END AS age_band,
       COUNT(*) FILTER (WHERE Attrition = 'Yes') * 1.0 / COUNT(*) * 100 AS attrition_rate_pct
FROM hr_data
GROUP BY age_band
ORDER BY MIN(Age);
'''
df_age = pd.read_sql(query, conn)
df_age

In [None]:
# Vertical bar chart: one bar per age band, bar height = attrition rate (%).
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=df_age,
    x='age_band',
    y='attrition_rate_pct',
    palette='coolwarm',
    ax=ax,
)
ax.set_title('Attrition Rate by Age Band')
ax.set_xlabel('Age Band')
ax.set_ylabel('Attrition Rate (%)')
plt.show()

## 3. Attrition by Salary Band

In [None]:
# Attrition rate (%) per monthly-income band.
#
# The upper bounds are written as "<=" checks evaluated top to bottom, so the
# bands are contiguous: a fractional income such as 5000.5 lands in '5001-8000'
# instead of slipping between two BETWEEN ranges into the ELSE branch. For
# whole-number incomes the band membership is the same as with BETWEEN.
# Note that '8000+' holds incomes strictly above 8000 (8000 itself is in
# '5001-8000').
#
# Rows are ordered by the lowest income in each band rather than by the label
# text, because a text sort places '<3000' after the numeric labels and the bar
# chart draws categories in row order.
query = '''
SELECT CASE
         WHEN MonthlyIncome < 3000 THEN '<3000'
         WHEN MonthlyIncome <= 5000 THEN '3000-5000'
         WHEN MonthlyIncome <= 8000 THEN '5001-8000'
         ELSE '8000+'
       END AS salary_band,
       COUNT(*) FILTER (WHERE Attrition = 'Yes') * 1.0 / COUNT(*) * 100 AS attrition_rate_pct
FROM hr_data
GROUP BY salary_band
ORDER BY MIN(MonthlyIncome);
'''
df_salary = pd.read_sql(query, conn)
df_salary

In [None]:
# Vertical bar chart: one bar per salary band, bar height = attrition rate (%).
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=df_salary,
    x='salary_band',
    y='attrition_rate_pct',
    palette='Blues_d',
    ax=ax,
)
ax.set_title('Attrition Rate by Salary Band')
ax.set_xlabel('Salary Band')
ax.set_ylabel('Attrition Rate (%)')
plt.show()

## 4. Average Tenure of Attrited vs Active Employees

In [None]:
# Mean YearsAtCompany for each Attrition value, rounded to two decimals.
query = '''
SELECT Attrition,
       ROUND(AVG(YearsAtCompany), 2) AS avg_tenure
FROM hr_data
GROUP BY Attrition;
'''
df_tenure = pd.read_sql(sql=query, con=conn)
df_tenure

In [None]:
# Vertical bar chart: one bar per Attrition value, bar height = average tenure.
fig, ax = plt.subplots(figsize=(5, 4))
sns.barplot(
    data=df_tenure,
    x='Attrition',
    y='avg_tenure',
    palette='Set2',
    ax=ax,
)
ax.set_title('Average Tenure: Attrited vs Active Employees')
ax.set_xlabel('Attrition')
ax.set_ylabel('Average Tenure (Years)')
plt.show()