# Employee Attrition EDA
This notebook performs exploratory data analysis (EDA) on the Employee Attrition dataset.
It includes distribution plots, correlation heatmaps, and pairplots.

**Note:** The first code cell installs the required libraries (pandas, matplotlib, seaborn) with pip; skip it if they are already installed.

In [ ]:
# Install required libraries (if not installed)
!pip install pandas matplotlib seaborn --quiet

In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Create the folder that the plotting cells save into; exist_ok makes
# re-running this cell harmless when the folder is already there.
os.makedirs(name='../screenshots', exist_ok=True)

# Read the attrition CSV into the DataFrame used by every cell below,
# then preview its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='../data/employee_attrition.csv')
df.head(n=5)

## Attrition Distribution

In [ ]:
# Bar chart of how many rows fall under each Attrition value.
# The figure and its single axes are created up front and passed explicitly
# instead of relying on pyplot's implicit "current figure".
count_fig, count_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Attrition', ax=count_ax)
count_ax.set_title('Attrition Count')

# Write the image to disk before showing it.
count_fig.savefig('../screenshots/attrition_count.png')
plt.show()

## Correlation Heatmap

In [ ]:
# Columns whose pairwise correlations (together with Attrition) are plotted.
numeric_cols = ['Age','DistanceFromHome','MonthlyIncome','NumCompaniesWorked',
                'TotalWorkingYears','TrainingTimesLastYear','YearsAtCompany',
                'YearsInCurrentRole','YearsSinceLastPromotion','YearsWithCurrManager']

# DataFrame.corr() needs numeric input: with a text Attrition column,
# pandas >= 2.0 raises ValueError and older versions silently drop the
# column, so Attrition would never appear in the heatmap.
corr_input = df[numeric_cols + ['Attrition']].copy()
if not pd.api.types.is_numeric_dtype(corr_input['Attrition']):
    # Integer-code the labels in sorted order (e.g. 'No' -> 0, 'Yes' -> 1).
    # NOTE(review): assumes Attrition is a two-level label - confirm against the CSV.
    attrition_codes = corr_input['Attrition'].astype('category').cat.codes
    # cat.codes marks missing values as -1; turn them back into NaN so that
    # corr() ignores those rows instead of treating -1 as a real level.
    corr_input['Attrition'] = attrition_codes.where(attrition_codes >= 0)
cor = corr_input.corr()

# Create the figure only after the matrix exists, so a failure above does
# not leave an empty figure behind.
plt.figure(figsize=(12,10))
sns.heatmap(cor, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
# Write the image to disk before showing it.
plt.savefig('../screenshots/correlation_heatmap.png')
plt.show()

## Pairplot

In [ ]:
# Grid of pairwise plots over four columns, colored by Attrition.
g = sns.pairplot(
    data=df,
    vars=['Age','MonthlyIncome','TotalWorkingYears','YearsAtCompany'],
    hue='Attrition',
)
# Save the grid's figure, then close that figure without displaying it.
g.fig.savefig('../screenshots/pairplot.png')
plt.close(g.fig)