In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Optional: switch seaborn to its "whitegrid" style for the plots in this notebook
sns.set_style(style="whitegrid")


In [None]:
# Path of the salary CSV -- replace with your actual file path
salary_csv_path = "employee_salary_data.csv"
salary_data = pd.read_csv(salary_csv_path)

# Print the first five rows as a quick look at the loaded data
print(salary_data.head(n=5))


In [None]:
# Report how many missing values each column contains
missing_per_column = salary_data.isnull().sum()
print(missing_per_column)

# Drop every row that has at least one missing value.
# The result is rebound to the same name instead of using inplace=True:
# pandas discourages in-place mutation, and rebinding keeps this a plain
# assignment while later cells still read `salary_data` as before.
salary_data = salary_data.dropna()


In [None]:
# Plain salary histogram: 30 bins, no KDE curve
hist_ax = sns.histplot(data=salary_data, x='Salary', bins=30, kde=False)
hist_ax.set_title('Salary Distribution')
hist_ax.set_xlabel('Salary')
hist_ax.set_ylabel('Frequency')
plt.show()


In [None]:
# 30-bin salary histogram with a KDE curve overlaid
kde_ax = sns.histplot(data=salary_data, x='Salary', bins=30, kde=True)
kde_ax.set(
    title='Salary Distribution with KDE',
    xlabel='Salary',
    ylabel='Frequency',
)
plt.show()


In [None]:
# Salary histogram with a custom bin count (50) and a KDE curve
fine_ax = sns.histplot(data=salary_data, x='Salary', bins=50, kde=True)
fine_ax.set_title('Salary Distribution (Custom Bin Size)')
fine_ax.set_xlabel('Salary')
fine_ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Salary histogram (30 bins, with KDE) coloured by the 'Department' column
dept_ax = sns.histplot(
    data=salary_data,
    x='Salary',
    hue='Department',
    bins=30,
    kde=True,
)
dept_ax.set(
    title='Salary Distribution by Department',
    xlabel='Salary',
    ylabel='Frequency',
)
plt.show()


In [None]:
# Same 30-bin histogram + KDE, drawn through seaborn's displot interface
sns.displot(data=salary_data, x='Salary', bins=30, kde=True)

# Title and axis labels are applied to the current axes, as plt.title/xlabel/ylabel would
plt.gca().set(
    title='Salary Distribution (Dist Plot)',
    xlabel='Salary',
    ylabel='Frequency',
)
plt.show()


In [None]:
# Example comparison between two departments: one KDE curve per department,
# drawn in order on the same axes with a fixed colour for each.
for dept_name, line_color in (('HR', 'blue'), ('IT', 'green')):
    dept_rows = salary_data[salary_data['Department'] == dept_name]
    sns.kdeplot(data=dept_rows, x='Salary', label=dept_name, color=line_color)

density_ax = plt.gca()
density_ax.set_title('KDE Comparison: HR vs IT')
density_ax.set_xlabel('Salary')
density_ax.set_ylabel('Density')
density_ax.legend()
plt.show()


In [None]:
# 30-bin salary histogram with KDE, using more descriptive title and axis labels
labelled_ax = sns.histplot(data=salary_data, x='Salary', bins=30, kde=True)
labelled_ax.set(
    title='Distribution of Employee Salaries',
    xlabel='Salary in USD',
    ylabel='Number of Employees',
)
plt.show()


In [None]:
# Per-department salary histogram (30 bins, with KDE) using the 'Set2' palette
palette_ax = sns.histplot(
    data=salary_data,
    x='Salary',
    hue='Department',
    palette='Set2',
    bins=30,
    kde=True,
)
palette_ax.set_title('Salary Distribution by Department (Custom Colors)')
palette_ax.set_xlabel('Salary')
palette_ax.set_ylabel('Frequency')
plt.show()


In [None]:
# Draw the per-department histogram on an explicit 8x5 figure, write it to a
# PNG at 300 dpi, then close the figure (this cell saves the plot; it does not show it).
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    data=salary_data,
    x='Salary',
    hue='Department',
    palette='Set2',
    bins=30,
    kde=True,
    ax=ax,
)
ax.set_title('Final Salary Distribution by Department')
ax.set_xlabel('Salary in USD')
ax.set_ylabel('Frequency')
fig.tight_layout()
fig.savefig('salary_distribution_by_department.png', dpi=300)
plt.close(fig)


In [None]:
# Display the per-department salary histogram ('Set2' palette, 30 bins, KDE)
final_ax = sns.histplot(
    data=salary_data,
    x='Salary',
    hue='Department',
    palette='Set2',
    bins=30,
    kde=True,
)
final_ax.set(
    title='Salary Distribution by Department (Final)',
    xlabel='Salary in USD',
    ylabel='Frequency',
)
plt.show()
