In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy employee records, one list per column. np.nan marks the values that
# are deliberately missing (two salaries, one experience, one rating).
data = dict(
    EmployeeID=[101, 102, 103, 104, 105, 106, 107, 108],
    Department=["IT", "HR", "IT", "Finance", "HR", "Finance", "IT", "HR"],
    Salary=[6000, 4500, 7000, np.nan, 5000, 6500, np.nan, 4800],
    Experience=[2, 1, 4, 3, np.nan, 5, 2, 1],
    Rating=[4.5, 3.8, 4.2, 4.0, 3.5, np.nan, 4.8, 3.9],
)

# Column order follows the keyword order above.
df = pd.DataFrame(data)
df

Unnamed: 0,EmployeeID,Department,Salary,Experience,Rating
0,101,IT,6000.0,2.0,4.5
1,102,HR,4500.0,1.0,3.8
2,103,IT,7000.0,4.0,4.2
3,104,Finance,,3.0,4.0
4,105,HR,5000.0,,3.5
5,106,Finance,6500.0,5.0,
6,107,IT,,2.0,4.8
7,108,HR,4800.0,1.0,3.9


In [3]:
# A notebook cell only echoes its final expression, so the earlier
# inspections are printed explicitly instead of being silently discarded.
print("Shape:", df.shape)
print("Columns:", df.columns.tolist())
print("Dtypes:")
print(df.dtypes)

# Missing-value count per column (echoed as the cell's output).
df.isna().sum()

EmployeeID    0
Department    0
Salary        2
Experience    1
Rating        1
dtype: int64

In [4]:
# Which summary statistic fills the gaps in each numeric column.
imputation_plan = {
    "Salary": "mean",
    "Experience": "median",
    "Rating": "mean",
}

for column_name, statistic in imputation_plan.items():
    # The statistic is computed over the non-missing values of the column.
    replacement = df[column_name].agg(statistic)
    df[column_name] = df[column_name].fillna(replacement)

# Re-count missing values per column after filling.
df.isna().sum()

EmployeeID    0
Department    0
Salary        0
Experience    0
Rating        0
dtype: int64

In [5]:
# Cut-offs used by the filters below, named so they are easy to find and tune.
HIGH_SALARY_THRESHOLD = 5500
TOP_RATING_THRESHOLD = 4.0

# Three independent boolean-mask filters over df.
high_salary = df[df["Salary"] > HIGH_SALARY_THRESHOLD]
it_employees = df[df["Department"] == "IT"]
top_rated = df[df["Rating"] >= TOP_RATING_THRESHOLD]

# Only the final expression of a cell is echoed; without an explicit
# display() the first two subsets would be computed but never shown.
# display() is available without import inside IPython/Jupyter kernels.
display(high_salary)
display(it_employees)
top_rated

Unnamed: 0,EmployeeID,Department,Salary,Experience,Rating
0,101,IT,6000.0,2.0,4.5
2,103,IT,7000.0,4.0,4.2
3,104,Finance,5633.333333,3.0,4.0
5,106,Finance,6500.0,5.0,4.1
6,107,IT,5633.333333,2.0,4.8


In [6]:
# Per-department aggregation spec: mean and median for the numeric
# measures, plus a head count taken from the EmployeeID column.
center_stats = ["mean", "median"]
aggregation_spec = {
    "Salary": center_stats,
    "Rating": center_stats,
    "EmployeeID": "count",
}

by_department = df.groupby("Department")
summary = by_department.agg(aggregation_spec)

summary

Unnamed: 0_level_0,Salary,Salary,Rating,Rating,EmployeeID
Unnamed: 0_level_1,mean,median,mean,median,count
Department,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Finance,6066.666667,6066.666667,4.05,4.05,2
HR,4766.666667,4800.0,3.733333,3.8,3
IT,6211.111111,6000.0,4.5,4.5,3


In [7]:
# Three highest and three lowest salaries, each taken from its own sort.
top_paid = df.sort_values(by="Salary", ascending=False).head(3)
lowest_paid = df.sort_values(by="Salary", ascending=True).head(3)

# Only the final expression of a cell is echoed; without an explicit
# display() the top earners would be computed but never shown.
# display() is available without import inside IPython/Jupyter kernels.
display(top_paid)
lowest_paid

Unnamed: 0,EmployeeID,Department,Salary,Experience,Rating
1,102,HR,4500.0,1.0,3.8
7,108,HR,4800.0,1.0,3.9
4,105,HR,5000.0,2.0,3.5


In [None]:
## Conclusion

'''This mini EDA demonstrates a complete data analysis workflow:
- Inspecting data  
- Handling missing values  
- Filtering, grouping, and ranking  
- Deriving simple business insights  

This workflow can be extended to larger real-world datasets and visualized further.'''