In [1]:
import numpy as np
import pandas as pd

# Seed NumPy's legacy global generator so the random ages and salaries drawn
# below come out the same on every run.
np.random.seed(42)

# Ten synthetic employees. Both randint calls pull from the same seeded global
# stream, so they must stay in this order (ages first, then salaries).
ids = np.arange(1, 11)
ages = np.random.randint(18, 60, size=10)            # upper bound is exclusive
salaries = np.random.randint(30000, 90000, size=10)  # upper bound is exclusive
departments = np.array(
    ["HR", "IT", "Finance", "IT", "HR", "Sales", "Finance", "IT", "Sales", "HR"]
)

# Keyword order fixes the column order: id, age, salary, dept.
DF = pd.DataFrame(dict(id=ids, age=ages, salary=salaries, dept=departments))

# Write without the index so the CSV contains only the four data columns.
DF.to_csv("employees.csv", index=False)

print("Sample Data Created and Saved as employees.csv")
print("\n--- Data preview ---")
print(DF)
print()

Sample Data Created and Saved as employees.csv

--- Data preview ---
   id  age  salary     dept
0   1   56   77191       HR
1   2   46   74131       IT
2   3   32   46023  Finance
3   4   25   71090       IT
4   5   38   31685       HR
5   6   56   30769    Sales
6   7   36   89735  Finance
7   8   40   86101       IT
8   9   28   32433    Sales
9  10   28   35311       HR



In [2]:
# Q1 ------------------------------------------------------------
# np.array() copies its input by default, so age_arr / sal_arr are separate
# arrays from the `ages` / `salaries` built in the setup cell.
age_arr, sal_arr = np.array(ages), np.array(salaries)

print("Q1 – NumPy arrays")
print("ages   :", age_arr)
print("salaries:", sal_arr)

# Report the same attribute for both arrays on one line, ages first.
for label, attr in (
    ("dtype  :", "dtype"),
    ("ndim   :", "ndim"),
    ("shape  :", "shape"),
    ("size   :", "size"),
):
    print(label, getattr(age_arr, attr), getattr(sal_arr, attr))
print()

Q1 – NumPy arrays
ages   : [56 46 32 25 38 56 36 40 28 28]
salaries: [77191 74131 46023 71090 31685 30769 89735 86101 32433 35311]
dtype  : int32 int32
ndim   : 1 1
shape  : (10,) (10,)
size   : 10 10



In [3]:
# Q2 ------------------------------------------------------------
# Summary statistics via the ndarray methods; the names are kept at cell
# level because a later cell reads max_sal and min_sal.
max_sal, min_sal = sal_arr.max(), sal_arr.min()
avg_sal = sal_arr.mean()
avg_age = age_arr.mean()

print("Q2 – Salary & Age statistics")
print(f"Highest salary : {max_sal}")
print(f"Lowest  salary : {min_sal}")
# Averages are floats; show them with two decimals.
print(f"Average salary : {avg_sal:.2f}")
print(f"Average age    : {avg_age:.2f}")
print()

Q2 – Salary & Age statistics
Highest salary : 89735
Lowest  salary : 30769
Average salary : 57446.90
Average age    : 38.50



In [6]:
# Q3 ------------------------------------------------------------
# Element-wise comparison gives a boolean mask with one entry per employee;
# the comparison is strict, so an age of exactly 30 is excluded.
older_than_30 = np.greater(age_arr, 30)

print("Q3 – Ages > 30")
print("Mask :", older_than_30)
# Boolean indexing keeps only the ages where the mask is True.
print("Ages >30 :", age_arr[older_than_30])
# Summing a boolean mask counts its True entries.
print(f"Count  : {older_than_30.sum()}")
print()

Q3 – Ages > 30
Mask : [ True  True  True False  True  True  True  True False False]
Ages >30 : [56 46 32 38 56 36 40]
Count  : 7



In [None]:
# Q4 ------------------------------------------------------------
# np.add returns a new array, so age_arr itself is left unchanged; both are
# printed below for comparison.
updated_ages = np.add(age_arr, 5)

print("Q4 – Ages + 5 years")
print("Original :", age_arr)
print("Updated  :", updated_ages)
print()

In [None]:
# Q5 ------------------------------------------------------------
# Accumulate in int64 explicitly. The Q1 output reports int32 arrays, and
# without an explicit dtype np.sum adds integers up in the platform default
# integer, which can be 32-bit and would silently wrap on a large payroll.
total_expense = np.sum(sal_arr, dtype=np.int64)

# Derive the spread from sal_arr itself so this cell does not rely on
# max_sal / min_sal still holding the values computed in the Q2 cell.
salary_range = np.max(sal_arr) - np.min(sal_arr)

print("Q5 – Salary totals")
print(f"Total salary expense : {total_expense}")
print(f"Max – Min difference : {salary_range}")
print()

In [None]:
# --------------------------------------------------------------
# 2) Pandas – 5 Questions
# --------------------------------------------------------------

# Q6 ------------------------------------------------------------
# Load the table back from the CSV written by the setup cell rather than
# reusing the in-memory DF.
df = pd.read_csv("employees.csv")

print("Q6 – Load & inspect")
print(df.iloc[:5])  # first five rows
print("\n--- info() ---")
df.info()  # info() writes its report to stdout itself, so no print() here
print("\n--- describe() ---")
print(df.describe())
print()

In [None]:
# Q7 ------------------------------------------------------------
print("Q7 – Selected columns & last rows")
# All rows, restricted to the three numeric columns (dept is left out).
print(df.loc[:, ["id", "age", "salary"]])
print("\nLast 3 rows:")
# Positional slice of the final three rows, with every column.
print(df.iloc[-3:])
print()

In [None]:
# Q8 ------------------------------------------------------------
# Boolean row filter: keep only rows whose dept is exactly "IT".
it_emp = df.loc[df["dept"].eq("IT")]

print("Q8 – IT department only")
print(it_emp)
# The row count of the filtered frame is the number of IT employees.
print(f"Number of IT employees: {it_emp.shape[0]}")
print()

In [None]:
# Q9 ------------------------------------------------------------
# Sort by salary from highest to lowest, then keep the first three rows.
by_salary_desc = df.sort_values(by="salary", ascending=False)
top3 = by_salary_desc.iloc[:3]

print("Q9 – Top 3 highest-paid")
print(top3.loc[:, ["id", "age", "salary", "dept"]])
print()

In [7]:
# Q10 -----------------------------------------------------------

# Start again from the CSV, then cap the salary column: clip(upper=80000)
# replaces every value above 80000 with 80000 and leaves the rest as-is.
# `df` is rebound to the capped frame.
df = pd.read_csv("employees.csv").assign(
    salary=lambda frame: frame["salary"].clip(upper=80000)
)
new_avg = df["salary"].mean()

print("Q10 – After capping salaries > 80000")
print(df)
print(f"New average salary: {new_avg:.2f}")

Q10 – After capping salaries > 80000
   id  age  salary     dept
0   1   56   77191       HR
1   2   46   74131       IT
2   3   32   46023  Finance
3   4   25   71090       IT
4   5   38   31685       HR
5   6   56   30769    Sales
6   7   36   80000  Finance
7   8   40   80000       IT
8   9   28   32433    Sales
9  10   28   35311       HR
New average salary: 55863.30
