In [1]:
import numpy as np
import pandas as pd

# List of dictionaries for a small dataset
data = [
    {"id": 1, "age": 25, "salary": 50000, "department": "HR"},
    {"id": 2, "age": 30, "salary": 60000, "department": "Engineering"},
    {"id": 3, "age": 28, "salary": 55000, "department": "Engineering"},
    {"id": 4, "age": 35, "salary": 70000, "department": "Finance"},
    {"id": 5, "age": 27, "salary": 52000, "department": "HR"}
]
print("Raw Data:", data)

# Convert to Pandas DataFrame
df = pd.DataFrame(data)
print("\nDataFrame:")
print(df)

Raw Data: [{'id': 1, 'age': 25, 'salary': 50000, 'department': 'HR'}, {'id': 2, 'age': 30, 'salary': 60000, 'department': 'Engineering'}, {'id': 3, 'age': 28, 'salary': 55000, 'department': 'Engineering'}, {'id': 4, 'age': 35, 'salary': 70000, 'department': 'Finance'}, {'id': 5, 'age': 27, 'salary': 52000, 'department': 'HR'}]

DataFrame:
   id  age  salary   department
0   1   25   50000           HR
1   2   30   60000  Engineering
2   3   28   55000  Engineering
3   4   35   70000      Finance
4   5   27   52000           HR


In [2]:
departments = set(employee["department"] for employee in data)
print("Unique Departments:", departments)

Unique Departments: {'Finance', 'HR', 'Engineering'}


In [3]:
# Use list comprehension to filter and transform data
# Filter employees in Engineering and increase their salary by 10%
engineering_employees = [emp for emp in data if emp["department"] == "Engineering"]
for emp in engineering_employees:
    emp["salary"] *= 1.10  # 10% raise
print("\nEngineering Employees with 10% Raise:", engineering_employees)

# Update DataFrame
df_updated = pd.DataFrame(data)
print("\nUpdated DataFrame:")
print(df_updated)


Engineering Employees with 10% Raise: [{'id': 2, 'age': 30, 'salary': 66000.0, 'department': 'Engineering'}, {'id': 3, 'age': 28, 'salary': 60500.00000000001, 'department': 'Engineering'}]

Updated DataFrame:
   id  age   salary   department
0   1   25  50000.0           HR
1   2   30  66000.0  Engineering
2   3   28  60500.0  Engineering
3   4   35  70000.0      Finance
4   5   27  52000.0           HR


In [4]:
# Normalize the 'age' column (scale to 0-1 range)
ages = [emp["age"] for emp in data]  # Extract ages using list comprehension
ages_array = np.array(ages)  # Convert to NumPy array
normalized_ages = (ages_array - ages_array.min()) / (ages_array.max() - ages_array.min())  # Min-Max normalization
print("Original Ages:", ages)
print("Normalized Ages:", normalized_ages)

# Add normalized ages back to the DataFrame
df_updated["normalized_age"] = normalized_ages
print("\nDataFrame with Normalized Age:")
print(df_updated)

Original Ages: [25, 30, 28, 35, 27]
Normalized Ages: [0.  0.5 0.3 1.  0.2]

DataFrame with Normalized Age:
   id  age   salary   department  normalized_age
0   1   25  50000.0           HR             0.0
1   2   30  66000.0  Engineering             0.5
2   3   28  60500.0  Engineering             0.3
3   4   35  70000.0      Finance             1.0
4   5   27  52000.0           HR             0.2


In [5]:
# Encode the 'department' column (label encoding)
# Create a dictionary to map departments to integers
dept_mapping = {dept: idx for idx, dept in enumerate(departments)}
print("Department Mapping:", dept_mapping)

# Add encoded department to the DataFrame
df_updated["department_encoded"] = df_updated["department"].map(dept_mapping)
print("\nDataFrame with Encoded Department:")
print(df_updated)

Department Mapping: {'Finance': 0, 'HR': 1, 'Engineering': 2}

DataFrame with Encoded Department:
   id  age   salary   department  normalized_age  department_encoded
0   1   25  50000.0           HR             0.0                   1
1   2   30  66000.0  Engineering             0.5                   2
2   3   28  60500.0  Engineering             0.3                   2
3   4   35  70000.0      Finance             1.0                   0
4   5   27  52000.0           HR             0.2                   1
