In [2]:
import numpy as np
import pandas as pd
from functools import reduce

df = pd.DataFrame({
    "employee": ["Keshav", "Afroze", "Nico", "Sravya", "Ethan", "Fiona"],
    "department": ["HR", "Finance", "IT", "Finance", "IT", "HR"],
    "salary": [500000, 600000, np.nan, 720000, 550000, 580000],
    "joining_date": pd.to_datetime([
        "2020-01-15", "2019-03-10", "2021-06-25", 
        "2018-07-01", "2022-02-14", "2021-11-11"
    ]),
    "bonus": [50000, 7000000, 40000, 80000, np.nan, 60000]
}, index=["E1", "E2", "E3", "E4", "E5", "E6"])

print(df)

   employee department    salary joining_date      bonus
E1   Keshav         HR  500000.0   2020-01-15    50000.0
E2   Afroze    Finance  600000.0   2019-03-10  7000000.0
E3     Nico         IT       NaN   2021-06-25    40000.0
E4   Sravya    Finance  720000.0   2018-07-01    80000.0
E5    Ethan         IT  550000.0   2022-02-14        NaN
E6    Fiona         HR  580000.0   2021-11-11    60000.0


In [3]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E3,Nico,IT,,2021-06-25,40000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0
E5,Ethan,IT,550000.0,2022-02-14,
E6,Fiona,HR,580000.0,2021-11-11,60000.0


In [4]:
# Add a 10% raise to all salaries (handling NaN safely)
def my_func(col):
    if pd.notna(col):
        col = col * 1.10
    return col

In [5]:
salaries_with_raise = list(map(my_func, df.bonus))

In [6]:
salaries_with_raise

[55000.00000000001, 7700000.000000001, 44000.0, 88000.0, nan, 66000.0]

In [7]:
#map() by itself returns a lazy iterator, thats why we need to use list here

In [8]:
#eg
newlist = map(my_func, df.bonus)

In [9]:
newlist

<map at 0x104904f10>

In [10]:
print(newlist)

<map object at 0x104904f10>


In [11]:
#therefore we needed a list to iterate and not just see a map object

In [12]:
#also when we use it pandas we get a list normally because we use it with a pandas series, howver when we use it without a series or a df
# we need a list

In [24]:
# reducing salary by 10% but using a lmbda function now

In [26]:
reduction = df.salary.map(lambda x: x//1.1 if pd.notna(x) else x)

In [28]:
reduction

E1    454545.0
E2    545454.0
E3         NaN
E4    654545.0
E5    499999.0
E6    527272.0
Name: salary, dtype: float64

In [30]:
# got a series object instead of a map obj because of pandas

In [32]:
# capitalize all department names

In [34]:
letter = "this is a sample"

In [36]:
letter.upper()

'THIS IS A SAMPLE'

In [39]:
cap = df.department.map(lambda x: x.upper())

In [41]:
cap

E1         HR
E2    FINANCE
E3         IT
E4    FINANCE
E5         IT
E6         HR
Name: department, dtype: object

In [43]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,Finance,600000.0,2019-03-10,7000000.0
E3,Nico,IT,,2021-06-25,40000.0
E4,Sravya,Finance,720000.0,2018-07-01,80000.0
E5,Ethan,IT,550000.0,2022-02-14,
E6,Fiona,HR,580000.0,2021-11-11,60000.0


In [45]:
df.department = df.department.map(lambda x: x.upper())

In [47]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020-01-15,50000.0
E2,Afroze,FINANCE,600000.0,2019-03-10,7000000.0
E3,Nico,IT,,2021-06-25,40000.0
E4,Sravya,FINANCE,720000.0,2018-07-01,80000.0
E5,Ethan,IT,550000.0,2022-02-14,
E6,Fiona,HR,580000.0,2021-11-11,60000.0


In [49]:
# Check if salary is above 600000 (return True/False)

In [51]:
sal_check = df.salary.map(lambda x: x )

In [53]:
sal_check

E1    500000.0
E2    600000.0
E3         NaN
E4    720000.0
E5    550000.0
E6    580000.0
Name: salary, dtype: float64

In [55]:
sal_check = df.salary.map(lambda x: True if x>600000 else False )

In [57]:
sal_check

E1    False
E2    False
E3    False
E4     True
E5    False
E6    False
Name: salary, dtype: bool

In [59]:
# Convert joining_date to just the year

In [64]:
year_only = df.joining_date.map(lambda x: x.year if pd.notna(x) else None)

In [66]:
year_only

E1    2020
E2    2019
E3    2021
E4    2018
E5    2022
E6    2021
Name: joining_date, dtype: int64

In [68]:
df.joining_date = year_only

In [70]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020,50000.0
E2,Afroze,FINANCE,600000.0,2019,7000000.0
E3,Nico,IT,,2021,40000.0
E4,Sravya,FINANCE,720000.0,2018,80000.0
E5,Ethan,IT,550000.0,2022,
E6,Fiona,HR,580000.0,2021,60000.0


In [74]:
# Replace missing bonuses with 0
df.bonus = df.bonus.map(lambda x: 0 if pd.isna(x) else x)

In [76]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus
E1,Keshav,HR,500000.0,2020,50000.0
E2,Afroze,FINANCE,600000.0,2019,7000000.0
E3,Nico,IT,,2021,40000.0
E4,Sravya,FINANCE,720000.0,2018,80000.0
E5,Ethan,IT,550000.0,2022,0.0
E6,Fiona,HR,580000.0,2021,60000.0


In [78]:
# Return length of each employee name

In [84]:
df['length'] = df.employee.map(lambda x: len(x))

In [86]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus,length
E1,Keshav,HR,500000.0,2020,50000.0,6
E2,Afroze,FINANCE,600000.0,2019,7000000.0,6
E3,Nico,IT,,2021,40000.0,4
E4,Sravya,FINANCE,720000.0,2018,80000.0,6
E5,Ethan,IT,550000.0,2022,0.0,5
E6,Fiona,HR,580000.0,2021,60000.0,5


In [88]:
# Create a new Series that tags each row as "High Bonus" if bonus > 50000, else "Normal"

In [90]:
df['high_or_not'] = df.bonus.map(lambda x: "high bonus" if x> 50000 else "normal")

In [92]:
df

Unnamed: 0,employee,department,salary,joining_date,bonus,length,high_or_not
E1,Keshav,HR,500000.0,2020,50000.0,6,normal
E2,Afroze,FINANCE,600000.0,2019,7000000.0,6,high bonus
E3,Nico,IT,,2021,40000.0,4,normal
E4,Sravya,FINANCE,720000.0,2018,80000.0,6,high bonus
E5,Ethan,IT,550000.0,2022,0.0,5,normal
E6,Fiona,HR,580000.0,2021,60000.0,5,high bonus


In [96]:
# lets do filter now

In [98]:
# Filter employees with salary > 600000

In [112]:
# also getting the emp name
high_salary = list(filter(lambda x: x[1] > 600000, zip(df["employee"], df["salary"])))

In [114]:
high_salary

[('Sravya', 720000.0)]

In [118]:
# Employees in the IT department

In [128]:
it_department = filter(lambda x: x[1] == "IT", zip(df["employee"], df["department"]))

In [130]:
it_department

<filter at 0x15676bb50>

In [132]:
it_department = list(filter(lambda x: x[1] == "IT", zip(df["employee"], df["department"])))

In [134]:
it_department

[('Nico', 'IT'), ('Ethan', 'IT')]

In [136]:
# Employees with bonus greater than 50,000

In [156]:
bonus_max=list(filter(lambda x:x[1]>50000 ,zip(df.employee, df.bonus)))

In [158]:
bonus_max

[('Afroze', 7000000.0), ('Sravya', 80000.0), ('Fiona', 60000.0)]

In [160]:
# Rows where bonus is 0

In [162]:
zero_bonus = list(filter(lambda x: x[1]==0,zip(df.employee, df.bonus)))

In [164]:
zero_bonus

[('Ethan', 0.0)]

In [166]:
# Employees whose name starts with "F"

In [180]:
names_f = list(filter(lambda x: x.startswith("F"), df["employee"]))

In [182]:
names_f

['Fiona']

In [184]:
# Employees in HR or Finance

In [196]:
hr_finance = filter(lambda x: x[1] if x[1]=="HR" or x[1]=="FINANCE" else None, zip(df["employee"], df["department"]))

In [198]:
hr_finance

<filter at 0x1682d6260>

In [200]:
hr_finance = list(filter(lambda x: x[1] if x[1]=="HR" or x[1]=="FINANCE" else None, zip(df["employee"], df["department"])))

In [202]:
hr_finance

[('Keshav', 'HR'),
 ('Afroze', 'FINANCE'),
 ('Sravya', 'FINANCE'),
 ('Fiona', 'HR')]