In [1]:
import pandas as pd
import numpy as np

In [2]:
# pandas mask
# DataFrame.mask() swaps out every value for which the given condition is True

# The salaries start out as strings (plus one missing entry); they are converted
# to numbers right away so the numeric comparisons in the examples below work.
df = pd.DataFrame(
    {'Hourly_Salary': ['500.00', '10000.00', '200.00', '20.00', np.nan]}
).assign(Hourly_Salary=lambda frame: pd.to_numeric(frame['Hourly_Salary']))


In [None]:
# Example 1 - basic mask: values of 1000 or more are replaced with NaN
# (the default replacement); every other value is kept as-is
df_mask = df.mask(df.ge(1000))
df_mask


Unnamed: 0,Hourly_Salary
0,500.0
1,
2,200.0
3,20.0
4,


In [5]:
# Example 2 - keep values under 1000 and replace the rest with 999.
# This doesn't fix the null value: NaN >= 1000 is False, so the NaN row stays NaN.
df_mask_2 = df.mask(df.ge(1000), 999)
df_mask_2

Unnamed: 0,Hourly_Salary
0,500.0
1,999.0
2,200.0
3,20.0
4,


In [None]:
# Example 3 - fill null values
# Every NaN in df is swapped for 0; non-null values pass through unchanged

df_mask_3 = df.mask(df.isna(), other=0)
df_mask_3

Unnamed: 0,Hourly_Salary
0,500.0
1,10000.0
2,200.0
3,20.0
4,0.0


In [7]:
# Example 4 - column example: replace anything over 100000 with null

# One record per running back: (name, career rushing yards, touchdowns).
# The first row's yardage (152690) is the value at or above 100000 that this example masks.
df2 = pd.DataFrame(
    [
        ('Barry Sanders', 152690, 99),
        ('Walter Payton', 16726, 110),
        ('Emmitt Smith', 18355, 164),
        ('Jim Brown', 12312, 106),
    ],
    columns=['Running Back', 'Career Rushing Yards', 'Touchdowns'],
)
df2

Unnamed: 0,Running Back,Career Rushing Yards,Touchdowns
0,Barry Sanders,152690,99
1,Walter Payton,16726,110
2,Emmitt Smith,18355,164
3,Jim Brown,12312,106


In [None]:
# A single-column (Series) condition masks whole rows: every value in a row
# becomes NaN when that row's "Career Rushing Yards" is greater than or equal to 100000.
# Note: the integer columns come back as floats because NaN has been introduced.

df2.mask(df2["Career Rushing Yards"].ge(100000))

Unnamed: 0,Running Back,Career Rushing Yards,Touchdowns
0,,,
1,Walter Payton,16726.0,110.0
2,Emmitt Smith,18355.0,164.0
3,Jim Brown,12312.0,106.0


In [9]:
# Example 5 - multiple conditions combined with AND (&)
# A row is replaced with NaN only when BOTH hold:
# Touchdowns is greater than 99 and Career Rushing Yards equals 18355

df2.mask(df2["Touchdowns"].gt(99) & df2["Career Rushing Yards"].eq(18355))

Unnamed: 0,Running Back,Career Rushing Yards,Touchdowns
0,Barry Sanders,152690.0,99.0
1,Walter Payton,16726.0,110.0
2,,,
3,Jim Brown,12312.0,106.0


In [10]:
# Example 6 - multiple conditions combined with OR, with the filters built
# outside the mask() call. First filter: True where Touchdowns exceeds 108.
filter1 = df2["Touchdowns"].gt(108)
filter1

0    False
1     True
2     True
3    False
Name: Touchdowns, dtype: bool

In [11]:
# Second filter: True where Career Rushing Yards is exactly 18355
filter2 = df2["Career Rushing Yards"].eq(18355)
filter2

0    False
1    False
2     True
3    False
Name: Career Rushing Yards, dtype: bool

In [12]:
# Rows where EITHER filter is True are replaced with NaN
df2.mask(cond=filter1 | filter2)

Unnamed: 0,Running Back,Career Rushing Yards,Touchdowns
0,Barry Sanders,152690.0,99.0
1,,,
2,,,
3,Jim Brown,12312.0,106.0


In [13]:
# Example 7 - create a new column that flags low totals:
# touchdown counts below 100 are replaced by the label "Less Than 100",
# all other counts are copied over unchanged (so the column mixes text and numbers).
# Note: this adds the column to df2 itself.
df2["touchdown_totals"] = df2["Touchdowns"].mask(
    df2["Touchdowns"].lt(100), "Less Than 100"
)
df2

Unnamed: 0,Running Back,Career Rushing Yards,Touchdowns,touchdown_totals
0,Barry Sanders,152690,99,Less Than 100
1,Walter Payton,16726,110,110
2,Emmitt Smith,18355,164,164
3,Jim Brown,12312,106,106
