# `where` and `mask`

In [27]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [28]:
# Fix the seed so the ten random integers are reproducible,
# then label them with the letters a-j.
np.random.seed(0)
s = Series(data=np.random.randint(low=0, high=1000, size=10),
           index=list('abcdefghij'))
s

a    684
b    559
c    629
d    192
e    835
f    763
g    707
h    359
i      9
j    723
dtype: int64

In [29]:
# Element-wise comparison: a boolean Series that is True wherever the value is > 300
s.gt(300)

a     True
b     True
c     True
d    False
e     True
f     True
g     True
h     True
i    False
j     True
dtype: bool

In [30]:
# Use the boolean Series as a mask inside .loc: only the values that are > 300 come back
s.loc[s.gt(300)]

a    684
b    559
c    629
e    835
f    763
g    707
h    359
j    723
dtype: int64

In [31]:
# What if I want to replace the values that are not > 300 with NaN?
# Assigning through a boolean mask changes s itself (nothing is returned).
s.loc[s.le(300)] = np.nan

In [32]:
# Show s after the assignment: the replaced entries are NaN and the dtype is now float64, not int64
s

a    684.0
b    559.0
c    629.0
d      NaN
e    835.0
f    763.0
g    707.0
h    359.0
i      NaN
j    723.0
dtype: float64

In [33]:
# Rebuild the integer Series from the same seed
np.random.seed(0)
s = Series(data=np.random.randint(low=0, high=1000, size=10),
           index=list('abcdefghij'))

# where() keeps every value whose condition is True;
# all other positions (the "else" case) come back as NaN
s.where(cond=s > 300)

a    684.0
b    559.0
c    629.0
d      NaN
e    835.0
f    763.0
g    707.0
h    359.0
i      NaN
j    723.0
dtype: float64

In [34]:
# The second argument is the replacement for values that fail the condition (-1 instead of NaN)
s.where(s > 300, other=-1)

a    684
b    559
c    629
d     -1
e    835
f    763
g    707
h    359
i     -1
j    723
dtype: int64

In [35]:
# The second argument may also be a callable; here it negates the values that are not > 300
s.where(s > 300, other=lambda s_: -s_)

a    684
b    559
c    629
d   -192
e    835
f    763
g    707
h    359
i     -9
j    723
dtype: int64

In [37]:
# Keep the even values; every odd value is replaced by that value plus one
s.where(s.mod(2).eq(0), other=lambda s_: s_.add(1))

a    684
b    560
c    630
d    192
e    836
f    764
g    708
h    360
i     10
j    724
dtype: int64

In [38]:
# What is mask()? The opposite of where():
#   - condition False -> the value is kept
#   - condition True  -> the second argument (a value or a callable) is used

s.mask(cond=s % 2 == 0, other=lambda s_: 1 + s_)

a    685
b    559
c    629
d    193
e    835
f    763
g    707
h    359
i      9
j    723
dtype: int64

In [39]:
# A seeded 4x4 frame of random integers: rows a-d, columns w-z
np.random.seed(0)
df = DataFrame(data=np.random.randint(low=0, high=1000, size=(4, 4)),
               index=list('abcd'),
               columns=list('wxyz'))
df

Unnamed: 0,w,x,y,z
a,684,559,629,192
b,835,763,707,359
c,9,723,277,754
d,804,599,70,472


In [40]:
# The condition is a Series with one boolean per row label, so entire rows are
# either kept (x > 600) or replaced by NaN
df.where(df['x'].gt(600))

Unnamed: 0,w,x,y,z
a,,,,
b,835.0,763.0,707.0,359.0
c,9.0,723.0,277.0,754.0
d,,,,


In [41]:
# Rows whose x is not > 600 are filled with -1 rather than NaN
df.where(df['x'] > 600, other=-1)

Unnamed: 0,w,x,y,z
a,-1,-1,-1,-1
b,835,763,707,359
c,9,723,277,754
d,-1,-1,-1,-1


In [42]:
# The replacement can itself be a DataFrame: rows whose x is not > 600 take their values from -df
df.where(df['x'] > 600, other=-df)

Unnamed: 0,w,x,y,z
a,-684,-559,-629,-192
b,835,763,707,359
c,9,723,277,754
d,-804,-599,-70,-472


In [48]:
# mask() flips the selection: the rows whose x is > 600 are the ones replaced by their negation
df.mask(df['x'].gt(600), other=df.mul(-1))

Unnamed: 0,w,x,y,z
a,684,559,629,192
b,-835,-763,-707,-359
c,-9,-723,-277,-754
d,804,599,70,472
