In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a small planets table column by column: every key becomes a column
# name and every list holds that column's values, in the same planet order.
data = dict(
    planet=['Mercury', 'Venus', 'Earth', 'Mars',
            'Jupiter', 'Saturn', 'Uranus', 'Neptune'],
    radius_km=[2440, 6052, 6371, 3390,
               69911, 58232, 25362, 24622],
    moons=[0, 0, 1, 2, 80, 83, 27, 14],
)
df = pd.DataFrame(data)
df

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
4,Jupiter,69911,80
5,Saturn,58232,83
6,Uranus,25362,27
7,Neptune,24622,14


In [5]:
# Planets that have fewer than 20 moons.
# The comparison is evaluated element-wise and results in a Series object
# of dtype: bool (True where the planet has fewer than 20 moons).
# It is left as the cell's last expression so the notebook displays it
# directly; wrapping it in print() is unnecessary.
df['moons'] < 20

0     True
1     True
2     True
3     True
4    False
5    False
6    False
7     True
Name: moons, dtype: bool


In [6]:
# Store the Boolean mask under a name, then hand that name to the
# dataframe's selector brackets to keep only the rows where it is True.
mask = df['moons'].lt(20)  # element-wise "less than", same as df['moons'] < 20
df[mask]

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
7,Neptune,24622,14


In [10]:
# To pick specific columns for the rows that pass the mask, use loc[] with
# the mask as the row selector and the column label(s) as the column selector.
mask = df['moons'] < 20
# Several columns: pass a *list* of labels (pandas reserves tuples for
# MultiIndex keys). Note that this result is not displayed, because a cell
# only renders the value of its last expression.
df.loc[mask, ['planet', 'moons']]  # several columns
# One column: a single label gives back just that column as a Series.
df.loc[mask, 'planet']             # one column

0    Mercury
1      Venus
2      Earth
3       Mars
7    Neptune
Name: planet, dtype: object

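Logical operators for combining Boolean masks (use these element-wise operators instead of Python's `and`, `or`, `not`):
&  == and # Alt 38 
|  == or  # Alt 124 
~  == not # Alt 126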

In [11]:
# True for planets with fewer than 10 moons or with more than 50 moons.
mask = (
    (df['moons'] < 10)      # few moons ...
    | (df['moons'] > 50)    # ... or a great many
)
mask

0     True
1     True
2     True
3     True
4     True
5     True
6    False
7    False
Name: moons, dtype: bool

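Notice that each condition is self-contained in a set of parentheses, and the two conditions are separated by the logical operator `|` (or). The parentheses are required because `|` binds more tightly than comparison operators such as `<` and `>`, so without them the expression would not be evaluated as two separate conditions. To apply the mask, call the dataframe and put the statement or the variable it’s assigned to in selector brackets: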

In [12]:
# Rebuild the "fewer than 10 or more than 50 moons" mask, then apply it
# to the dataframe to keep only the matching rows.
mask = (
    (df['moons'] < 10)
    | (df['moons'] > 50)
)
df[mask]

Unnamed: 0,planet,radius_km,moons
0,Mercury,2440,0
1,Venus,6052,0
2,Earth,6371,1
3,Mars,3390,2
4,Jupiter,69911,80
5,Saturn,58232,83


In [13]:
# Chain three conditions with & (and); a leading ~ inverts the whole
# parenthesized condition that follows it.
mask = (
    (df['moons'] > 20)              # more than 20 moons
    & ~(df['moons'] == 80)          # and not exactly 80 moons
    & ~(df['radius_km'] < 50000)    # and not a radius below 50,000 km
)
df[mask]

Unnamed: 0,planet,radius_km,moons
5,Saturn,58232,83


In [15]:
# The same filter written without ~: each negated condition is replaced by
# its complementary comparison (== becomes !=, < becomes >=).
mask = (
    (df['moons'] > 20)
    & (df['moons'] != 80)
    & (df['radius_km'] >= 50000)
)
df[mask]

Unnamed: 0,planet,radius_km,moons
5,Saturn,58232,83
