In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Build a 6x6 frame of random integers drawn from [0, 1000), labelled with
# letters on both axes.  No seed is set in the cells shown here, so the
# values differ each time this cell is run.
df = DataFrame(
    data=np.random.randint(low=0, high=1000, size=(6, 6)),
    index=list('abcdef'),
    columns=list('uvwxyz'),
)
df

Unnamed: 0,u,v,w,x,y,z
a,454,999,144,254,300,220
b,534,580,274,888,993,846
c,744,408,3,257,761,291
d,49,328,216,258,235,341
e,315,702,232,751,944,364
f,201,588,659,847,942,170


In [3]:
# Compare every value in row 'c' with 744.  The result is a boolean Series
# indexed by column label: True where the value equals 744.
df.loc['c'] == 744

u     True
v    False
w    False
x    False
y    False
z    False
Name: c, dtype: bool

In [4]:
# Same check against 408: I want to know where row 'c' is 408.
df.loc['c'] == 408

u    False
v     True
w    False
x    False
y    False
z    False
Name: c, dtype: bool

In [5]:
# Same check against 3: I want to know where row 'c' is 3.
df.loc['c'] == 3

u    False
v    False
w     True
x    False
y    False
z    False
Name: c, dtype: bool

In [7]:
# I want to know where row 'c' is any of 744, 408, or 3.
# `|` acts as an element-wise "or" on boolean Series.  Each comparison is
# wrapped in parentheses because `|` binds more tightly than `==` in Python.

(df.loc['c'] == 744) | (df.loc['c'] == 408) | (df.loc['c'] == 3)

u     True
v     True
w     True
x    False
y    False
z    False
Name: c, dtype: bool

In [8]:
# There's another way to do this: the "isin" method tests each value in the
# row for membership in the given list, in a single call.
df.loc['c'].isin([744, 408, 3])

u     True
v     True
w     True
x    False
y    False
z    False
Name: c, dtype: bool

In [9]:
# Which of these techniques will execute faster?
# First, time the chained comparisons.  Note that this expression performs
# the df.loc['c'] row lookup three times on every loop.

%timeit (df.loc['c'] == 744) | (df.loc['c'] == 408) | (df.loc['c'] == 3)

319 µs ± 5.52 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [10]:
# Now time the single isin() call on the same row, for comparison.
%timeit df.loc['c'].isin([744, 408, 3])

80.2 µs ± 296 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [11]:
# Ratio of the two timings reported above (isin: about 80 µs per loop,
# chained ==/|: about 319 µs per loop).  In this run isin took roughly a
# quarter of the time.
80 / 319

0.2507836990595611