In [1]:
import pandas as pd

In [2]:
# Build the small demo frame that the selection examples operate on:
# one row per name, with an associated growth value.
df = pd.DataFrame(
    data={
        'Name': 'Paul,George,Ringo'.split(','),
        'growth': [0.5, 0.7, 1.2],
    },
)


## Selection
Rows can be selected with boolean conditions, which can be combined using the logical operators and, or, and not.

In [3]:
# Show the whole frame; a bare expression on the last line is rendered as the cell output.
df

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7
2,Ringo,1.2


In [4]:
# Select row(s) with a boolean mask: only the rows where the comparison
# is True are kept.
df[df.Name == 'Paul']

Unnamed: 0,Name,growth
0,Paul,0.5


In [7]:
# The comparison on its own yields a boolean Series with one True/False per row;
# this is the kind of mask used inside df[...] to select rows.
df.Name == 'Paul'

0     True
1    False
2    False
Name: Name, dtype: bool

In [8]:
# String methods live under .str; contains('o') gives a boolean Series that is
# True for every name containing an 'o'.
df.Name.str.contains('o')

0    False
1     True
2     True
Name: Name, dtype: bool

In [9]:
# Use the str.contains mask to keep only the rows whose Name contains an 'o'.
df[df.Name.str.contains('o')]

Unnamed: 0,Name,growth
1,George,0.7
2,Ringo,1.2


In [10]:
# Inspect the dtype of each column: Name is stored as object, growth as float64.
df.dtypes

Name       object
growth    float64
dtype: object

In [11]:
# and
# Doesn't work (intentional error demo): `&` binds more tightly than `==` and `>`,
# so Python evaluates `'Paul' & df.growth` first and the expression raises a
# TypeError. Each comparison needs its own parentheses.

df[df.Name == 'Paul' & df.growth > .4]

TypeError: cannot compare a dtyped [float64] array with a scalar of type [bool]

In [16]:
# and
# Doesn't work (intentional error demo): Python's `and` needs a single truth
# value for each operand, and a whole Series has none, so this raises
# "ValueError: The truth value of a Series is ambiguous".
# Use the element-wise `&` operator instead.

df[(df.Name == 'Paul') and (df.growth > .4)]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [12]:
# and
# Works: `&` combines the two boolean masks element-wise. Each comparison is
# wrapped in parentheses so it is evaluated before `&`.

df[(df.Name == 'Paul') & (df.growth > .4)]

Unnamed: 0,Name,growth
0,Paul,0.5


In [13]:
# and
# Doesn't work (intentional error demo): the Python keyword `and` asks each
# Series for a single truth value, which is ambiguous and raises a ValueError.
# Use `&` for element-wise "and".
df[(df.Name == 'Paul') and (df.growth > .4)]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [14]:
# and
# DataFrame.query takes the condition as a string; inside that string the
# plain word `and` works and column names are written without the `df.` prefix.
# NOTE: may require installing the optional numexpr package if it is not
# already available.

df.query('Name == "Paul" and growth > .4')

Unnamed: 0,Name,growth
0,Paul,0.5


In [15]:
# or
# `|` combines the two boolean masks element-wise; a row is kept when either
# comparison is True. Each comparison is parenthesised so it is evaluated first.
df[(df.Name == 'Paul') | (df.Name == 'George')]

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7


In [16]:
# or
# Same selection written as a query string, where the plain word `or` is allowed.
df.query('Name == "Paul" or Name=="George"')

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7


In [17]:
# not
# `~` inverts a boolean mask element-wise. The comparison is parenthesised so
# that `~` applies to the mask rather than to df.Name itself.
# No row is named 'John', so every row is kept.
df[~(df.Name == 'John')]

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7
2,Ringo,1.2


In [19]:
# not
# Error (intentional demo): unary `~` binds more tightly than `==`, so it is
# applied to df.Name (strings) before the comparison happens, which raises
# "TypeError: bad operand type for unary ~: 'str'".
~df.Name == 'John'

TypeError: bad operand type for unary ~: 'str'

In [20]:
# not
# With parentheses: the comparison is evaluated first, then `~` inverts the
# resulting boolean Series (all True here, since no name equals 'John').
~(df.Name == 'John')

0    True
1    True
2    True
Name: Name, dtype: bool

In [21]:
# not
# In a query string, negation can be written with the `!=` comparison.
df.query('Name != "John"')

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7
2,Ringo,1.2


In [22]:
# not
# A query string also accepts the plain word `not` in front of a condition;
# this selects the same rows as the `!=` form.
df.query('not Name == "John"')

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7
2,Ringo,1.2


In [23]:
# Rows can also be pulled out by position rather than by condition.
# head(2) returns the first two rows.
df.head(2)

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7


In [25]:
# tail(2) returns the last two rows.
df.tail(2)

Unnamed: 0,Name,growth
1,George,0.7
2,Ringo,1.2


In [26]:
# iloc selects rows by integer position; passing a list returns the rows in the
# order listed (position 2 first, then position 1).
df.iloc[[2,1]]

Unnamed: 0,Name,growth
2,Ringo,1.2
1,George,0.7


In [27]:
# A plain slice on the DataFrame selects rows: here, the first two.
df[:2]

Unnamed: 0,Name,growth
0,Paul,0.5
1,George,0.7


In [28]:
# Slice from the second row onward (everything except the first row).
df[1:]

Unnamed: 0,Name,growth
1,George,0.7
2,Ringo,1.2
