In [2]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

### Finding valid data
The `notnull` method is good for making sure we get valid data, in one or several columns.

In [2]:
# Build a 5x5 frame of standard-normal draws, then blank out a few cells so
# the filtering examples have missing values to work with.
people = DataFrame(np.random.randn(5, 5),
                   columns=['a', 'b', 'c', 'd', 'e'],
                   index=['Joe', 'Steve', 'Wes', 'Jim', 'Travis'])
# `.ix` was removed in pandas 1.0. Label-based `.loc` targets the same rows
# that the old positional slices (2:3, 0:1, 4:5) selected.
people.loc['Wes', ['b', 'c']] = np.nan  # Add a few NA values
people.loc['Joe', 'c'] = np.nan
people.loc['Travis', 'b'] = np.nan
people

Unnamed: 0,a,b,c,d,e
Joe,0.337588,0.285619,,2.689676,-0.348729
Steve,0.824332,0.668416,0.864886,1.619118,1.409903
Wes,0.187006,,,-0.341013,0.566728
Jim,0.113149,0.801743,0.402575,1.152984,-0.005146
Travis,0.158797,,0.295187,-0.309163,0.579762


In [3]:
# Rows whose 'b' value is present (not NaN)
people.loc[people['b'].notnull()]

Unnamed: 0,a,b,c,d,e
Joe,0.337588,0.285619,,2.689676,-0.348729
Steve,0.824332,0.668416,0.864886,1.619118,1.409903
Jim,0.113149,0.801743,0.402575,1.152984,-0.005146


In [4]:
# Rows whose 'c' value is present (not NaN)
people.loc[people['c'].notnull()]

Unnamed: 0,a,b,c,d,e
Steve,0.824332,0.668416,0.864886,1.619118,1.409903
Jim,0.113149,0.801743,0.402575,1.152984,-0.005146
Travis,0.158797,,0.295187,-0.309163,0.579762


In [6]:
# Both criteria combined: keep a row only when 'b' and 'c' are both present
people.loc[people[['b', 'c']].notnull().all(axis=1)]

Unnamed: 0,a,b,c,d,e
Steve,0.824332,0.668416,0.864886,1.619118,1.409903
Jim,0.113149,0.801743,0.402575,1.152984,-0.005146


In [7]:
# Element-wise mask for the whole frame: True where a value is present, False where it is NaN
people.notnull()

Unnamed: 0,a,b,c,d,e
Joe,True,True,False,True,True
Steve,True,True,True,True,True
Wes,True,False,False,True,True
Jim,True,True,True,True,True
Travis,True,False,True,True,True


## Also works with the top-level `pd.notnull` function and attribute-style column access

In [3]:
# Small frame with one missing value in each of 'AAA' and 'BBB'.
# `np.NaN` was removed in NumPy 2.0; `np.nan` is the canonical spelling.
df = pd.DataFrame({
    'AAA': [4, np.nan, 6, 7],
    'BBB': [10, 20, np.nan, 40],
    'CCC': [100, 50, -30, -50],
})
df

Unnamed: 0,AAA,BBB,CCC
0,4.0,10.0,100
1,,20.0,50
2,6.0,,-30
3,7.0,40.0,-50


In [4]:
# Rows where column AAA holds a value
df.loc[pd.notnull(df.AAA)]

Unnamed: 0,AAA,BBB,CCC
0,4,10.0,100
2,6,,-30
3,7,40.0,-50


In [5]:
# Rows where column BBB holds a value
df.loc[pd.notnull(df.BBB)]

Unnamed: 0,AAA,BBB,CCC
0,4.0,10,100
1,,20,50
3,7.0,40,-50


In [6]:
# Rows where both AAA and BBB hold a value
df.loc[pd.notnull(df.AAA) & pd.notnull(df.BBB)]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
3,7,40,-50
