Source: pandas user guide, "Working with missing data" — https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html

In [3]:
import pandas as pd
import numpy as np

In [5]:
# Build a 5x3 frame of standard-normal draws. The row labels deliberately
# skip 'b', 'd' and 'g'; the draws are unseeded, so values differ per run.
df = pd.DataFrame(
    data=np.random.randn(5, 3),
    index=list('acefh'),
    columns=['one', 'two', 'three'],
)
df

Unnamed: 0,one,two,three
a,-1.10991,-0.275704,-1.749484
c,-0.835892,-0.298652,0.909921
e,-0.097383,1.487967,0.855737
f,0.34686,-0.41503,0.122176
h,0.292697,-2.257308,-1.820376


In [7]:
# Add a constant string column and a boolean column derived from 'one',
# so the frame now mixes float, object and bool data.
df['four'] = 'bar'
df['five'] = df['one'].gt(0)
df

Unnamed: 0,one,two,three,four,five
a,-1.10991,-0.275704,-1.749484,bar,False
c,-0.835892,-0.298652,0.909921,bar,False
e,-0.097383,1.487967,0.855737,bar,False
f,0.34686,-0.41503,0.122176,bar,True
h,0.292697,-2.257308,-1.820376,bar,True


In [8]:
# Reindex onto the full 'a'..'h' label range; labels that df does not have
# ('b', 'd', 'g') come back as rows of missing values.
df2 = df.reindex(index=list('abcdefgh'))
df2

Unnamed: 0,one,two,three,four,five
a,-1.10991,-0.275704,-1.749484,bar,False
b,,,,,
c,-0.835892,-0.298652,0.909921,bar,False
d,,,,,
e,-0.097383,1.487967,0.855737,bar,False
f,0.34686,-0.41503,0.122176,bar,True
g,,,,,
h,0.292697,-2.257308,-1.820376,bar,True


In [9]:
# Boolean mask: True where column 'one' is missing.
df2['one'].isna()

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool

In [10]:
# Boolean mask: True where column 'four' holds a value.
pd.notna(df2['four'])

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: four, dtype: bool

In [11]:
# Element-wise missing-value mask for the whole frame.
pd.isna(df2)

Unnamed: 0,one,two,three,four,five
a,False,False,False,False,False
b,True,True,True,True,True
c,False,False,False,False,False
d,True,True,True,True,True
e,False,False,False,False,False
f,False,False,False,False,False
g,True,True,True,True,True
h,False,False,False,False,False


In [12]:
# Start over from a fresh copy of df and add a datetime column that holds
# the same Timestamp in every row.
df2 = df.assign(timestamp=pd.Timestamp('20120101'))
df2

Unnamed: 0,one,two,three,four,five,timestamp
a,-1.10991,-0.275704,-1.749484,bar,False,2012-01-01
c,-0.835892,-0.298652,0.909921,bar,False,2012-01-01
e,-0.097383,1.487967,0.855737,bar,False,2012-01-01
f,0.34686,-0.41503,0.122176,bar,True,2012-01-01
h,0.292697,-2.257308,-1.820376,bar,True,2012-01-01


In [14]:
# Blank out 'one' and 'timestamp' on three rows. The same np.nan shows up
# as NaN in the float column and as NaT in the datetime column.
df2.loc[
    ['a', 'c', 'h'],
    ['one', 'timestamp'],
] = np.nan
df2

Unnamed: 0,one,two,three,four,five,timestamp
a,,-0.275704,-1.749484,bar,False,NaT
c,,-0.298652,0.909921,bar,False,NaT
e,-0.097383,1.487967,0.855737,bar,False,2012-01-01
f,0.34686,-0.41503,0.122176,bar,True,2012-01-01
h,,-2.257308,-1.820376,bar,True,NaT


In [15]:
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in
# 1.0, so count the column dtypes directly instead. Casting to str and
# sorting the index keeps the result ordered by dtype name, matching how
# get_dtype_counts() reported it.
df2.dtypes.astype(str).value_counts().sort_index()

bool              1
datetime64[ns]    1
float64           3
object            1
dtype: int64

In [18]:
# Insert missing data
# Insert missing data
# Start from a float Series: assigning None into an integer Series only
# works through silent upcasting to float64, which recent pandas versions
# deprecate. In a float container, None is stored as NaN.
s = pd.Series([1.0, 2.0, 3.0])
s.loc[0] = None
s

0    NaN
1    2.0
2    3.0
dtype: float64

In [21]:
# Missing values are skipped when summing (skipna=True is the default,
# spelled out here), so only the non-NaN entries of 'one' contribute.
df2['one'].sum(skipna=True)

0.24947749881614967

In [22]:
# Rows whose group key ('one') is NaN do not appear in the result.
# numeric_only=True keeps the aggregation to the numeric/boolean columns;
# pandas 2.x no longer drops non-numeric columns such as 'four'
# automatically and raises instead.
df2.groupby('one').mean(numeric_only=True)

Unnamed: 0_level_0,two,three,five
one,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-0.097383,1.487967,0.855737,False
0.34686,-0.41503,0.122176,True


In [24]:
# Replace every missing value in the frame with the scalar 0.
# NOTE(review): this also puts the integer 0 into the NaT slots of
# 'timestamp', so that column ends up mixing 0 with datetimes (see output).
df2 = df2.fillna(value=0)
df2

Unnamed: 0,one,two,three,four,five,timestamp
a,0.0,-0.275704,-1.749484,bar,False,0
c,0.0,-0.298652,0.909921,bar,False,0
e,-0.097383,1.487967,0.855737,bar,False,2012-01-01 00:00:00
f,0.34686,-0.41503,0.122176,bar,True,2012-01-01 00:00:00
h,0.0,-2.257308,-1.820376,bar,True,0


In [25]:
# Fill gaps in the 'four' column with a string placeholder.
# NOTE(review): this rebinds df2 from the DataFrame to the resulting Series,
# so re-running this cell on its own will no longer find a 'four' column.
df2 = df2.loc[:, 'four'].fillna(value='missing')
df2

a    bar
c    bar
e    bar
f    bar
h    bar
Name: four, dtype: object