In [2]:
import pandas as pd
import numpy as np

# Sample string Series containing one missing entry (np.nan) at position 2.
sample_words = ['aardvark', 'artichoke', np.nan, 'avocado']
string_data = pd.Series(sample_words)
string_data

0     aardvark
1    artichoke
2          NaN
3      avocado
dtype: object

#### Check for null values

In [3]:
# Boolean mask: True wherever an entry is missing (isna is the same check as isnull).
string_data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [5]:
# Overwrite the entry labelled 0 with Python's None; the mask below shows that
# None is reported as missing just like NaN.
string_data.loc[0] = None
string_data.isna()

0     True
1    False
2     True
3    False
dtype: bool

In [6]:
# 7x3 frame of standard-normal draws (no seed is set in this cell, so the
# values differ from run to run), with missing values punched in:
#   - column 1: first four rows
#   - column 2: first two rows
# np.nan is used instead of the np.NAN alias, which NumPy 2.0 removed.
df = pd.DataFrame(np.random.randn(7, 3))
df.iloc[:4, 1] = np.nan
df.iloc[:2, 2] = np.nan
df

Unnamed: 0,0,1,2
0,0.336213,,
1,0.279781,,
2,1.682254,,-0.480039
3,0.893915,,0.794611
4,-0.489534,-0.17058,0.926408
5,0.954218,0.324234,0.695671
6,0.946345,-2.203712,1.09


In [7]:
# Drop every row that contains at least one missing value (the defaults, spelled out).
df.dropna(axis=0, how='any')

Unnamed: 0,0,1,2
4,-0.489534,-0.17058,0.926408
5,0.954218,0.324234,0.695671
6,0.946345,-2.203712,1.09


In [12]:
# Keep only the rows that have at least this many non-missing values.
min_non_missing = 2
df.dropna(axis=0, thresh=min_non_missing)

Unnamed: 0,0,1,2
2,1.682254,,-0.480039
3,0.893915,,0.794611
4,-0.489534,-0.17058,0.926408
5,0.954218,0.324234,0.695671
6,0.946345,-2.203712,1.09


In [13]:
# Replace every missing value in the frame with the same constant.
df.fillna(value=0)

Unnamed: 0,0,1,2
0,0.336213,0.0,0.0
1,0.279781,0.0,0.0
2,1.682254,0.0,-0.480039
3,0.893915,0.0,0.794611
4,-0.489534,-0.17058,0.926408
5,0.954218,0.324234,0.695671
6,0.946345,-2.203712,1.09


In [14]:
# Fill with a different value for each column: the mapping is column label -> fill value.
# Column 0 is not listed, so it is not given a fill value here.
fill_by_column = {1: 0.5, 2: 0}
df.fillna(value=fill_by_column)

Unnamed: 0,0,1,2
0,0.336213,0.5,0.0
1,0.279781,0.5,0.0
2,1.682254,0.5,-0.480039
3,0.893915,0.5,0.794611
4,-0.489534,-0.17058,0.926408
5,0.954218,0.324234,0.695671
6,0.946345,-2.203712,1.09


In [15]:
# 6x3 frame of standard-normal draws (no seed is set in this cell) with
# trailing gaps, used below to demonstrate forward filling:
#   - column 1: missing from row 2 onward
#   - column 2: missing from row 4 onward
# np.nan is used instead of the np.NAN alias, which NumPy 2.0 removed.
df = pd.DataFrame(np.random.randn(6, 3))
df.iloc[2:, 1] = np.nan
df.iloc[4:, 2] = np.nan
df

Unnamed: 0,0,1,2
0,-0.957259,-0.88395,-1.00024
1,0.898151,-0.90831,0.089191
2,0.871897,,1.567223
3,-0.058361,,0.492501
4,-2.194597,,
5,-0.994889,,


In [16]:
# Forward fill: propagate the last valid value in each column down into the gaps.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in recent pandas releases.
df.ffill()

Unnamed: 0,0,1,2
0,-0.957259,-0.88395,-1.00024
1,0.898151,-0.90831,0.089191
2,0.871897,-0.90831,1.567223
3,-0.058361,-0.90831,0.492501
4,-2.194597,-0.90831,0.492501
5,-0.994889,-0.90831,0.492501


In [19]:
# Forward fill, but fill at most two consecutive missing values per gap;
# anything further down a gap stays missing.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated in recent pandas releases.
df.ffill(limit=2)

Unnamed: 0,0,1,2
0,-0.957259,-0.88395,-1.00024
1,0.898151,-0.90831,0.089191
2,0.871897,-0.90831,1.567223
3,-0.058361,-0.90831,0.492501
4,-2.194597,,0.492501
5,-0.994889,,0.492501


In [20]:
# Impute missing entries with the mean of the observed values
# (Series.mean() skips NaN by default).
# np.nan is used instead of the np.NAN alias, which NumPy 2.0 removed.
data = pd.Series([1., np.nan, 3.5, np.nan, 7])
data.fillna(data.mean())

0    1.000000
1    3.833333
2    3.500000
3    3.833333
4    7.000000
dtype: float64

In [27]:
# Small 3x4 integer frame (values 0..11) labelled by state and by ordinal word.
state_labels = ['Ohio', 'Colorado', 'New York']
ordinal_labels = ['one', 'two', 'three', 'four']
data = pd.DataFrame(np.arange(12).reshape((3, 4)),
                    index=state_labels,
                    columns=ordinal_labels)

# Shorten each row label to its first four characters and upper-case it.
data.index = [label[:4].upper() for label in data.index]

# Title-case the row labels and upper-case the column labels; the result is
# only displayed here, not assigned back to `data`.
data.rename(index=str.title, columns=str.upper)


Unnamed: 0,ONE,TWO,THREE,FOUR
Ohio,0,1,2,3
Colo,4,5,6,7
New,8,9,10,11
