In [3]:
import pandas as pd
import numpy as np

In [4]:
# Toy dataset for the missing-data examples: 'amount' has deliberate NaN gaps.
# Note that id 4 (last row) has no observed amount at all.
# np.nan is used rather than the np.NAN alias, which NumPy 2.0 removed.
incomplete_df = pd.DataFrame(
    {
        'id': [1, 2, 3, 2, 2, 3, 1, 1, 1, 2, 4],
        'type': ['one', 'one', 'two', 'three', 'two', 'three',
                 'one', 'two', 'one', 'three', 'one'],
        'amount': [345, 928, np.nan, 645, 113, 942,
                   np.nan, 539, np.nan, 814, np.nan],
    },
    columns=['id', 'type', 'amount'],  # pin the column order explicitly
)

In [5]:
# Display the raw frame; missing 'amount' entries are the gaps to be handled below.
incomplete_df

Unnamed: 0,id,type,amount
0,1,one,345.0
1,2,one,928.0
2,3,two,
3,2,three,645.0
4,2,two,113.0
5,3,three,942.0
6,1,one,
7,1,two,539.0
8,1,one,
9,2,three,814.0


In [6]:
# A: the 'amount' column of the example frame (contains NaN gaps).
# B: a separate series of the same length with NaN gaps in different positions.
# np.nan is used rather than the np.NAN alias, which NumPy 2.0 removed.
A = incomplete_df['amount']
B = pd.Series(data=[np.nan, 125, 335, 345, 312, np.nan, np.nan, 129, 551, 800, 222])

In [7]:
# Show both series one after the other, separated by blank lines.
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(A)
print('\n')
print(B)

0     345.0
1     928.0
2       NaN
3     645.0
4     113.0
5     942.0
6       NaN
7     539.0
8       NaN
9     814.0
10      NaN
Name: amount, dtype: float64


0       NaN
1     125.0
2     335.0
3     345.0
4     312.0
5       NaN
6       NaN
7     129.0
8     551.0
9     800.0
10    222.0
dtype: float64


In [8]:
# Mean of the observed values only: NaN entries are skipped (7 values -> 618.0).
A.mean()

618.0

In [9]:
# Summary statistics of B; NaN entries are ignored by each reduction.
# std() and var() are the sample statistics (n - 1 in the denominator).
# print(...) is used so the cell also runs on Python 3.
print(B.min())
print(B.max())
print(B.std())
print(B.var())

125.0
800.0
226.904597636
51485.6964286


In [10]:
# Index-aligned element-wise sum; a NaN in either operand yields NaN in the result.
A.add(B)

0        NaN
1     1053.0
2        NaN
3      990.0
4      425.0
5        NaN
6        NaN
7      668.0
8        NaN
9     1614.0
10       NaN
dtype: float64

In [11]:
# Re-display A as the starting point for the dropna / fillna examples that follow.
A

0     345.0
1     928.0
2       NaN
3     645.0
4     113.0
5     942.0
6       NaN
7     539.0
8       NaN
9     814.0
10      NaN
Name: amount, dtype: float64

In [12]:
# Return a new series without the NaN entries; the surviving rows keep their
# original index labels (note the gaps at 2, 6, 8 and 10). A itself is unchanged.
A.dropna()

0    345.0
1    928.0
3    645.0
4    113.0
5    942.0
7    539.0
9    814.0
Name: amount, dtype: float64

In [13]:
# Replace every NaN with the sentinel value -1 (returns a new series).
A.fillna(value=-1)

0     345.0
1     928.0
2      -1.0
3     645.0
4     113.0
5     942.0
6      -1.0
7     539.0
8      -1.0
9     814.0
10     -1.0
Name: amount, dtype: float64

In [14]:
# Replace every NaN with a text marker; mixing strings into the float series
# turns the result into an object-dtype series.
A.fillna(value='missing data')

0              345
1              928
2     missing data
3              645
4              113
5              942
6     missing data
7              539
8     missing data
9              814
10    missing data
Name: amount, dtype: object

In [15]:
# Mean imputation: every NaN is replaced with the mean of the observed values.
observed_mean = A.mean()
A.fillna(value=observed_mean)

0     345.0
1     928.0
2     618.0
3     645.0
4     113.0
5     942.0
6     618.0
7     539.0
8     618.0
9     814.0
10    618.0
Name: amount, dtype: float64

In [16]:
# Display the frame again before filling 'amount' using per-id means.
incomplete_df

Unnamed: 0,id,type,amount
0,1,one,345.0
1,2,one,928.0
2,3,two,
3,2,three,645.0
4,2,two,113.0
5,3,three,942.0
6,1,one,
7,1,two,539.0
8,1,one,
9,2,three,814.0


In [17]:
# Fill gaps in the 'amount' column with the mean amount of the rows sharing the same id.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected column: that chained in-place call acts on
# an intermediate Series and does not update the frame under pandas Copy-on-Write.
per_id_mean = incomplete_df.groupby("id")["amount"].transform("mean")
incomplete_df["amount"] = incomplete_df["amount"].fillna(per_id_mean)
# An id with no observed amount has a NaN group mean, so fall back to the overall
# mean (computed after the per-id fill above).
incomplete_df["amount"] = incomplete_df["amount"].fillna(incomplete_df["amount"].mean())
incomplete_df

Unnamed: 0,id,type,amount
0,1,one,345.0
1,2,one,928.0
2,3,two,942.0
3,2,three,645.0
4,2,two,113.0
5,3,three,942.0
6,1,one,442.0
7,1,two,539.0
8,1,one,442.0
9,2,three,814.0


In [18]:
# Show B, then B with each gap forward-filled from the last valid value before it.
# The leading NaN stays because there is no earlier value to propagate.
# print(...) is used so the cell also runs on Python 3.
print(B)
print('\n')
# ffill() is the direct equivalent of the deprecated fillna(method='pad').
print(B.ffill())

0       NaN
1     125.0
2     335.0
3     345.0
4     312.0
5       NaN
6       NaN
7     129.0
8     551.0
9     800.0
10    222.0
dtype: float64


0       NaN
1     125.0
2     335.0
3     345.0
4     312.0
5     312.0
6     312.0
7     129.0
8     551.0
9     800.0
10    222.0
dtype: float64


In [19]:
# Backward-fill from the next valid value, filling at most one NaN per gap:
# in the two-NaN gap only index 6 is filled and index 5 stays NaN.
# bfill() is the direct equivalent of the deprecated fillna(method='bfill').
B.bfill(limit=1)

0     125.0
1     125.0
2     335.0
3     345.0
4     312.0
5       NaN
6     129.0
7     129.0
8     551.0
9     800.0
10    222.0
dtype: float64

In [20]:
# Show B, then B with interior gaps filled by the default (linear) interpolation:
# indices 5 and 6 become evenly spaced values between 312 and 129.
# The leading NaN stays because there is no earlier point to interpolate from.
# print(...) is used so the cell also runs on Python 3.
print(B)
print('\n')
print(B.interpolate())

0       NaN
1     125.0
2     335.0
3     345.0
4     312.0
5       NaN
6       NaN
7     129.0
8     551.0
9     800.0
10    222.0
dtype: float64


0       NaN
1     125.0
2     335.0
3     345.0
4     312.0
5     251.0
6     190.0
7     129.0
8     551.0
9     800.0
10    222.0
dtype: float64


In [21]:
# Fill the gaps using the 'barycentric' interpolation method. As the output shows,
# the filled values can fall outside the range of the neighbouring points
# (index 6 comes out negative).
# NOTE(review): this method presumably needs SciPy installed - confirm in the environment.
B.interpolate(method='barycentric')

0            NaN
1     125.000000
2     335.000000
3     345.000000
4     312.000000
5     146.563492
6      -1.849206
7     129.000000
8     551.000000
9     800.000000
10    222.000000
dtype: float64

In [22]:
# Fill the gaps using the 'pchip' interpolation method. In the output the filled
# values stay between the neighbouring points 312 and 129.
# NOTE(review): this method presumably needs SciPy installed - confirm in the environment.
B.interpolate(method='pchip')

0            NaN
1     125.000000
2     335.000000
3     345.000000
4     312.000000
5     246.420420
6     167.376877
7     129.000000
8     551.000000
9     800.000000
10    222.000000
dtype: float64

In [23]:
# Compare interpolation methods on a noisy quadratic curve with knocked-out points.
np.random.seed(2)  # fixed seed so the noise, and therefore the plot, is reproducible
grid = np.arange(1, 10.1, .25)
noise = np.random.randn(grid.size)  # one noise sample per grid point (37 of them)
ser = pd.Series(grid**2 + noise)

# Positions to blank out, including one long run (13-18) to stress the methods.
bad = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29])
ser[bad] = np.nan

# One column per interpolation method, all built from the same gappy series.
methods = ['linear', 'quadratic', 'cubic']
curves = [ser.interpolate(method=name) for name in methods]
df = pd.DataFrame(dict(zip(methods, curves)))
df.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x10e484790>