In [1]:
from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd
from io import StringIO

In [2]:
# Small inline CSV fixture with two deliberately empty cells: row 1 has no
# value for C and row 2 has no value for D, so pandas parses both as NaN.
# The data rows must not carry the "... " continuation prompts of an
# interactive session: inside a string literal they become part of the data,
# turning column A into text and breaking the numeric imputation that follows.
csv_data = '''A,B,C,D
1.0,2.0,3.0,4.0
5.0,6.0,,8.0
10.0,11.0,12.0,'''

# StringIO lets read_csv consume the in-memory string as if it were a file.
df = pd.read_csv(StringIO(csv_data))

In [3]:
# Mean imputation with scikit-learn: every NaN is replaced by the mean of
# the values present in its column.
si = SimpleImputer(missing_values=np.nan, strategy='mean')
# fit_transform learns the column means and applies them in a single call;
# `si` is left fitted afterwards.
imputed_data = si.fit_transform(df.values)
# Show the original frame next to the imputed array for comparison.
print(df)
print(imputed_data)

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0   NaN  8.0
2  10.0  11.0  12.0  NaN
[[ 1.   2.   3.   4. ]
 [ 5.   6.   7.5  8. ]
 [10.  11.  12.   6. ]]


In [4]:
# Simpler alternative with pandas alone: df.mean() gives one mean per column,
# and fillna uses it to fill the gaps of the matching column.
column_means = df.mean()
print(df.fillna(column_means))

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0   7.5  8.0
2  10.0  11.0  12.0  6.0


In [6]:
# Forward fill: each gap takes the preceding value in its column
# (typically used for time series).
forward_filled = df.ffill()
print(forward_filled)

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0   3.0  8.0
2  10.0  11.0  12.0  8.0


In [7]:
# Backward fill: each gap takes the next value in its column.
# A gap in the last row has nothing after it and therefore stays NaN.
backward_filled = df.bfill()
print(backward_filled)

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0  12.0  8.0
2  10.0  11.0  12.0  NaN


In [9]:
# Fill each gap with the average of the previous and the next value.
# Cells that were already populated are unchanged ((x + x) / 2 == x); a gap
# without a value on one side stays NaN because that side's fill is NaN.
from_previous = df.ffill()
from_next = df.bfill()
print((from_previous + from_next) / 2)

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0   7.5  8.0
2  10.0  11.0  12.0  NaN
