## “np.nan”和“np.NaN”

In [5]:
import numpy as np

# `np.NaN` was only ever an alias of `np.nan`, and NumPy 2.0 removed it.
# Look the alias up defensively so this cell still runs on every NumPy
# version; on NumPy < 2.0 `legacy_nan` is exactly `np.NaN`.
legacy_nan = getattr(np, "NaN", np.nan)

print("np.nan = {}, type(np.nan) = {}".format(np.nan, type(np.nan)))
print("np.NaN = {}, type(np.NaN) = {}".format(legacy_nan, type(legacy_nan)))
# NaN never compares equal to anything, itself included -> False.
print(np.nan == legacy_nan)
# Both names are bound to the very same float object -> True.
print(np.nan is legacy_nan)

np.nan = nan, type(np.nan) = <class 'float'>
np.NaN = nan, type(np.NaN) = <class 'float'>
False
True


## 使用`isnull()`

In [22]:
import pandas as pd
import numpy as np

# 4x3 frame with missing values scattered through it.
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
df = pd.DataFrame([[1.2, 6.5, 3.0],
                   [1., np.nan, np.nan],
                   [np.nan, np.nan, np.nan],
                   [np.nan, 6.2, 3.1]])

# Element-wise mask: True wherever the value is missing.
print(df.isnull())
# `pd.isnull` also accepts a scalar; the cell at row 0 / column 0 holds 1.2.
print(pd.isnull(df.loc[0, 0]))

       0      1      2
0  False  False  False
1  False   True   True
2   True   True   True
3   True  False  False
False


## 使用`notnull()`

[What is the difference between NaN and None?](https://stackoverflow.com/questions/17534106/what-is-the-difference-between-nan-and-none)里提到，`np.isnan(p)`对传入参数的类型有要求：如果传入的是string类型，会直接抛出`TypeError`；相比之下，使用`pd.isnull()`则要安全得多。

In [25]:
# Element-wise mask over the whole frame: True where a value is present.
present_mask = df.notnull()
print(present_mask)

# Scalar form of the same check, applied to the cell at row 2 / column 0.
cell_is_present = pd.notnull(df.loc[2, 0])
print(cell_is_present)

import numpy as np

# `np.isnan` only accepts numeric input; passing a string raises TypeError.
# The error itself is what this cell demonstrates, so catch it and print the
# message instead of letting the traceback abort a "Run All".
try:
    print(np.isnan('h'))
except TypeError as exc:
    print("TypeError: {}".format(exc))

       0      1      2
0   True   True   True
1   True  False  False
2  False  False  False
3  False   True   True
False


TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

## 填充NaN

不管是填充还是替换，默认都不会修改原有的DataFrame，而是返回一个新的对象。如果需要在原对象上生效，可以使用`inplace=True`参数，或者把返回值重新赋值给变量。

In [16]:
import pandas as pd
import numpy as np

# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
df = pd.DataFrame([[1.2, 6.5, 3.0],
                   [1., np.nan, np.nan],
                   [np.nan, np.nan, np.nan],
                   [np.nan, 6.2, 3.1]])
print(df)

# Deliberately discard the return value: `fillna` builds a new frame and
# leaves `df` untouched, so the next print still shows the NaNs.
df.fillna(0)
print(df)

# Keeping the returned frame is what actually gives the filled data.
new_df = df.fillna(0)
print(new_df)

     0    1    2
0  1.2  6.5  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN
3  NaN  6.2  3.1
     0    1    2
0  1.2  6.5  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN
3  NaN  6.2  3.1
     0    1    2
0  1.2  6.5  3.0
1  1.0  0.0  0.0
2  0.0  0.0  0.0
3  0.0  6.2  3.1


In [17]:
# `replace` returns a new Series, just like `fillna`; the result is discarded
# here on purpose, so `df` prints unchanged below.
# Uses `np.nan` (the `np.NaN` alias was removed in NumPy 2.0) and a numeric 0
# so the replacement would not turn the float column into a mixed-type one.
df[1].replace(np.nan, 0)
print(df)

     0    1    2
0  1.2  6.5  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN
3  NaN  6.2  3.1


In [10]:
# Bare expression that is not the cell's last line: its value is computed but
# never echoed, so it does not appear in the cell output.
np.nan > 0.0
# Last expression of the cell -> this is the value the notebook displays.
type(np.nan)

float

## 使用`ffill`

In [12]:
import pandas as pd
import numpy as np

df = pd.DataFrame([[np.nan, 2, np.nan, 0],
                   [3, 4, 2, 1],
                   [np.nan, 3, 3, 3],
                   [np.nan, 3, 4, 4],
                   [4, 5, 5, 5],
                   [np.nan, 3, 4, 4]],
                  columns=list("ABCD"))
print(df)

# Forward-fill column A: each NaN takes the last valid value above it. The
# leading NaN in row 0 has nothing above it and therefore stays NaN.
# Assign the result back instead of `df['A'].fillna(method='ffill', inplace=True)`:
# `fillna(method=...)` is deprecated, and an in-place call on a selected column
# is chained assignment that no longer updates `df` under Copy-on-Write.
df['A'] = df['A'].ffill()
print(df)

     A  B    C  D
0  NaN  2  NaN  0
1  3.0  4  2.0  1
2  NaN  3  3.0  3
3  NaN  3  4.0  4
     A  B    C  D
0  NaN  2  NaN  0
1  3.0  4  2.0  1
2  3.0  3  3.0  3
3  3.0  3  4.0  4


## 使用`dropna()`

In [21]:
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in NumPy 2.0.
s = pd.Series([1, np.nan, 3.2, np.nan, 7])
print(s)
# On a Series, `dropna` simply removes the missing entries (labels are kept).
print(s.dropna())

df = pd.DataFrame([[1.2, 6.5, 3.0],
                   [1., np.nan, np.nan],
                   [np.nan, np.nan, np.nan],
                   [np.nan, 6.2, 3.1]])
print(df)
# Default (how='any'): drop every row that contains at least one NaN.
print(df.dropna())
# how='all': drop only the rows in which every value is NaN.
print(df.dropna(how='all'))
# axis=1 applies the same rule to columns; no column here is entirely NaN.
print(df.dropna(axis=1, how='all'))


# A scalar fills every NaN with the same value ...
print(df.fillna(0))
# ... while a dict maps column label -> fill value; column 0 is not listed,
# so its NaNs are left as they are.
print(df.fillna({1: 0.5, 2: 1.0}))

0    1.0
1    NaN
2    3.2
3    NaN
4    7.0
dtype: float64
0    1.0
2    3.2
4    7.0
dtype: float64
     0    1    2
0  1.2  6.5  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN
3  NaN  6.2  3.1
     0    1    2
0  1.2  6.5  3.0
     0    1    2
0  1.2  6.5  3.0
1  1.0  NaN  NaN
3  NaN  6.2  3.1
     0    1    2
0  1.2  6.5  3.0
1  1.0  NaN  NaN
2  NaN  NaN  NaN
3  NaN  6.2  3.1
     0    1    2
0  1.2  6.5  3.0
1  1.0  0.0  0.0
2  0.0  0.0  0.0
3  0.0  6.2  3.1
     0    1    2
0  1.2  6.5  3.0
1  1.0  0.5  1.0
2  NaN  0.5  1.0
3  NaN  6.2  3.1


## NaN的计算


In [2]:
import numpy as np

# NaN propagates through arithmetic: any sum that touches it is NaN as well.
# (`np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.)
print(np.nan + 1)
print(sum([np.nan, 1.0, 2.0]))

nan
nan


In [4]:
import numpy as np

# Ordered comparisons against NaN evaluate to False.
# (`np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.)
np.nan > 0.0

False