# Missing Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small demo frame indexed 1..3: column A has one missing value, column B has
# two, and column C is complete.
df = pd.DataFrame(
    {
        "A": [1, 2, np.nan],
        "B": [5, np.nan, np.nan],
        "C": [1, 2, 3],
    },
    index=[1, 2, 3],
)
df

Unnamed: 0,A,B,C
1,1.0,5.0,1
2,2.0,,2
3,,,3


In [3]:
# Drop every row that contains at least one missing value (the defaults,
# spelled out). A new frame is returned.
df.dropna(axis="index", how="any")

Unnamed: 0,A,B,C
1,1.0,5.0,1


In [4]:
# dropna() returned a new frame; df itself still contains its NaN values.
df

Unnamed: 0,A,B,C
1,1.0,5.0,1
2,2.0,,2
3,,,3


In [5]:
# Drop every column that contains at least one missing value.
df.dropna(axis="columns")

Unnamed: 0,C
1,1
2,2
3,3


In [6]:
# Dropping columns also returned a new frame, so df is still unchanged here.
df

Unnamed: 0,A,B,C
1,1.0,5.0,1
2,2.0,,2
3,,,3


### Delete rows that have fewer than a threshold number of non-null values

In [7]:
# thresh is the minimum number of non-null values a row needs in order to be
# kept: rows with fewer than 2 non-null entries are dropped.
df.dropna(axis="index", thresh=2)

Unnamed: 0,A,B,C
1,1.0,5.0,1
2,2.0,,2


### Delete columns that have fewer than a threshold number of non-null values

In [8]:
# Keep only the columns that have at least 2 non-null values.
df.dropna(axis="columns", thresh=2)

Unnamed: 0,A,C
1,1.0,1
2,2.0,2
3,,3


### Filling the null values with some other data

In [9]:
# Replace every missing value in the frame with a placeholder string
# (returns a new frame; df is not modified).
df.fillna("FILL VALUE")

Unnamed: 0,A,B,C
1,1,5,1
2,2,FILL VALUE,2
3,FILL VALUE,FILL VALUE,3


#### Filling a single column or row

In [10]:
# Fill the missing entries of a single column; the result is a new Series.
df["B"].fillna("some Value")

1             5
2    some Value
3    some Value
Name: B, dtype: object

In [11]:
# Fill the missing entries of a single row, selected by its index label.
df.loc[2].fillna("some VALUE")

A             2
B    some VALUE
C             2
Name: 2, dtype: object

#### Filling missing values with aggregates of the non-null values

In [12]:
# Fill B's missing values with the mean of its non-null entries and assign the
# result back to the column. Calling fillna(inplace=True) on the df['B']
# selection is chained assignment: recent pandas versions warn about it and,
# under Copy-on-Write, it no longer updates df at all.
df['B'] = df['B'].fillna(value=df['B'].mean())

In [13]:
# Show df after filling column B: B no longer contains NaN values.
df

Unnamed: 0,A,B,C
1,1.0,5.0,1
2,2.0,5.0,2
3,,5.0,3


In [15]:
# Fill A's missing values with the mean of its non-null entries and assign the
# result back to the column. Calling fillna(inplace=True) on the df['A']
# selection is chained assignment: recent pandas versions warn about it and,
# under Copy-on-Write, it no longer updates df at all.
df['A'] = df['A'].fillna(value=df['A'].mean())

In [16]:
# Show df after filling column A as well: A and B now hold their column means
# where values were missing.
df

Unnamed: 0,A,B,C
1,1.0,5.0,1
2,2.0,5.0,2
3,1.5,5.0,3


#### Filling rows with the row mean

In [18]:
# Rebuild the original frame, because the earlier cells filled A and B.
df = pd.DataFrame(
    {
        "A": [1, 2, np.nan],
        "B": [5, np.nan, np.nan],
        "C": [1, 2, 3],
    },
    index=[1, 2, 3],
)
print(df)

# Take the third row by position and fill its gaps with that row's own mean.
third_row = df.iloc[2]
third_row.fillna(third_row.mean())

     A    B  C
1  1.0  5.0  1
2  2.0  NaN  2
3  NaN  NaN  3


A    3.0
B    3.0
C    3.0
Name: 3, dtype: float64

In [19]:
# Same idea with label-based lookup: fill the row labelled 2 with its own mean.
row_two = df.loc[2]
row_two.fillna(row_two.mean())

A    2.0
B    2.0
C    2.0
Name: 2, dtype: float64

### Finding null values

In [20]:
# Boolean mask that is True wherever column B is missing (isna is the same
# check as isnull).
df["B"].isna()

1    False
2     True
3     True
Name: B, dtype: bool