# Working with Missing Data in Pandas

In [3]:
import numpy as np
import pandas as pd

from pandas import DataFrame

## Filling missing values with fillna(), replace() and interpolate()

In [17]:
# Sample records stored column-wise: each key is a column name and each list
# holds that column's values, one entry per person.
data = {
    "Names": ["Kishore", "Tharun", "Dhanush", "Avanthika", "Suba", "Sneha"],
    "Age": [22, 23, 23, 21, 20, 23],
    "Education": ["MBA", "B.tec", "BABL", "MBA", "B.com", "Msc.maths"],
    "Lucky no": [4, 5, 2, 5, 6, 1],
}

In [18]:
data

{'Names': ['Kishore', 'Tharun', 'Dhanush', 'Avanthika', 'Suba', 'Sneha'],
 'Age': [22, 23, 23, 21, 20, 23],
 'Education': ['MBA', 'B.tec', 'BABL', 'MBA', 'B.com', 'Msc.maths'],
 'Lucky no': [4, 5, 2, 5, 6, 1]}

In [19]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values, one row per position.
ranking_df = DataFrame(data)

In [20]:
ranking_df

Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22,MBA,4
1,Tharun,23,B.tec,5
2,Dhanush,23,BABL,2
3,Avanthika,21,MBA,5
4,Suba,20,B.com,6
5,Sneha,23,Msc.maths,1


In [28]:
# Introduce missing values for the demonstrations that follow.
#
# NaN is a float, so the integer columns are cast to float up front. Writing
# NaN straight into an int64 column relies on implicit upcasting, which
# pandas 2.1+ deprecates ("Setting an item of incompatible dtype").
ranking_df = ranking_df.astype({"Age": "float64", "Lucky no": "float64"})

# Row 1: blank out Age and Education (column positions 1 and 2).
ranking_df.iloc[1:2, 1:3] = np.nan
# Row 3: blank out every column. This already covers the Education cell, so
# a separate assignment to iloc[3:4, 2:3] would be redundant.
ranking_df.iloc[3, :] = np.nan

In [29]:
ranking_df

Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
1,Tharun,,,5.0
2,Dhanush,23.0,BABL,2.0
3,,,,
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [30]:
# Boolean frame of the same shape: True where a cell is missing (NaN),
# False where it holds a value.
ranking_df.isnull()

Unnamed: 0,Names,Age,Education,Lucky no
0,False,False,False,False
1,False,True,True,False
2,False,False,False,False
3,True,True,True,True
4,False,False,False,False
5,False,False,False,False


In [32]:
# Boolean frame of the same shape: True where a cell holds a value,
# False where it is missing (NaN).
ranking_df.notnull()

Unnamed: 0,Names,Age,Education,Lucky no
0,True,True,True,True
1,True,False,False,True
2,True,True,True,True
3,False,False,False,False
4,True,True,True,True
5,True,True,True,True


In [36]:
"""
pd.isnull(...): Checks each value in the 'Education' column to see if it is null (missing).
ranking_df['Education']: Accesses the 'Education' column from the DataFrame ranking_df.
"""

boolseries = pd.isnull(ranking_df['Education'])
ranking_df[boolseries]

Unnamed: 0,Names,Age,Education,Lucky no
1,Tharun,,,5.0
3,,,,


In [34]:
boolseries

0    False
1     True
2    False
3     True
4    False
5    False
Name: Education, dtype: bool

## Filling the missing values

In [37]:
# Replace every missing value with 0. This returns a new DataFrame;
# ranking_df itself is left unchanged.
ranking_df.fillna(0)

Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
1,Tharun,0.0,0,5.0
2,Dhanush,23.0,BABL,2.0
3,0,0.0,0,0.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [39]:
"""
.fillna(...): A Pandas method used to fill missing values.
method='pad': Tells Pandas to use forward fill, which means:
              If a value is missing, use the value from the row above it.
"""

ranking_df.fillna(method = 'pad')

  ranking_df.fillna(method = 'pad')


Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
1,Tharun,22.0,MBA,5.0
2,Dhanush,23.0,BABL,2.0
3,Dhanush,23.0,BABL,2.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [40]:
"""
method='bfill': Stands for backward fill, meaning:
                If a value is missing, replace it with the value from the next row below.
"""

ranking_df.fillna(method = 'bfill')

  ranking_df.fillna(method = 'bfill')


Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
1,Tharun,23.0,BABL,5.0
2,Dhanush,23.0,BABL,2.0
3,Suba,20.0,B.com,6.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [42]:
"""
.interpolate(...): A Pandas method that fills in missing values using interpolation.
method='linear': Tells Pandas to estimate missing values by drawing a straight line between known values. This works best for numeric data.
"""
    
ranking_df.interpolate(method = 'linear')

  ranking_df.interpolate(method = 'linear')


Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
1,Tharun,22.5,,5.0
2,Dhanush,23.0,BABL,2.0
3,,21.5,,4.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [43]:
# Drop every row that contains at least one missing value.
# Returns a new DataFrame; the original row labels are kept.
ranking_df.dropna()

Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
2,Dhanush,23.0,BABL,2.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [44]:
# Drop only the rows in which every value is missing; rows that are
# partially filled are kept.
ranking_df.dropna(how = 'all')

Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
1,Tharun,,,5.0
2,Dhanush,23.0,BABL,2.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0


In [47]:
ranking_df.dropna(axis=0)  # axis=0 drops rows with missing values; axis=1 would drop columns

Unnamed: 0,Names,Age,Education,Lucky no
0,Kishore,22.0,MBA,4.0
2,Dhanush,23.0,BABL,2.0
4,Suba,20.0,B.com,6.0
5,Sneha,23.0,Msc.maths,1.0
