# Handling Missing Data: Replace function

In [1]:
import pandas as pd
import numpy as np

# Read the weather observations from the CSV file and show the resulting frame.
df = pd.read_csv(filepath_or_buffer='weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [2]:
# When a single special value marks missing data, replace it in one call.
# Every -99999 in the frame becomes NaN; the result is a new frame and
# `df` itself is left unchanged.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.
new_df = df.replace(-99999, np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,0
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,0


In [4]:
# Reload the weather data from disk and display it.
# NOTE(review): the output shown for this load contains -88888, which the
# first load of the same file did not -- presumably the CSV was edited on
# disk between runs; confirm before relying on Restart & Run All.
df = pd.read_csv(filepath_or_buffer='weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-88888,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [5]:
# With two or more special values, pass them as a list: every occurrence of
# any listed value is replaced by NaN.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.
new_df = df.replace([-99999, -88888], np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,0
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,0


In [6]:
# Reload the weather data; the following cell replaces values on a
# per-column basis.
df = pd.read_csv(filepath_or_buffer='weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [8]:
# Column-specific replacement: the dict maps column name -> value to look for
# in that column only, and each match is replaced by NaN.
# 'event' uses the string '0' because that column holds text.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.
new_df = df.replace({'temperature': -99999,
                     'windspeed': -99999,
                     'event': '0'}, np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/2/2017,,7.0,Sunny
2,1/3/2017,28.0,,Snow
3,1/4/2017,,7.0,
4,1/5/2017,32.0,,Rain
5,1/6/2017,31.0,2.0,Sunny
6,1/6/2017,34.0,5.0,


In [9]:
# Reload the weather data from disk and display it.
# NOTE(review): the output shown for this load has unit suffixes ("32 C",
# "6 mph") that earlier loads of the same file did not -- presumably the CSV
# was edited on disk between runs; confirm.
df = pd.read_csv(filepath_or_buffer='weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 C,6 mph,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7 mph,0
4,1/5/2017,32 C,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [11]:
# Strip unit suffixes such as "mph" or "C" by deleting every ASCII letter with
# a regular expression. With no column given, the pattern is applied to the
# whole frame, so the text in 'event' is blanked as well (see the output).
new_df = df.replace(to_replace='[A-Za-z]', value='', regex=True)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,
1,1/2/2017,-99999,7,
2,1/3/2017,28,-99999,
3,1/4/2017,-99999,7,0.0
4,1/5/2017,32,-99999,
5,1/6/2017,31,2,
6,1/6/2017,34,5,0.0


In [13]:
# The previous cell applied the pattern to the entire dataframe. To leave
# 'event' alone, give the pattern per column: only 'temperature' and
# 'windspeed' have their letters removed.
new_df = df.replace(
    to_replace=dict(temperature='[A-Za-z]', windspeed='[A-Za-z]'),
    value='',
    regex=True,
)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [14]:
# Small example frame for replacing one list of values with another:
# one row per student, holding a textual score and the student's name.
df = pd.DataFrame(
    [
        ('exceptional', 'satish'),
        ('average', 'brahma'),
        ('good', 'murali'),
        ('poor', 'ramu'),
        ('average', 'srinu'),
        ('exceptional', 'naveen'),
    ],
    columns=['score', 'student'],
)
df

Unnamed: 0,score,student
0,exceptional,satish
1,average,brahma
2,good,murali
3,poor,ramu
4,average,srinu
5,exceptional,naveen


In [16]:
# Turn the textual scores into numbers. A dict passed as the only argument
# maps each existing value to its replacement, pair by pair.
new_df = df.replace({'poor': 1, 'average': 2, 'good': 3, 'exceptional': 4})
new_df

Unnamed: 0,score,student
0,4,satish
1,2,brahma
2,3,murali
3,1,ramu
4,2,srinu
5,4,naveen
