Handling Missing Data - replace method
--------------------------------------

In [2]:
import pandas as pd
import numpy as np

In [12]:
df = pd.read_csv("02weather_data.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,-99999,7 mph,Sunny
2,1/3/2017,28 c,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


Replacing single value
----------------------

In [13]:
new_df = df.replace(-99999, value=0.5)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,-99999,7 mph,Sunny
2,1/3/2017,28 c,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


Replacing list with single value
--------------------------------

In [18]:
# temperature, windspeed have string column value so use string values ('-99999' , '31')  

new_df = df.replace(to_replace=['-99999', '31', 'Sunny'], value=0.5)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,0.5,7 mph,0.5
2,1/3/2017,28 c,0.5,Snow
3,1/4/2017,0.5,7,0
4,1/5/2017,32,0.5,Rain
5,1/6/2017,0.5,2,0.5
6,1/6/2017,34,5,0


Replacing per column
--------------------

In [20]:
new_df = df.replace({
        'temperature': '-99999',
        'windspeed': '-99999',
        'event': '0'
    }, np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,,7 mph,Sunny
2,1/3/2017,28 c,,Snow
3,1/4/2017,,7,
4,1/5/2017,32,,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,


In [22]:
new_df = df.replace({
        'temperature': '-99999',
        'windspeed': '7 mph',
        'event': '0'
    }, value=0.5)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,0.5,0.5,Sunny
2,1/3/2017,28 c,-99999,Snow
3,1/4/2017,0.5,7,0.5
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0.5


Regex
-----

In [24]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,-99999,7 mph,Sunny
2,1/3/2017,28 c,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [27]:
# when windspeed is 6 mph, 7 mph etc. & temperature is 32 F, 28 F etc.

new_df = df.replace({'temperature': '[A-Za-z]', 'windspeed': '[a-z]'},'', regex=True) 
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [29]:
# when windspeed is 6 mph, 7 mph etc. & temperature is 32 F, 28 F etc.

new_df = df.replace({'temperature': '[A-Za-z]', 'windspeed': '[a-z]'},'h', regex=True) 
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 h,6 hhh,Rain
1,1/2/2017,-99999,7 hhh,Sunny
2,1/3/2017,28 h,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


Replacing list with another list
--------------------------------

In [30]:
df = pd.DataFrame({
    'score': ['exceptional','average', 'good', 'poor', 'average', 'exceptional'],
    'student': ['rob', 'maya', 'parthiv', 'tom', 'julian', 'erica']
})
df

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,parthiv
3,poor,tom
4,average,julian
5,exceptional,erica


In [32]:
# change Score values

df.replace(['poor', 'average', 'good', 'exceptional'], [1,2,3,4])

  df.replace(['poor', 'average', 'good', 'exceptional'], [1,2,3,4])


Unnamed: 0,score,student
0,4,rob
1,2,maya
2,3,parthiv
3,1,tom
4,2,julian
5,4,erica
