## <font color="yellow"><h4 align="center">Handling Missing Data - replace method</h4></font>

In [11]:
import pandas as pd
import numpy as np
# Read the sample CSV into a DataFrame and display it so the placeholder
# values handled in the following cells are visible.
df = pd.read_csv(filepath_or_buffer="weather_data.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/4/2017,900,9,no event
2,1/5/2017,28,67,Snow
3,1/6/2017,599,7,Rain
4,1/7/2017,32,67,
5,1/8/2017,999,67,Sunny
6,1/9/2017,999,67,Rain
7,1/10/2017,34,8,Cloudy
8,1/11/2017,40,12,Sunny


**Replacing single/multiple values**

In [12]:
# A single placeholder can be replaced with: df.replace(900, value=np.nan)
# Here several placeholders are replaced in one call; every cell of the frame
# that equals one of the listed values becomes NaN.
placeholder_values = [900, 599, 999]
new_df = df.replace(to_replace=placeholder_values, value=np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6,Rain
1,1/4/2017,,9,no event
2,1/5/2017,28.0,67,Snow
3,1/6/2017,,7,Rain
4,1/7/2017,32.0,67,
5,1/8/2017,,67,Sunny
6,1/9/2017,,67,Rain
7,1/10/2017,34.0,8,Cloudy
8,1/11/2017,40.0,12,Sunny


**Replacing list with single value**

In [13]:
# Every occurrence of 599 or 999 is swapped for 0.
# 900 is not in the list, so it is left as-is.
values_to_zero = [599, 999]
new_df = df.replace(values_to_zero, 0)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/4/2017,900,9,no event
2,1/5/2017,28,67,Snow
3,1/6/2017,0,7,Rain
4,1/7/2017,32,67,
5,1/8/2017,0,67,Sunny
6,1/9/2017,0,67,Rain
7,1/10/2017,34,8,Cloudy
8,1/11/2017,40,12,Sunny


**Replacing per column**

In [14]:
# Per-column replacement: each key is a column name and its value is the
# placeholder (or list of placeholders) to look for in that column only.
# Every match is replaced with NaN.
# NOTE(review): the event column displayed above contains no '0' entries
# (its placeholder looks like 'no event'), so the 'event' rule appears to
# match nothing in this dataset -- confirm whether 'no event' was intended.
placeholders_by_column = {
    'temperature': [900, 599, 999],
    'windspeed': 67,
    'event': '0',
}
new_df = df.replace(placeholders_by_column, np.nan)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6.0,Rain
1,1/4/2017,,9.0,no event
2,1/5/2017,28.0,,Snow
3,1/6/2017,,7.0,Rain
4,1/7/2017,32.0,,
5,1/8/2017,,,Sunny
6,1/9/2017,,,Rain
7,1/10/2017,34.0,8.0,Cloudy
8,1/11/2017,40.0,12.0,Sunny


**Replacing by using mapping**

In [19]:
# Mapping form: each key is a value to find and the dict value is its
# replacement, so different inputs can map to different outputs.
# A list such as [900, 599] cannot be used as a dict key (lists are not
# hashable), so each value needs its own entry.
# 999 has no entry here, so it is left unchanged.
replacement_map = {
    900: np.nan,
    599: np.nan,
    'no event': 'mapped Sunny',
}
new_df = df.replace(replacement_map)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6,Rain
1,1/4/2017,,9,mapped Sunny
2,1/5/2017,28.0,67,Snow
3,1/6/2017,,7,Rain
4,1/7/2017,32.0,67,
5,1/8/2017,999.0,67,Sunny
6,1/9/2017,999.0,67,Rain
7,1/10/2017,34.0,8,Cloudy
8,1/11/2017,40.0,12,Sunny


**Regex: regular expression, used to detect patterns**

In [21]:
# Read the second sample CSV, in which some temperature and windspeed
# entries carry a unit suffix (see the displayed frame), and rebind df to it.
df = pd.read_csv(filepath_or_buffer="weather_data2.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/4/2017,900,9,no event
2,1/5/2017,28,67,Snow
3,1/6/2017,599,7 mph,Rain
4,1/7/2017,32,67,
5,1/8/2017,999,67,Sunny
6,1/9/2017,999,67,Rain
7,1/10/2017,34 C,8,Cloudy
8,1/11/2017,40,12,Sunny


In [25]:
# Some entries carry a unit suffix: windspeed like "6 mph" / "7 mph" and
# temperature like "32 F" / "34 C".
#
# A frame-wide df.replace('[A-Za-z]', '', regex=True) would also wipe the
# event column, because its values consist of letters, so the patterns are
# given per column instead.
#
# Each pattern also consumes the whitespace in front of the unit (\s*).
# Removing only the letters would turn "32 F" into "32 " and "6 mph" into
# "6 ", leaving a trailing space behind in the cleaned value.
unit_suffix_patterns = {
    'temperature': r'\s*[A-Za-z]+',
    'windspeed': r'\s*[a-z]+',
}
new_df = df.replace(unit_suffix_patterns, '', regex=True)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/4/2017,900,9,no event
2,1/5/2017,28,67,Snow
3,1/6/2017,599,7,Rain
4,1/7/2017,32,67,
5,1/8/2017,999,67,Sunny
6,1/9/2017,999,67,Rain
7,1/10/2017,34,8,Cloudy
8,1/11/2017,40,12,Sunny


**Replacing list with another list**

In [26]:
# Small example frame: one textual score label per student.
score_labels = ['exceptional', 'average', 'good', 'poor', 'average', 'exceptional']
student_names = ['rob', 'maya', 'sam', 'tom', 'julian', 'erica']
df = pd.DataFrame({'score': score_labels, 'student': student_names})
df

Unnamed: 0,score,student
0,exceptional,rob
1,average,maya
2,good,sam
3,poor,tom
4,average,julian
5,exceptional,erica


In [27]:
# List-to-list replacement: the i-th label is replaced by the i-th rank,
# so both lists must have the same length.
# NOTE(review): the saved output echoes this source line, which suggests a
# warning was raised when the cell ran (possibly pandas' FutureWarning about
# silent downcasting in replace) -- confirm against the installed version.
labels_in_rank_order = ['poor', 'average', 'good', 'exceptional']
rank_values = [1, 2, 3, 4]
df.replace(labels_in_rank_order, rank_values)

  df.replace(['poor', 'average', 'good', 'exceptional'], [1,2,3,4])


Unnamed: 0,score,student
0,4,rob
1,2,maya
2,3,sam
3,1,tom
4,2,julian
5,4,erica
