In [1]:
import pandas as pd
import numpy as np

# Load the weather observations with `day` parsed as datetimes and used as the
# row index (the time-based interpolation later in this notebook relies on a
# datetime index). Chaining `set_index` avoids mutating the frame in place.
df = pd.read_csv("weather_data.csv", parse_dates=["day"]).set_index("day")

# Fail fast if `day` was not parsed as dates. This replaces a bare
# `type(df.day[0])` expression whose result was never displayed.
assert pd.api.types.is_datetime64_any_dtype(df.index)
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-02,,9,Sunny
2017-01-03,35.0,7,Sunny
2017-01-04,,7,
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


In [2]:
# Replace every missing value, in every column, with the same scalar.
# Note that the text column `event` also receives the number 0.
new_df = df.fillna(value=0)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-02,0.0,9,Sunny
2017-01-03,35.0,7,Sunny
2017-01-04,0.0,7,0
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


In [3]:
# Per-column fill values: each key is a column name and each value is what
# replaces NaN in that column.
fill_values = {'temperature': 0, 'windspeed': 0, 'event': 'no event'}
new_df = df.fillna(value=fill_values)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-02,0.0,9,Sunny
2017-01-03,35.0,7,Sunny
2017-01-04,0.0,7,no event
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


In [4]:
# Forward fill: each NaN takes the last valid value above it in the same column.
# `DataFrame.ffill()` is used because `fillna(method="ffill")` is deprecated in
# pandas 2.1 and later.
# `DataFrame.bfill()` is the mirror image: each NaN takes the next valid value
# below it.
new_df = df.ffill()
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-02,32.0,9,Sunny
2017-01-03,35.0,7,Sunny
2017-01-04,35.0,7,Sunny
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


In [5]:
# Back fill along each row instead of down each column: a NaN takes the next
# valid value to its right (a missing temperature is filled from windspeed).
# A NaN in the last column has nothing to its right and stays NaN.
# `DataFrame.bfill()` is used because `fillna(method="bfill")` is deprecated in
# pandas 2.1 and later.
new_df = df.bfill(axis="columns")
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32,6,Rain
2017-01-02,9,9,Sunny
2017-01-03,35,7,Sunny
2017-01-04,7,7,
2017-01-05,28,2,Snow
2017-01-07,24,7,Snow
2017-01-08,32,4,Rain
2017-01-09,32,2,Sunny


In [6]:
# Estimate missing numeric values from the neighbouring rows.
# "linear" is the default method and is spelled out here for clarity; it treats
# the rows as equally spaced. The text column `event` is not interpolated.
new_df = df.interpolate(method="linear")
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-02,33.5,9,Sunny
2017-01-03,35.0,7,Sunny
2017-01-04,31.5,7,
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


In [7]:
# Time-aware interpolation: gaps are weighted by the distance between the index
# timestamps rather than by row position.
# For this data the result matches the linear case, because both missing
# temperatures sit between consecutive days.
new_df = df.interpolate(method='time')
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-02,33.5,9,Sunny
2017-01-03,35.0,7,Sunny
2017-01-04,31.5,7,
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


In [16]:
# Drop every row that has at least one missing value
# ("any" is the default rule, written out explicitly).
new_df = df.dropna(how="any")
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,32.0,6,Rain
2017-01-03,35.0,7,Sunny
2017-01-05,28.0,2,Snow
2017-01-07,24.0,7,Snow
2017-01-08,32.0,4,Rain
2017-01-09,32.0,2,Sunny


## DataFrame Replace

In [17]:
# Re-read the raw CSV; this time `day` stays an ordinary column.
# NOTE(review): the output below contains rows and unit suffixes ("32 F",
# "6 mph") that the first load of this file did not show -- presumably the CSV
# was edited between runs; confirm.
df = pd.read_csv('weather_data.csv')

# Column-specific sentinel values that should be treated as missing.
# - `np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.
# - The second temperature sentinel is -888888 (as it appears in the data);
#   the previous literal had one digit too many.
# NOTE(review): the displayed output still shows the sentinels, presumably
# because the unit suffixes make the column text, so numeric sentinels never
# match -- TODO confirm and clean the units first if so.
new_df = df.replace(
    {
        'temperature': [-999999, -888888],
        'windspeed': -999999,
        'event': 0,
    },
    np.nan,
)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,,9 mph,Sunny
2,1/3/2017,35 C,7,Sunny
3,1/4/2017,,7,
4,1/5/2017,28,2,Snow
5,1/7/2017,24,7,Snow
6,1/8/2017,32,4,Rain
7,1/9/2017,32,2,Sunny
8,1/10/2017,-999999,2,Sunny
9,1/10/2017,-888888,2,No Event


In [18]:
# Frame-wide value mapping: each key is a value to look for in any column and
# each dict value is its replacement.
# `np.nan` replaces the `np.NaN` alias, which was removed in NumPy 2.0.
new_df = df.replace({
    -999999: np.nan,
    'No Event': 'Sunny'
})
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32 F,6 mph,Rain
1,1/2/2017,,9 mph,Sunny
2,1/3/2017,35 C,7,Sunny
3,1/4/2017,,7,
4,1/5/2017,28,2,Snow
5,1/7/2017,24,7,Snow
6,1/8/2017,32,4,Rain
7,1/9/2017,32,2,Sunny
8,1/10/2017,-999999,2,Sunny
9,1/10/2017,-888888,2,Sunny


In [22]:
# Strip unit letters (e.g. "F", "C", "mph") from the two measurement columns.
# With regex=True each dict value is a pattern applied only to the column named
# by its key, and every match is replaced by the empty string.
# The pattern matches letters only; whitespace before a unit is left as is.
unit_letters = '[A-Za-z]'
new_df = df.replace(
    {'temperature': unit_letters, 'windspeed': unit_letters},
    '',
    regex=True,
)
new_df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32.0,6,Rain
1,1/2/2017,,9,Sunny
2,1/3/2017,35.0,7,Sunny
3,1/4/2017,,7,
4,1/5/2017,28.0,2,Snow
5,1/7/2017,24.0,7,Snow
6,1/8/2017,32.0,4,Rain
7,1/9/2017,32.0,2,Sunny
8,1/10/2017,-999999.0,2,Sunny
9,1/10/2017,-888888.0,2,No Event


In [23]:
# Small demo frame: one row per student with a text grade.
# NOTE(review): the first score is spelled 'exeptional' while the last is
# 'exceptional'; a replace keyed on 'exceptional' will not match the first
# row -- confirm whether that is intentional.
df = pd.DataFrame(
    dict(
        score=['exeptional', 'average', 'good', 'poor', 'average', 'exceptional'],
        student=['rob', 'maya', 'parhiv', 'tom', 'julian', 'erica'],
    )
)

df

Unnamed: 0,score,student
0,exeptional,rob
1,average,maya
2,good,parhiv
3,poor,tom
4,average,julian
5,exceptional,erica


In [25]:
# Map each text grade to an ordinal number across the whole frame.
# Values that are not keys of the mapping are left untouched.
grade_to_rank = {'poor': 1, 'average': 2, 'good': 3, 'exceptional': 4}
new_df = df.replace(grade_to_rank)
new_df

Unnamed: 0,score,student
0,exeptional,rob
1,2,maya
2,3,parhiv
3,1,tom
4,2,julian
5,4,erica
