In [207]:
import numpy as np
import pandas as pd

In [208]:
# Load the sample dataset that contains missing values and preview the first rows
df = pd.read_csv(filepath_or_buffer='missing_values.csv')
df.head(5)

Unnamed: 0,A,B,C
0,0.432492,0.532161,0.700842
1,,0.827934,0.87432
2,0.494307,,0.857627
3,0.844698,0.394283,0.798843
4,0.340938,0.97547,0.729233


In [209]:
# Keep only the rows that have no missing value in any column
drop_data = df.dropna(axis=0, how='any')
drop_data.head(5)

Unnamed: 0,A,B,C
0,0.432492,0.532161,0.700842
3,0.844698,0.394283,0.798843
4,0.340938,0.97547,0.729233
5,0.321776,0.512439,0.481819
7,0.532083,0.853227,0.239124


In [210]:
# Substitute the constant 69 for every missing value.
# fillna returns a new DataFrame here; `df` itself is only modified if `inplace=True` is passed.
sx = df.fillna(value=69)
sx.head(5)

Unnamed: 0,A,B,C
0,0.432492,0.532161,0.700842
1,69.0,0.827934,0.87432
2,0.494307,69.0,0.857627
3,0.844698,0.394283,0.798843
4,0.340938,0.97547,0.729233


In [220]:
# Promote column 'A' to the index, then order the rows by that index (ascending)
set_idx = drop_data.set_index(keys='A')
sort_idx = set_idx.sort_index(ascending=True)
sort_idx.head(5)

Unnamed: 0_level_0,B,C
A,Unnamed: 1_level_1,Unnamed: 2_level_1
0.219422,0.710549,0.279095
0.321776,0.512439,0.481819
0.340938,0.97547,0.729233
0.399669,0.993558,0.453593
0.432492,0.532161,0.700842


In [222]:
# Move 'A' back into a regular column, then build a two-level index from 'A' and 'B'
reset_idx = sort_idx.reset_index(drop=False)
multi_idx = reset_idx.set_index(keys=['A', 'B'])
multi_idx.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,C
A,B,Unnamed: 2_level_1
0.219422,0.710549,0.279095
0.321776,0.512439,0.481819
0.340938,0.97547,0.729233
0.399669,0.993558,0.453593
0.432492,0.532161,0.700842


In [228]:
# Forward-fill: each missing value takes the last valid value above it in the same column.
# DataFrame.ffill() is used instead of fillna(method='ffill'), which is deprecated in
# recent pandas releases; the result is the same.
fill_data = df.ffill()
fill_data.head()

Unnamed: 0,A,B,C
0,0.432492,0.532161,0.700842
1,0.432492,0.827934,0.87432
2,0.494307,0.827934,0.857627
3,0.844698,0.394283,0.798843
4,0.340938,0.97547,0.729233


In [235]:
# Build a 10x10 frame of random integers in [1, 69) to demonstrate value replacement.
# NOTE: the draw is unseeded, so the values (and whether a 1 appears at all) vary per run.
random_ints = np.random.randint(low=1, high=69, size=(10, 10))
r_data = pd.DataFrame(data=random_ints)
r_data.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,15,32,56,41,31,68,25,49,47,56
1,60,41,28,39,27,46,14,67,52,21
2,9,3,33,55,12,57,5,51,45,23
3,2,19,59,39,55,32,8,12,9,4
4,44,34,50,7,39,7,17,1,40,22


In [236]:
# Show a copy of r_data in which every 1 has been swapped for 42 (r_data itself is not reassigned)
r_data.replace(to_replace=1, value=42)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,15,32,56,41,31,68,25,49,47,56
1,60,41,28,39,27,46,14,67,52,21
2,9,3,33,55,12,57,5,51,45,23
3,2,19,59,39,55,32,8,12,9,4
4,44,34,50,7,39,7,17,42,40,22
5,60,50,52,27,24,54,36,41,12,15
6,33,44,32,22,52,20,57,14,55,43
7,10,41,59,19,46,2,58,46,42,37
8,39,67,14,26,63,24,63,68,51,49
9,28,48,52,65,29,58,5,68,33,61


In [237]:
# Swap several values at once: the two lists are paired by position (1 -> 42, 7 -> 420)
r_data.replace(to_replace=[1, 7], value=[42, 420])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,15,32,56,41,31,68,25,49,47,56
1,60,41,28,39,27,46,14,67,52,21
2,9,3,33,55,12,57,5,51,45,23
3,2,19,59,39,55,32,8,12,9,4
4,44,34,50,420,39,420,17,42,40,22
5,60,50,52,27,24,54,36,41,12,15
6,33,44,32,22,52,20,57,14,55,43
7,10,41,59,19,46,2,58,46,42,37
8,39,67,14,26,63,24,63,68,51,49
9,28,48,52,65,29,58,5,68,33,61


In [242]:
# Read the small site table used for the regex-replacement example and display it in full
reg_txt = pd.read_csv(filepath_or_buffer='missing_regex.txt')
reg_txt

Unnamed: 0,Site,X
0,a.com,1
1,b.com,2
2,c.com,3
3,d.com,4
4,e.com,5


In [248]:
# Overwrite every string cell that ends in ".com" with the keyword 'website'.
# The replace runs over the whole frame; in the data shown only the 'Site' column matches.
# The dot before "com" is escaped so it matches a literal '.', not any character
# (the unescaped pattern would also have matched strings such as "telecom").
reg_txt.replace(to_replace=r".*\.com$", value="website", regex=True)

Unnamed: 0,Site,X
0,website,1
1,website,2
2,website,3
3,website,4
4,website,5
