# Replace

`df.replace(
    to_replace = None,
    value = None,
    inplace = False,
    limit = None,
    regex = False,
    method = 'pad'
)`

- to_replace : str, regex, list, dict, Series, int, float, or None<br>
  How to find the values that will be replaced<br>

- value : scalar, dict, list, str, regex, default None<br>
  Value to substitute for every value that matches `to_replace`<br>

In [7]:
import pandas as pd
import numpy as np

In [2]:
df_students = pd.read_csv("./datasets/df_na.csv")

df_students

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


## Replace 'Paul' with 'Alexander'

In [3]:
df_students.replace(to_replace='Paul', value='Alexander')

Unnamed: 0,Name,Score,Grades
0,Alexander,98.0,
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


In [4]:
# replace directly
df_students.replace('Paul', 'Alexander')

Unnamed: 0,Name,Score,Grades
0,Alexander,98.0,
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


## Replace 'Paul' and 'Aaron' with 'Alexander'

In [5]:
df_students.replace(to_replace=['Paul', 'Aaron'], value='Alexander')

Unnamed: 0,Name,Score,Grades
0,Alexander,98.0,
1,Alexander,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


In [6]:
# Replace directly
df_students.replace(['Paul', 'Aaron'], 'Alexander')

Unnamed: 0,Name,Score,Grades
0,Alexander,98.0,
1,Alexander,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


## Replace 'NaN' with 78

In [8]:
df_students.replace(np.nan, 78)

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,78
1,Aaron,78.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,78
4,Paxton,90.0,AC
5,Madison,78.0,BA
6,Aurora,82.0,BB


## Replace NaN in 'Score' with 78 and NaN in 'Grades' with BC

In [10]:
# using dictionary
df_students.replace({'Score' : {np.nan : 78},
                    'Grades' : {np.nan : 'BC'}})

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,BC
1,Aaron,78.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,BC
4,Paxton,90.0,AC
5,Madison,78.0,BA
6,Aurora,82.0,BB


## Replace 'not available', np.nan, and '-99' in 'Grades' with 'BC'

In [11]:
df_students_min = pd.read_csv("./datasets/df_na_nav_min.csv")

df_students_min

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,not available
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB
7,Pauline,78.0,-99


In [16]:
df_students_min.replace({'Grades' : ['not available', np.nan, '-99']}, 'BC', inplace=True)
# add inplace=True to make changes to the dataframe permanently

## Replace Grades values: <br>
   BC : Fail <br>
   AB : Very Good <br>
   AA : Excellent <br>
   BA : Good <br>

In [15]:
df_students_min.replace(['BC', 'AB', 'AA', 'BA'],
                        ['Fail', 'Very Good', 'Excellent', 'Good'])

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,not available
1,Aaron,,Very Good
2,Krista,99.0,Excellent
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,Good
6,Aurora,82.0,BB
7,Pauline,78.0,-99


## Replace NaN values with ffill

In [17]:
df_students_min

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,BC
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,BC
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB
7,Pauline,78.0,BC


In [18]:
df_students_min.replace(np.nan, method='ffill')
# df_students_min.replace(np.nan, method='ffill', limit=1)

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,BC
1,Aaron,98.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,BC
4,Paxton,90.0,AC
5,Madison,90.0,BA
6,Aurora,82.0,BB
7,Pauline,78.0,BC


## Clean data in 'Score' using regex

In [19]:
df_regex = pd.read_csv("./datasets/df_na_regex.csv")

df_regex

Unnamed: 0,Name,Score,Grades
0,Paul,98M,not available
1,Aaron,,AB
2,Krista,99M,AA
3,Veronica,87,
4,Paxton,90M,AC
5,Madison,,BA
6,Aurora,82M,BB
7,Pauline,78M,-99


In [20]:
df_regex.replace({"Score": '[A-Za-z]'}, " ", regex=True)

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,not available
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB
7,Pauline,78.0,-99


In [22]:
type(df_regex.loc[2, 'Score'])

str

## Replace NaN values in 'Score' with 0

In [23]:
df_zero = pd.read_csv("./datasets/df_na_nav.csv")

df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,not available
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


In [24]:
df_zero.replace({'Score': np.nan}, 0)

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,not available
1,Aaron,0.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,0.0,BA
6,Aurora,82.0,BB


## Replace the first value of 'Grades' ('not available') with 'AA' using `at`

`at` accesses a single value for a row/column label pair

In [25]:
df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,not available
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


In [26]:
df_zero.at[0, 'Grades'] = 'AA'

df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,AA
1,Aaron,,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


## Replace NaN value (first value only) of 'Score' column with 0 using `iat`

`iat` accesses a single value for a row/column pair by integer position

In [27]:
df_zero.iat[1, 1] = 0

df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,AA
1,Aaron,0.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,,BA
6,Aurora,82.0,BB


## Replace the remaining NaN value of 'Score' column with 0 using `at`

`at` accesses a single value for a row/column label pair

In [30]:
df_zero.at[5, 'Score'] = 0

df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,AA
1,Aaron,0.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,AC
5,Madison,0.0,BA
6,Aurora,82.0,BB


In [31]:
df_zero.at[4, 'Grades'] = np.nan

df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,AA
1,Aaron,0.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,
5,Madison,0.0,BA
6,Aurora,82.0,BB


## Replace consecutive NaN values of 'Grades' with 'AB' using `iloc`

`iloc` accesses a group of rows and columns by integer position

In [32]:
df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,AA
1,Aaron,0.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,
4,Paxton,90.0,
5,Madison,0.0,BA
6,Aurora,82.0,BB


In [36]:
df_zero.iloc[3:5, 2] = 'AB'

df_zero

Unnamed: 0,Name,Score,Grades
0,Paul,98.0,AA
1,Aaron,0.0,AB
2,Krista,99.0,AA
3,Veronica,87.0,AB
4,Paxton,90.0,AB
5,Madison,0.0,BA
6,Aurora,82.0,BB


## Replace consecutive NaN values of 'Grades' column with 'AB' using `loc`

`loc` accesses a group of rows and columns by labels; unlike `iloc`, a label slice such as `3:4` includes its end label

In [37]:
df_cons = pd.read_csv("./datasets/df_na_cons.csv")

df_cons

Unnamed: 0,Name,Score,Grades
0,Paul,98M,not available
1,Aaron,,AB
2,Krista,99M,AA
3,Veronica,87,
4,Paxton,90M,
5,Madison,,BA
6,Aurora,82M,BB
7,Pauline,78M,-99


In [40]:
df_cons.loc[3:4, 'Grades'] = 'AB'

df_cons

Unnamed: 0,Name,Score,Grades
0,Paul,98M,not available
1,Aaron,,AB
2,Krista,99M,AA
3,Veronica,87,AB
4,Paxton,90M,AB
5,Madison,,BA
6,Aurora,82M,BB
7,Pauline,78M,-99
