In [4]:
import pandas as pd
import numpy as np

In [44]:
# Sample records mixing several kinds of missing markers: real missing values
# (np.nan, None) and the custom placeholder strings "NA" and "Missing".
# Note that the ages are stored as strings, not numbers.
people = {
    'first': ['Corey', 'Jane', 'John', 'Chris', np.nan, None, 'NA'],
    'last': ['Schafer', 'Doe', 'Doe', 'Schafer', np.nan, np.nan, 'Missing'],
    'email': ['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com', None, np.nan, 'Anonymous@email.com', 'NA'],
    'age': ['33', '55', '63', '36', None, None, 'Missing']
}
df = pd.DataFrame(people)
# Replace the custom placeholders with np.nan so that dropna/isna/fillna
# recognise them as missing values.
# np.nan is used instead of the np.NAN alias, which was removed in NumPy 2.0.
# The result is assigned back (no inplace=True) and both placeholders are
# handled in a single call.
df = df.replace(["NA", "Missing"], np.nan)

In [23]:
# Display the whole DataFrame to see where the missing values are
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [24]:
# dropna() with its default arguments (axis="index", how="any"):
# every row that contains at least one missing value is dropped
df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [25]:
# axis="index" drops rows (the alternative, axis="columns", drops columns)
# how="any" drops a row as soon as one of its values is missing
df.dropna(how="any", axis="index")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [26]:
# how="all" is less aggressive: a row is dropped only when every value in it is missing
df.dropna(how="all", axis="index")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
5,,,Anonymous@email.com,


In [27]:
# With axis="columns" the rule is applied to columns instead of rows:
# a column is dropped only if all of its values are missing
df.dropna(how="all", axis="columns")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [28]:
# Only the email column is checked: rows whose email is missing are dropped
# With a single column in subset, how="any" and how="all" behave the same
df.dropna(subset=["email"], how="any", axis="index")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
5,,,Anonymous@email.com,


In [29]:
# Drop a row only when email AND last are both missing
df.dropna(subset=["email", "last"], how="all", axis="index")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
5,,,Anonymous@email.com,


In [30]:
# Drop a row when email OR last is missing
# axis="index" is the default, so it can be left out
df.dropna(subset=["last", "email"], how="any")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63


In [31]:
# Boolean mask with the same shape as the DataFrame:
# True where a value is missing, False otherwise
df.isna()

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,True,False
4,True,True,True,True
5,True,True,False,True
6,True,True,True,True


In [32]:
# Show a copy in which every missing value is replaced by the string "MISSING"
# (the result is not assigned back, so df itself keeps its missing values)
df.fillna("MISSING")

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Doe,JohnDoe@email.com,63
3,Chris,Schafer,MISSING,36
4,MISSING,MISSING,MISSING,MISSING
5,MISSING,MISSING,Anonymous@email.com,MISSING
6,MISSING,MISSING,MISSING,MISSING


In [42]:
# Inspect the dtype of each column
df.dtypes

first    object
last     object
email    object
age       int32
dtype: object

In [38]:
# Fill the missing values of the age column with 0.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df["age"]: that in-place call operates on an
# intermediate Series, which triggers a chained-assignment warning in
# pandas 2.x and leaves df unchanged once Copy-on-Write is enabled.
df["age"] = df["age"].fillna(0)

In [40]:
# Convert the values to integers.
# astype(int) uses NumPy's default integer type, whose width depends on the
# platform and NumPy version (the recorded df.dtypes output shows int32).
df["age"] = df["age"].astype(int)

In [45]:
# NOTE: the recorded TypeError presumably comes from running this while age
# still holds the strings from the people dict (object dtype): mean() has to
# add the values up first, and a str cannot be added to an int.
# The column must be converted to a numeric dtype before aggregating.
df["age"].mean()

TypeError: can only concatenate str (not "int") to str

In [46]:
# Convert age to float so that numeric aggregations such as mean() work;
# float can also represent NaN, so missing ages can stay missing
df["age"] = df["age"].astype(float)

In [48]:
# Mean of the numeric columns only.
# numeric_only=True is needed because first/last/email hold strings:
# since pandas 2.0, DataFrame.mean() no longer silently skips non-numeric
# columns and raises a TypeError instead.
df.mean(numeric_only=True)

age    46.75
dtype: float64

In [49]:
# Display the DataFrame again to inspect the age column after the float conversion
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Doe,JohnDoe@email.com,63.0
3,Chris,Schafer,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,
