# Pandas Tips: `dropna()`

In [1]:
import pandas as pd

Check your pandas version to ensure similar behavior. 

_Version 2.1.1 was released September 2023._

In [2]:
# Show the installed pandas version so the outputs below can be compared like for like.
pd.__version__

'2.1.1'

### Load data

Load data from GitHub.

_Check out my [read_csv video](https://youtu.be/sTXr73fqybc) to learn more about this step._

In [3]:
# Read the pet data CSV directly from its raw GitHub URL into a DataFrame.
df = pd.read_csv(
    'https://raw.githubusercontent.com/kimfetti/Videos/master/Pandas_Tips/data/pet_data.csv'
)

In [4]:
# Preview the first rows; several entries in amount and brand are already missing.
df.head()

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,


In [5]:
# The non-null count per column shows how many values are missing in each one.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       491 non-null    object
 1   pet_type   490 non-null    object
 2   food_type  434 non-null    object
 3   amount     388 non-null    object
 4   brand      85 non-null     object
dtypes: object(5)
memory usage: 19.7+ KB


## Basics

### Drop rows missing any value

In [6]:
# (rows, columns) before anything is dropped -- the baseline for the comparisons below.
df.shape

(500, 5)

In [7]:
# With no arguments, dropna() removes every row that has at least one missing value.
# It returns a new DataFrame; df itself is not modified.
df.dropna()

Unnamed: 0,name,pet_type,food_type,amount,brand
9,Tiger,Fish,Pellets,173g,ChowTime
18,Charlie,Dog,Dry Kibble,84g,GreenHarvest
19,Harley,Cat,Wet Food,90g,Pawsome
21,Jasper,Rabbit,Hay,37g,SnackLand
23,Mittens,Dog,Wet Food,91g,TropicalFeast
31,Winston,Fish,Pellets,37g,GrainyGraze
38,Penny,Guinea Pig,Pellets,119g,TropicalFeast
46,Nala,Parrot,Pellets,31g,GrainyGraze
57,Loki,Dog,Wet Food,173g,Pawsome
74,Zoe,Hamster,Seeds,49g,YummyDelight


In [8]:
# Only rows with a value in every column survive; compare with df.shape above.
df.dropna().shape

(56, 5)

In [9]:
# Confirm that every column is fully non-null once incomplete rows are removed.
df.dropna().info()

<class 'pandas.core.frame.DataFrame'>
Index: 56 entries, 9 to 495
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       56 non-null     object
 1   pet_type   56 non-null     object
 2   food_type  56 non-null     object
 3   amount     56 non-null     object
 4   brand      56 non-null     object
dtypes: object(5)
memory usage: 2.6+ KB


### Drop rows missing all values

In [10]:
# how='all' drops a row only when every column in that row is missing.
df.dropna(how='all')

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,
...,...,...,...,...,...
495,Shadow,Dog,Wet Food,229g,TropicalFeast
496,Nala,Snake,Eggs,114g,
497,Finn,Turtle,Pellets,,
498,Finn,Hamster,Seeds,55g,


In [11]:
# Compare with the original row count to see how many completely empty rows were removed.
df.dropna(how='all').shape

(496, 5)

## $\star$ Level Up $\star$

### Drop rows with missing values in specific column(s)

In [12]:
# df is unchanged by the earlier calls, which each returned a new DataFrame.
df.head()

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,


In [13]:
# subset restricts the missing-value check to the given column(s);
# here a single column name is passed as a string.
df.dropna(subset='name')

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,
...,...,...,...,...,...
495,Shadow,Dog,Wet Food,229g,TropicalFeast
496,Nala,Snake,Eggs,114g,
497,Finn,Turtle,Pellets,,
498,Finn,Hamster,Seeds,55g,


In [14]:
# name is now fully populated, while the other columns can still contain missing values.
df.dropna(subset='name').info()

<class 'pandas.core.frame.DataFrame'>
Index: 491 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       491 non-null    object
 1   pet_type   489 non-null    object
 2   food_type  431 non-null    object
 3   amount     383 non-null    object
 4   brand      84 non-null     object
dtypes: object(5)
memory usage: 23.0+ KB


In [15]:
# With a list of columns, a row is dropped if it is missing a value in any of them.
df.dropna(subset=['name', 'pet_type'])

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,
...,...,...,...,...,...
495,Shadow,Dog,Wet Food,229g,TropicalFeast
496,Nala,Snake,Eggs,114g,
497,Finn,Turtle,Pellets,,
498,Finn,Hamster,Seeds,55g,


In [16]:
# Both name and pet_type are complete afterwards; the remaining columns may still have gaps.
df.dropna(subset=['name', 'pet_type']).info()

<class 'pandas.core.frame.DataFrame'>
Index: 489 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       489 non-null    object
 1   pet_type   489 non-null    object
 2   food_type  429 non-null    object
 3   amount     381 non-null    object
 4   brand      84 non-null     object
dtypes: object(5)
memory usage: 22.9+ KB


In [17]:
# Combine subset with how='all': drop a row only when BOTH name and pet_type are missing.
df.dropna(subset=['name', 'pet_type'], how='all')

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,
...,...,...,...,...,...
495,Shadow,Dog,Wet Food,229g,TropicalFeast
496,Nala,Snake,Eggs,114g,
497,Finn,Turtle,Pellets,,
498,Finn,Hamster,Seeds,55g,


In [18]:
# Rows missing just one of the two columns are kept, so name and pet_type can still have gaps.
df.dropna(subset=['name', 'pet_type'], how='all').info()

<class 'pandas.core.frame.DataFrame'>
Index: 492 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       491 non-null    object
 1   pet_type   490 non-null    object
 2   food_type  432 non-null    object
 3   amount     384 non-null    object
 4   brand      84 non-null     object
dtypes: object(5)
memory usage: 23.1+ KB


### Drop columns with missing values

In [19]:
# axis=1 checks columns instead of rows: any column containing a missing value is dropped.
# Every column in this dataset has at least one gap, so only the index is left.
df.dropna(axis=1)

0
1
2
3
4
...
495
496
497
498
499


In [20]:
# Reminder of why nothing survived: each column has fewer non-null values than there are rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       491 non-null    object
 1   pet_type   490 non-null    object
 2   food_type  434 non-null    object
 3   amount     388 non-null    object
 4   brand      85 non-null     object
dtypes: object(5)
memory usage: 19.7+ KB


In [21]:
# Drop only columns that are entirely missing; none are here, so all columns are kept.
df.dropna(axis=1, how='all')

Unnamed: 0,name,pet_type,food_type,amount,brand
0,Simba,Lizard,Insects,212g,
1,Winston,Ferret,Dry Food,,Pawsome
2,Bella,Parrot,Seeds,,
3,Shadow,Hamster,Pellets,58g,
4,Milo,Rabbit,Vegetables,124g,
...,...,...,...,...,...
495,Shadow,Dog,Wet Food,229g,TropicalFeast
496,Nala,Snake,Eggs,114g,
497,Finn,Turtle,Pellets,,
498,Finn,Hamster,Seeds,55g,


### Set threshold for number of non-missing values

In [22]:
# Despite its name, this value is passed to dropna(thresh=...), which is the minimum
# number of NON-missing values a column must have in order to be kept.
# Half the row count is rounded UP: truncating (int(n * .50)) would, for an odd number
# of rows, keep a column that is missing more than half of its entries.
missing_threshold = (df.shape[0] + 1) // 2

missing_threshold

250

In [23]:
# thresh is the minimum number of non-missing values a column needs in order to be kept,
# so columns with fewer than missing_threshold non-null entries are dropped.
df.dropna(axis=1, thresh=missing_threshold)

Unnamed: 0,name,pet_type,food_type,amount
0,Simba,Lizard,Insects,212g
1,Winston,Ferret,Dry Food,
2,Bella,Parrot,Seeds,
3,Shadow,Hamster,Pellets,58g
4,Milo,Rabbit,Vegetables,124g
...,...,...,...,...
495,Shadow,Dog,Wet Food,229g
496,Nala,Snake,Eggs,114g
497,Finn,Turtle,Pellets,
498,Finn,Hamster,Seeds,55g


In [24]:
# Count the missing values in each column to see which ones fall short of the threshold.
df.isna().sum()

name           9
pet_type      10
food_type     66
amount       112
brand        415
dtype: int64

### Make permanent changes

In [25]:
# State of df before the in-place change below.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       491 non-null    object
 1   pet_type   490 non-null    object
 2   food_type  434 non-null    object
 3   amount     388 non-null    object
 4   brand      85 non-null     object
dtypes: object(5)
memory usage: 19.7+ KB


In [26]:
# inplace=True modifies df itself and returns None, which is why this cell shows no output.
# Reassigning instead -- df = df.dropna(how='all') -- has the same effect on df.
df.dropna(how='all', inplace=True)

In [27]:
# df itself has now lost its completely empty rows; compare the entry count with the cell above.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 496 entries, 0 to 499
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       491 non-null    object
 1   pet_type   490 non-null    object
 2   food_type  434 non-null    object
 3   amount     388 non-null    object
 4   brand      85 non-null     object
dtypes: object(5)
memory usage: 23.2+ KB
