In [None]:
#| default_exp basic_numerical_summaries

# Essential numerical summaries of missing values

> This section introduces you to multiple essential functions to **summarize** missing values into a **single number**.

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
from pandas_missing.Missing import PandasMissingDataFrame, PandasMissingSeries
from fastcore.basics import patch

# Series summaries

## Dummy data for demonstration purposes

In [None]:
# Build the Series as float64 up front: the missing-value marker assigned
# below cannot be stored in an integer Series, and relying on the assignment
# itself to upcast the dtype is deprecated in recent pandas releases.
s = pd.Series(range(0, 100), dtype="float64")

# Blank out two positional runs: 20 + 5 = 25 missing values in total.
s.iloc[40:60] = None
s.iloc[90:95] = None

s

0      0.0
1      1.0
2      2.0
3      3.0
4      4.0
      ... 
95    95.0
96    96.0
97    97.0
98    98.0
99    99.0
Length: 100, dtype: float64

## Missingness number

In [None]:
#| export
@patch
def number_missing(self: PandasMissingSeries):
    """Count the missing values in the Series.

    Returns the total of the entries of the wrapped Series (`self._s`) that
    `isna()` flags as missing.
    """
    missing_mask = self._s.isna()
    return missing_mask.sum()

In [None]:
s.missing.number_missing()

25

In [None]:
#| export
@patch
def number_complete(self: PandasMissingSeries):
    """Count the non-missing values in the Series.

    Returns the total of the entries of the wrapped Series (`self._s`) that
    `notna()` flags as present.
    """
    present_mask = self._s.notna()
    return present_mask.sum()

In [None]:
s.missing.number_complete()

75

## Missingness proportion

In [None]:
#| export
@patch
def proportion_missing(self: PandasMissingSeries):
    """Compute the fraction of Series entries that are missing.

    The value is the mean of the boolean `isna()` mask of the wrapped Series.
    """
    missing_mask = self._s.isna()
    return missing_mask.mean()

In [None]:
s.missing.proportion_missing()

0.25

In [None]:
#| export
@patch
def proportion_complete(self: PandasMissingSeries):
    """Compute the fraction of Series entries that are not missing.

    The value is the mean of the boolean `notna()` mask of the wrapped Series.
    """
    present_mask = self._s.notna()
    return present_mask.mean()

In [None]:
s.missing.proportion_complete()

0.75

## Missingness percentage

In [None]:
#| export
@patch
def percentage_missing(self: PandasMissingSeries):
    """Compute the percentage of Series entries that are missing.

    This is `proportion_missing()` of the wrapped Series scaled by 100.
    """
    proportion = self._s.missing.proportion_missing()
    return proportion * 100

In [None]:
s.missing.percentage_missing()

25.0

In [None]:
#| export
@patch
def percentage_complete(self: PandasMissingSeries):
    """Compute the percentage of Series entries that are not missing.

    This is `proportion_complete()` of the wrapped Series scaled by 100.
    """
    proportion = self._s.missing.proportion_complete()
    return proportion * 100

In [None]:
s.missing.percentage_complete()

75.0

# DataFrame summaries

## Dummy data for demonstration purposes

In [None]:
import pandas as pd

df = pd.DataFrame.from_dict(
    {
        "a": range(0, 10),
        "b": range(10, 20),
        "c": range(20, 30),
        "d": range(30, 40),
        "e": range(40, 50),
    }
)

# Columns "a" and "b" receive missing values below. Cast them to float64
# explicitly first: an integer column cannot hold the missing-value marker,
# and relying on the assignment to upcast the dtype is deprecated in recent
# pandas releases.
df = df.astype({"a": "float64", "b": "float64"})

# Four missing cells in column "a" (rows 1-3 and 9), two in column "b"
# (rows 5-6): six missing cells spread over six distinct rows.
df.iloc[1:4, 0] = None
df.iloc[9, 0] = None
df.iloc[5:7, 1] = None

df

Unnamed: 0,a,b,c,d,e
0,0.0,10.0,20,30,40
1,,11.0,21,31,41
2,,12.0,22,32,42
3,,13.0,23,33,43
4,4.0,14.0,24,34,44
5,5.0,,25,35,45
6,6.0,,26,36,46
7,7.0,17.0,27,37,47
8,8.0,18.0,28,38,48
9,,19.0,29,39,49


## Overall missingness number

In [None]:
#| export
@patch
def number_missing(self: PandasMissingDataFrame):
    """Count the missing cells across the whole DataFrame.

    Missing values are first tallied per column and the per-column tallies
    are then added up into one total.
    """
    missing_per_column = self._df.isna().sum()
    return missing_per_column.sum()

In [None]:
df.missing.number_missing()

6

In [None]:
#| export
@patch
def number_complete(self: PandasMissingDataFrame):
    """Count the non-missing cells across the whole DataFrame.

    Computed as the total number of cells (`DataFrame.size`) minus the
    overall number of missing cells.
    """
    total_cells = self._df.size
    missing_cells = self._df.missing.number_missing()
    return total_cells - missing_cells

In [None]:
df.missing.number_complete()

44

## Overall missingness proportion

In [None]:
#| export
@patch
def proportion_missing(self: PandasMissingDataFrame):
    """Return the proportion of missing values in the entire DataFrame.

    The result is the number of missing cells divided by the total number of
    cells (`DataFrame.size`), so it is a single number however the columns
    are indexed. `NaN` is returned for a DataFrame that has no cells.
    """
    total_cells = self._df.size
    if total_cells == 0:
        # No cells to inspect: the proportion is undefined.
        return float("nan")
    # Count on the underlying boolean array rather than `stack()`-ing the
    # frame: `stack()` only moves the innermost column level, so with
    # MultiIndex columns the following `mean()` would yield a Series instead
    # of a scalar, and it builds a long intermediate object for no benefit.
    return self._df.isna().to_numpy().sum() / total_cells

In [None]:
df.missing.proportion_missing()

0.12

In [None]:
#| export
@patch
def proportion_complete(self: PandasMissingDataFrame):
    """Return the proportion of non-missing values in the entire DataFrame.

    The result is the number of non-missing cells divided by the total number
    of cells (`DataFrame.size`), so it is a single number however the columns
    are indexed. `NaN` is returned for a DataFrame that has no cells.
    """
    total_cells = self._df.size
    if total_cells == 0:
        # No cells to inspect: the proportion is undefined.
        return float("nan")
    # Count on the underlying boolean array rather than `stack()`-ing the
    # frame: `stack()` only moves the innermost column level, so with
    # MultiIndex columns the following `mean()` would yield a Series instead
    # of a scalar, and it builds a long intermediate object for no benefit.
    return self._df.notna().to_numpy().sum() / total_cells

In [None]:
df.missing.proportion_complete()

0.88

## Overall missingness percentage

In [None]:
#| export
@patch
def percentage_missing(self: PandasMissingDataFrame):
    """Compute the percentage of missing cells in the whole DataFrame.

    This is the overall `proportion_missing()` of the wrapped DataFrame
    scaled by 100.
    """
    proportion = self._df.missing.proportion_missing()
    return proportion * 100

In [None]:
df.missing.percentage_missing()

12.0

In [None]:
#| export
@patch
def percentage_complete(self: PandasMissingDataFrame):
    """Compute the percentage of non-missing cells in the whole DataFrame.

    This is the overall `proportion_complete()` of the wrapped DataFrame
    scaled by 100.
    """
    proportion = self._df.missing.proportion_complete()
    return proportion * 100

In [None]:
df.missing.percentage_complete()

88.0

## Missingness number throughout variables

In [None]:
#| export
@patch
def number_variable_missing(self: PandasMissingDataFrame):
    """Count the variables (columns) holding at least one missing value."""
    # One boolean per column: True when any entry of that column is missing.
    column_has_missing = self._df.isna().any()
    return column_has_missing.sum()

In [None]:
df.missing.number_variable_missing()

2

In [None]:
#| export
@patch
def number_variable_complete(self: PandasMissingDataFrame):
    """Count the variables (columns) that contain no missing values."""
    # One boolean per column: True when every entry of that column is present.
    column_is_complete = self._df.notna().all()
    return column_is_complete.sum()

In [None]:
df.missing.number_variable_complete()

3

## Missingness proportion throughout variables

In [None]:
#| export
@patch
def proportion_variable_missing(self: PandasMissingDataFrame):
    """Compute the fraction of variables (columns) with any missing value."""
    # One boolean per column: True when any entry of that column is missing.
    column_has_missing = self._df.isna().any()
    return column_has_missing.mean()

In [None]:
df.missing.proportion_variable_missing()

0.4

In [None]:
#| export
@patch
def proportion_variable_complete(self: PandasMissingDataFrame):
    """Compute the fraction of variables (columns) with no missing values."""
    # One boolean per column: True when every entry of that column is present.
    column_is_complete = self._df.notna().all()
    return column_is_complete.mean()

In [None]:
df.missing.proportion_variable_complete()

0.6

## Missingness percentage throughout variables

In [None]:
#| export
@patch
def percentage_variable_missing(self: PandasMissingDataFrame):
    """Compute the percentage of variables (columns) with any missing value.

    This is `proportion_variable_missing()` of the wrapped DataFrame scaled
    by 100.
    """
    proportion = self._df.missing.proportion_variable_missing()
    return proportion * 100

In [None]:
df.missing.percentage_variable_missing()

40.0

In [None]:
#| export
@patch
def percentage_variable_complete(self: PandasMissingDataFrame):
    """Compute the percentage of variables (columns) with no missing values.

    This is `proportion_variable_complete()` of the wrapped DataFrame scaled
    by 100.
    """
    proportion = self._df.missing.proportion_variable_complete()
    return proportion * 100

In [None]:
df.missing.percentage_variable_complete()

60.0

## Missingness number throughout cases

In [None]:
#| export
@patch
def number_case_missing(self: PandasMissingDataFrame):
    """Count the cases (rows) holding at least one missing value."""
    # One boolean per row: True when any entry of that row is missing.
    row_has_missing = self._df.isna().any(axis="columns")
    return row_has_missing.sum()

In [None]:
df.missing.number_case_missing()

6

In [None]:
#| export
@patch
def number_case_complete(self: PandasMissingDataFrame):
    """Count the cases (rows) that contain no missing values."""
    # One boolean per row: True when every entry of that row is present.
    row_is_complete = self._df.notna().all(axis="columns")
    return row_is_complete.sum()

In [None]:
df.missing.number_case_complete()

4

## Missingness proportion throughout cases

In [None]:
#| export
@patch
def proportion_case_missing(self: PandasMissingDataFrame):
    """Compute the fraction of cases (rows) with any missing value."""
    # One boolean per row: True when any entry of that row is missing.
    row_has_missing = self._df.isna().any(axis="columns")
    return row_has_missing.mean()

In [None]:
df.missing.proportion_case_missing()

0.6

In [None]:
#| export
@patch
def proportion_case_complete(self: PandasMissingDataFrame):
    """Compute the fraction of cases (rows) with no missing values."""
    # One boolean per row: True when every entry of that row is present.
    row_is_complete = self._df.notna().all(axis="columns")
    return row_is_complete.mean()

In [None]:
df.missing.proportion_case_complete()

0.4

## Missingness percentage throughout cases

In [None]:
#| export
@patch
def percentage_case_missing(self: PandasMissingDataFrame):
    """Compute the percentage of cases (rows) with any missing value.

    This is `proportion_case_missing()` of the wrapped DataFrame scaled by 100.
    """
    proportion = self._df.missing.proportion_case_missing()
    return proportion * 100

In [None]:
df.missing.percentage_case_missing()

60.0

In [None]:
#| export
@patch
def percentage_case_complete(self: PandasMissingDataFrame):
    """Compute the percentage of cases (rows) with no missing values.

    This is `proportion_case_complete()` of the wrapped DataFrame scaled by
    100.
    """
    proportion = self._df.missing.proportion_case_complete()
    return proportion * 100

In [None]:
df.missing.percentage_case_complete()

40.0