# Tutorial: extending the pandas API with a custom accessor

## Importar librerías

In [1]:
import pandas as pd

## Datos de prueba

In [2]:
# Build a small demo frame: a text column 'a' and a numeric column 'b'.
# 'b' is created as float64 up front because a missing value is written into
# it below; assigning None into an int64 column relies on silent upcasting,
# which pandas has deprecated (FutureWarning since pandas 2.1).
df = pd.DataFrame.from_dict(
    data={
        'a': list('asdfghjklñ'),
        'b': [float(n) for n in range(0, 10)],
    }
)

# Blank out rows 2-4 of column 'a' and row 6 of column 'b'.
df.iloc[2:5, 0] = None
df.iloc[6:7, 1] = None

df

Unnamed: 0,a,b
0,a,0.0
1,s,1.0
2,,2.0
3,,3.0
4,,4.0
5,h,5.0
6,j,
7,k,7.0
8,l,8.0
9,ñ,9.0


In [3]:
# Built-in example of an accessor: `.str` on a column returns a StringMethods object.
df['a'].str

<pandas.core.strings.accessor.StringMethods at 0x1eadb2c7510>

In [4]:
# df.missing
# AttributeError: 'DataFrame' object has no attribute 'missing'

## Crear una nueva clase para extender pandas

In [5]:
@pd.api.extensions.register_dataframe_accessor('missing')
class MissingMethods:
    """Accessor exposed as ``DataFrame.missing`` that counts missing values."""

    def __init__(self, pandas_obj):
        # Keep a reference to the DataFrame the accessor was reached from.
        self._df = pandas_obj

    def number_missing(self):
        """Return the total number of missing (NA) cells in the frame."""
        missing_per_column = self._df.isna().sum()
        return missing_per_column.sum()

    def number_complete(self):
        """Return the total number of non-missing cells in the frame."""
        present_per_column = self._df.notna().sum()
        return present_per_column.sum()

    # Alternative approach: derive the complete count by subtracting
    # number_missing() from the frame's size instead of counting non-NA cells.

### Probar uso

In [6]:
# Wrap df in a new DataFrame object before trying the accessor, then display it.
df = pd.DataFrame(df)
df

Unnamed: 0,a,b
0,a,0.0
1,s,1.0
2,,2.0
3,,3.0
4,,4.0
5,h,5.0
6,j,
7,k,7.0
8,l,8.0
9,ñ,9.0


In [7]:
# The registered name now resolves to an accessor object instead of raising AttributeError.
df.missing

<__main__.MissingMethods at 0x1eadb2cdd10>

In [8]:
# Total number of missing cells across the whole frame.
df.missing.number_missing()

4

In [9]:
# Total number of non-missing cells across the whole frame.
df.missing.number_complete()

16

In [10]:
# Remove any accessor already registered as ``DataFrame.missing`` so the class
# below can be registered under the same name; if none exists there is
# nothing to clean up.
try:
    del pd.DataFrame.missing
except AttributeError:
    pass


@pd.api.extensions.register_dataframe_accessor("missing")
class DontMissMe:
    """Accessor exposed as ``DataFrame.missing`` with missing-value summaries."""

    def __init__(self, pandas_obj):
        # DataFrame the accessor was reached from.
        self._obj = pandas_obj

    def number_missing(self):
        """Return the total number of missing (NA) cells in the frame."""
        return self._obj.isna().sum().sum()

    def number_complete(self):
        """Return the total number of non-missing cells in the frame."""
        # Call the sibling method directly rather than going back through
        # ``self._obj.missing``, so this does not depend on the name the
        # accessor happens to be registered under.
        return self._obj.size - self.number_missing()

    def proportion_missing(self):
        """Return the fraction of cells that are missing, as a float.

        Returns NaN for a frame with no cells, where the proportion is
        undefined.
        """
        total_cells = self._obj.size
        if total_cells == 0:
            return float("nan")
        return float(self.number_missing() / total_cells)

In [11]:
# Build a new DataFrame object after re-registering the accessor, then display it
# (presumably so no accessor instance cached on the old object is reused — TODO confirm).
df = pd.DataFrame(df)
df

Unnamed: 0,a,b
0,a,0.0
1,s,1.0
2,,2.0
3,,3.0
4,,4.0
5,h,5.0
6,j,
7,k,7.0
8,l,8.0
9,ñ,9.0


In [12]:
# Same count as before, now served by the re-registered accessor class.
df.missing.number_missing()

4

In [13]:
# Non-missing cell count, computed here as frame size minus the missing count.
df.missing.number_complete()

16

In [14]:
# Invoke proportion_missing() through the accessor.
df.missing.proportion_missing()