## Importar librerías

In [1]:
import pandas as pd

## Datos de prueba

In [16]:
df = pd.DataFrame.from_dict(
    data = {
        "a": list("asdfasdfas"),
        "b": range(0, 10)
    }
)

df.iloc[2:5, 0] = None # .iloc[fila,col] -> df.iloc[[2,3,4], 0]
df.iloc[6:7, 1] = None # .iloc[fila,col] -> df.iloc[6, 1]

df

Unnamed: 0,a,b
0,a,0.0
1,s,1.0
2,,2.0
3,,3.0
4,,4.0
5,s,5.0
6,d,
7,f,7.0
8,a,8.0
9,s,9.0


In [19]:
def missing_case_summary(self) -> pd.DataFrame:
        return self._obj.assign(
            case=lambda df: df.index,
            n_missing=lambda df: df.apply(
                axis="columns", func=lambda row: row.isna().sum()
            ),
            pct_missing=lambda df: df["n_missing"] / df.shape[1] * 100,
        )[["case", "n_missing", "pct_missing"]]

In [23]:
df.a.str

<pandas.core.strings.accessor.StringMethods at 0x7f4d490e4e50>

In [22]:
# Esto te dará una lista de los métodos y atributos disponibles en el objeto StringMethods.
print(dir(df.a.str))

['__annotations__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__frozen', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_data', '_doc_args', '_freeze', '_get_series_list', '_index', '_inferred_dtype', '_is_categorical', '_is_string', '_name', '_orig', '_parent', '_validate', '_wrap_result', 'capitalize', 'casefold', 'cat', 'center', 'contains', 'count', 'decode', 'encode', 'endswith', 'extract', 'extractall', 'find', 'findall', 'fullmatch', 'get', 'get_dummies', 'index', 'isalnum', 'isalpha', 'isdecimal', 'isdigit', 'islower', 'isnumeric', 'isspace', 'istitle', 'isupper', 'join', 'len', 'ljust', 'lower', 'lstrip', 'match', 'normalize', 'pad', 'partition', 'removeprefix', 'removesuffix', 'repeat', 'replace',

In [None]:
# Necesitamos una clase y nombre para acceder a métodos contenidos enla clase

In [18]:
def missing_case_summary_2(df) -> pd.DataFrame:
    return pd.DataFrame({
        "case": df.index,
        "n_missing": df.apply(lambda row: row.isna().sum(), axis=1),
        "pct_missing": df.apply(lambda row: row.isna().sum() / len(row) * 100, axis=1),
    })

# Crear el DataFrame
df_2 = pd.DataFrame.from_dict(
    data={
        "a": list("asdfasdfas"),
        "b": range(0, 10)
    }
)

df_2.iloc[2:5, 0] = None
df_2.iloc[2:7, 1] = None

# Llamar a la función con el DataFrame como argumento
result = missing_case_summary_2(df_2)

# Mostrar el resultado
print(result)


   case  n_missing  pct_missing
0     0          0          0.0
1     1          0          0.0
2     2          2        100.0
3     3          2        100.0
4     4          2        100.0
5     5          1         50.0
6     6          1         50.0
7     7          0          0.0
8     8          0          0.0
9     9          0          0.0


## Crear una nueva clase para extender pandas

In [25]:
# Ejemplo de funcionanmiento del decorador s/ ChatGPT

def mi_decorador(funcion):
    def nueva_funcion():
        print("Antes de llamar a la función")
        funcion()
        print("Después de llamar a la función")

    return nueva_funcion

@mi_decorador
def funcion_a_decorar():
    print("¡Esta es la función que queremos decorar!")

# Llamamos a la función decorada
funcion_a_decorar()

Antes de llamar a la función
¡Esta es la función que queremos decorar!
Después de llamar a la función


In [66]:
@pd.api.extensions.register_dataframe_accessor('missingg')
class MissingMethods:
    def __init__(self, pandas_obj):
        self._df = pandas_obj
    
    def number_missing(self):
        return self._df.isna().sum().sum()
    def number_complete(self):
        return self._df.notna().sum().sum()
    def number_size(self):        
        return self._df.size
    def number_complete_2(self):
        return self._df.missingg.number_size() - self._df.missingg.number_missing()
    def proportion_missing(self):
        pass # dime que existe, aunque no esté codificada.

## Probar uso

In [67]:
# actualizamos el dataframe para que se guarden los cambios
# se debe actualizar si modificacmos la clase MissingMethods
df = pd.DataFrame(df)
#df

In [68]:
print(df.missingg.number_missing())
print(df.missingg.number_complete())
print(df.missingg.number_size())
print(df.missingg.number_complete_2())
print(df.missingg.proportion_missing())

4
16
20
16
None


In [327]:
%run pandas-missing-extension.ipynb

In [328]:
# Crear el DataFrame
df_3 = pd.DataFrame.from_dict(
    data={
        "a": list("asdfasdfaszzz"),
        "b": range(0, 13),
        "c": range(0, 13),
        "d": range(0, 13),
    }
)

df_3 = pd.DataFrame(df_3)

df_3.iloc[2:6, 0] = None # .iloc[fila,col] -> df.iloc[[2,3,4], 0]
df_3.iloc[2:3, 1] = None 
df_3.iloc[2:4, 2] = None
df_3.iloc[2:8, 3] = None

df_3 = pd.DataFrame(df_3)

print(df_3)

       a     b     c     d
0      a   0.0   0.0   0.0
1      s   1.0   1.0   1.0
2   None   NaN   NaN   NaN
3   None   3.0   NaN   NaN
4   None   4.0   4.0   NaN
5   None   5.0   5.0   NaN
6      d   6.0   6.0   NaN
7      f   7.0   7.0   NaN
8      a   8.0   8.0   8.0
9      s   9.0   9.0   9.0
10     z  10.0  10.0  10.0
11     z  11.0  11.0  11.0
12     z  12.0  12.0  12.0


In [329]:
#print("number_missing:\n",df_3.missing.number_missing(),"\n")
#print("number_complete:\n",df_3.missing.number_complete(),"\n")
#print("missing_variable_summary:\n",df_3.missing.missing_variable_summary(),"\n")
#print("missing_case_summary:\n",df_3.missing.missing_case_summary(),"\n")
print("missing_variable_table:\n",df_3.missing.missing_variable_table(),"\n")
print("missing_case_table:\n",df_3.missing.missing_case_table(),"\n")

missing_variable_table:
    n_missing_in_variable  pct_variables
3                      6      46.153846
2                      4      30.769231
1                      2      15.384615
0                      1       7.692308 

missing_case_table:
    n_missing_in_case  n_cases   pct_case
0                  0        7  53.846154
1                  1        2  15.384615
2                  2        2  15.384615
3                  3        1   7.692308
4                  4        1   7.692308 



<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=694a3d08-7f18-421d-9e2f-c2820a79680e' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>