In [None]:
import pandas as pd
import numpy as np

# Toy employee dataset. Missing values (np.nan) are scattered across the
# columns on purpose so the null-handling cells below have gaps to work on.
data = {
    "WorkId":     [1, 2, 3, 4, 5, 6, 7, 8],
    "Age":        [25, np.nan, 28, 40, np.nan, 32, 45, 27],
    "Department": ["HR", np.nan, "HR", "IT", "Finance", np.nan, "Finance", "HR"],
    "Salary":     [50000, 60000, np.nan, 80000, np.nan, 100000, 85000, 47000],
    "Perf_Score": [85, 90, np.nan, np.nan, 78, 95, 88, 79],
    "WorkHours":  [8, 9, 7, 8, np.nan, np.nan, 9, 8],
    "Bonus":      [100, 150, 200, 250, np.nan, 350, 400, 450],
    "N_Children": [np.nan, 1, 2, np.nan, 4, np.nan, 3, 1],
    "TimeInComp": [36, 24, np.nan, 18, 17, np.nan, 13, np.nan],  # Months
}

# One row per employee; the dict keys become the column labels.
df = pd.DataFrame(data=data)

df

## **Tratamiento de nulos - Parte 1**

In [None]:
# Drop every row that contains at least one missing value.
# axis=0 / how='any' are the pandas defaults, written out to make the rule explicit.
df_dropna_rows = df.dropna(axis=0, how='any')
df_dropna_rows

In [None]:
# Keep only the rows that have at least 6 non-null values; rows with fewer
# are dropped. Unlike a plain dropna(), a row with a few gaps survives.
# Note: this rebinds df_dropna_rows, replacing whatever it held before.
df_dropna_rows = df.dropna(axis=0, thresh=6)
df_dropna_rows

In [None]:
# Drop the 'N_Children' column by name. Only this one column is removed;
# other columns that contain nulls are kept. df itself is not modified.
df_dropna_cols = df.drop(labels='N_Children', axis=1)
df_dropna_cols

In [None]:
# Fill missing values with a fixed constant: a missing child count becomes 0.
# The result is written back into df, so every later cell sees the filled column.
children_filled = df['N_Children'].fillna(value=0)
df['N_Children'] = children_filled
df

In [None]:
# Fill missing values with a different constant per column.
# Columns not listed in the mapping keep their nulls; df is left unchanged
# because fillna returns a new DataFrame.
fill_values = {'Age': 0, 'Salary': 50000, 'Department': 'Unknown'}
df_fillna_constant = df.fillna(value=fill_values)
df_fillna_constant

In [None]:
# Fill missing ages with the median of the known ages.
# The median is computed first from the current column, then written back into df.
age_median = df['Age'].median()
df['Age'] = df['Age'].fillna(age_median)
df

In [None]:
# Fill missing departments with the most frequent department (the mode).
# mode() returns a Series because several values can tie; [0] takes the first one.
most_common_department = df['Department'].mode()[0]
df['Department'] = df['Department'].fillna(most_common_department)
df

## **Tratamiento de nulos - Parte 2**

In [None]:
# Group-wise fill: a missing Salary is replaced by the mean Salary of the
# employee's Department.
#
# The per-row department mean is computed first and used only to fill the
# gaps. Assigning the transform result directly would overwrite existing
# salaries with NaN on any row whose Department is NaN, because groupby
# excludes NaN keys. Here such rows simply keep whatever Salary they had.
# A department with no known salary has a NaN mean, so its gaps stay unfilled.
df_fill_by_group = df.copy()
department_mean_salary = df_fill_by_group.groupby(
    'Department')['Salary'].transform('mean')
df_fill_by_group['Salary'] = df_fill_by_group['Salary'].fillna(
    department_mean_salary)
df_fill_by_group

In [None]:
# Mean salary per department, shown as a one-column DataFrame.
# These are the values a group-wise fill uses for each department.
salary_by_department = df.groupby('Department')[['Salary']]
salary_by_department.mean()

In [None]:
# Group-wise fill restricted to rows whose Department is known.
# Rows with a NaN Department are excluded by the mask and left untouched.
df_fill_by_group = df.copy()
mask = df_fill_by_group['Department'].notna()

# Work on the masked rows only, fill each department's gaps with that
# department's mean salary, then write the result back to the same rows.
rows_with_department = df_fill_by_group.loc[mask]
salary_filled = rows_with_department.groupby('Department')['Salary'].transform(
    lambda salaries: salaries.fillna(salaries.mean()))
df_fill_by_group.loc[mask, 'Salary'] = salary_filled
df_fill_by_group

In [None]:
# Linear interpolation of TimeInComp.
# method='linear' treats the rows as equally spaced, so each gap is filled
# from the neighbouring known values in row order. df is not modified.
df_interpolated = df.copy()
time_in_company = df_interpolated['TimeInComp']
df_interpolated['TimeInComp'] = time_in_company.interpolate(method='linear')
df_interpolated

In [None]:
from sklearn.impute import KNNImputer

# Prepare the features for KNN imputation. Department holds strings and
# cannot be used in a distance computation; WorkId is dropped as well
# (presumably because it is just a row identifier).
# The features keep their raw scales here, so large-magnitude columns such
# as Salary weigh most heavily in the neighbour distances.
knn_df = df.drop(columns=['WorkId', 'Department'])
imputer = KNNImputer(n_neighbors=2)

# fit_transform returns a bare NumPy array. Restore the column labels AND the
# original row index: the assignment below aligns on index, so a fresh
# RangeIndex would misplace values if df's index were not 0..n-1.
df_imputed = pd.DataFrame(imputer.fit_transform(knn_df),
                          columns=knn_df.columns,
                          index=knn_df.index)

# Rebuild the full DataFrame, taking only Salary and Bonus from the imputed
# result; every other column keeps its current values.
df_knn = df.copy()
df_knn[['Salary', 'Bonus']] = df_imputed[['Salary', 'Bonus']]
df_knn

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

# Scale every numeric feature with MinMaxScaler before imputing, so that no
# single column dominates the neighbour distances because of its magnitude.
# WorkId and Department are left out of the feature matrix.
scaler = MinMaxScaler()
df_to_normalize = df.drop(columns=['WorkId', 'Department'])

# Each sklearn call returns a bare NumPy array. Every rebuilt frame reuses the
# original row index (not a fresh RangeIndex) so the final index-aligned
# assignment puts each imputed value back on the row it belongs to.
df_scaled = pd.DataFrame(scaler.fit_transform(df_to_normalize),
                         columns=df_to_normalize.columns,
                         index=df_to_normalize.index)

# Impute the remaining gaps in the scaled space using the 2 nearest neighbours.
# Note: this rebinds `imputer` and `df_imputed`; df_imputed now holds SCALED values.
imputer = KNNImputer(n_neighbors=2)
df_imputed = pd.DataFrame(imputer.fit_transform(df_scaled),
                          columns=df_to_normalize.columns,
                          index=df_to_normalize.index)

# Undo the scaling so the imputed values are back in their original units.
df_imputed_rev = pd.DataFrame(scaler.inverse_transform(df_imputed),
                              columns=df_to_normalize.columns,
                              index=df_to_normalize.index)

# Rebuild the full DataFrame, taking only Salary and Bonus from the imputed result.
df_knn = df.copy()
df_knn[['Salary', 'Bonus']] = df_imputed_rev[['Salary', 'Bonus']]
df_knn

In [None]:
# Nearest-neighbour model used to inspect which rows lie closest to a given
# row in the imputed feature space.
knn = NearestNeighbors(n_neighbors=4, metric='euclidean')

# Fit only on rows without nulls. The fitted frame is kept in a variable
# because kneighbors() returns POSITIONS within the data passed to fit();
# those positions must be looked up in this same frame, which differs from
# df_imputed whenever dropna() actually removes rows.
knn_train = df_imputed.dropna()
knn.fit(knn_train)

# Query the row at position 4 of df_imputed (a row that had nulls before
# imputation). It is rebuilt as a one-row DataFrame with the same columns so
# the model receives the feature names it was fitted with.
fila_index = 4
fila = pd.DataFrame([df_imputed.iloc[fila_index].values], columns=df_imputed.columns)

# Distances and positions of the 4 nearest neighbours. If the query row is
# itself part of the fitted data, it is expected to show up as its own
# nearest neighbour at distance 0.
distancias, indices = knn.kneighbors(fila)

# Show the neighbours
print('Índices de los vecinos más cercanos\n', indices)
print('\nDistancias a los vecinos\n', distancias)
print('\nFilas vecinas')
knn_train.iloc[indices[0]]

In [None]:
# The same neighbour rows, taken from the inverse-transformed frame so the
# values read in their original (un-scaled) units. Rows are selected by position.
df_imputed_rev.iloc[indices[0], :]

## **Tratamiento de nulos - Parte 3**