In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Toy dataset: three feature columns plus a target column ("salida"),
# each containing exactly one missing value (NaN) in a different row.
datos = dict(
    alfa=[1, 2, 4, 8, np.nan, 9, 45],
    beta=[8, 14, np.nan, 6, 1, 6, 46],
    gamma=[5, 4, 4, 4, 4, 4, np.nan],
    salida=[np.nan, 1, 18, 14, 10, 1, 19],
)

df = pd.DataFrame(datos)
# Keep an untouched deep copy so later cells can restart from the raw data.
copia = df.copy(deep=True)
df

Unnamed: 0,alfa,beta,gamma,salida
0,1.0,8.0,5.0,
1,2.0,14.0,4.0,1.0
2,4.0,,4.0,18.0
3,8.0,6.0,4.0,14.0
4,,1.0,4.0,10.0
5,9.0,6.0,4.0,1.0
6,45.0,46.0,,19.0


In [2]:
# How many missing values does each variable have?
# isna() yields a boolean mask; summing down the rows counts the True entries per column.
df.isna().sum(axis=0)

alfa      1
beta      1
gamma     1
salida    1
dtype: int64

In [3]:
# Same per-column count of missing values, this time via isnull().
df.isnull().sum(axis=0)

alfa      1
beta      1
gamma     1
salida    1
dtype: int64

In [4]:
# Strategy 1 - listwise deletion: discard every row that has at least one NaN.
# Derive the result from the untouched backup (`copia`) instead of mutating
# `df` in place, so this cell gives the same answer no matter which other
# cells have already reassigned `df`.
df = copia.dropna(axis="index", how="any")
df

Unnamed: 0,alfa,beta,gamma,salida
1,2.0,14.0,4.0,1.0
3,8.0,6.0,4.0,14.0
5,9.0,6.0,4.0,1.0


In [5]:
# Strategy 2 - constant imputation: replace every missing entry with 0.
# fillna returns a new frame, so the backup `copia` is left untouched.
df = copia.fillna(0)
df

Unnamed: 0,alfa,beta,gamma,salida
0,1.0,8.0,5.0,0.0
1,2.0,14.0,4.0,1.0
2,4.0,0.0,4.0,18.0
3,8.0,6.0,4.0,14.0
4,0.0,1.0,4.0,10.0
5,9.0,6.0,4.0,1.0
6,45.0,46.0,0.0,19.0


In [6]:
# Strategy 3 - per-column statistics: mean for "alfa", median for "beta",
# most frequent value (first mode) for "gamma". "salida" is deliberately
# not listed, so its NaN is left as-is.
df = copia.copy()
# A single DataFrame-level fillna with a {column: value} mapping replaces the
# former `df[col].fillna(..., inplace=True)` calls. Those acted on a column
# selection (chained assignment), which warns in pandas 2.x and does not
# update `df` under Copy-on-Write.
df = df.fillna(
    value={
        "alfa": df["alfa"].mean(),
        "beta": df["beta"].median(),
        "gamma": df["gamma"].mode()[0],
    }
)
df

Unnamed: 0,alfa,beta,gamma,salida
0,1.0,8.0,5.0,
1,2.0,14.0,4.0,1.0
2,4.0,7.0,4.0,18.0
3,8.0,6.0,4.0,14.0
4,11.5,1.0,4.0,10.0
5,9.0,6.0,4.0,1.0
6,45.0,46.0,4.0,19.0


In [7]:
df = copia.copy()

from sklearn.impute import SimpleImputer

# Strategy 4 - scikit-learn mean imputation applied to every column at once
# (including "salida").
si = SimpleImputer(missing_values=np.nan, strategy="mean")
# By default fit_transform returns a bare NumPy array, so the frame is rebuilt
# by hand. Passing the original index as well as the column labels keeps the
# rows aligned even if the input frame does not have a default RangeIndex.
df = pd.DataFrame(data=si.fit_transform(df), columns=df.columns, index=df.index)
df

Unnamed: 0,alfa,beta,gamma,salida
0,1.0,8.0,5.0,10.5
1,2.0,14.0,4.0,1.0
2,4.0,13.5,4.0,18.0
3,8.0,6.0,4.0,14.0
4,11.5,1.0,4.0,10.0
5,9.0,6.0,4.0,1.0
6,45.0,46.0,4.166667,19.0


In [8]:
# Strategy 5 - scikit-learn median imputation.
# set_output(transform="pandas") makes the imputer return a DataFrame directly,
# so no manual re-wrapping of the result is needed.
si = SimpleImputer(strategy="median", missing_values=np.nan)
si.set_output(transform="pandas")

df = si.fit_transform(copia.copy())
df

Unnamed: 0,alfa,beta,gamma,salida
0,1.0,8.0,5.0,12.0
1,2.0,14.0,4.0,1.0
2,4.0,7.0,4.0,18.0
3,8.0,6.0,4.0,14.0
4,6.0,1.0,4.0,10.0
5,9.0,6.0,4.0,1.0
6,45.0,46.0,4.0,19.0


In [9]:
from sklearn.impute import KNNImputer

# Strategy 6 - nearest-neighbour imputation. Only `missing_values` is passed,
# so every other KNNImputer setting stays at the library default.
knn = KNNImputer(missing_values=np.nan)
knn.set_output(transform="pandas")  # return a DataFrame instead of an array

df = knn.fit_transform(copia.copy())
df

Unnamed: 0,alfa,beta,gamma,salida
0,1.0,8.0,5.0,8.8
1,2.0,14.0,4.0,1.0
2,4.0,7.0,4.0,18.0
3,8.0,6.0,4.0,14.0
4,4.8,1.0,4.0,10.0
5,9.0,6.0,4.0,1.0
6,45.0,46.0,4.0,19.0
