# Visualizar pipelines

In [None]:
import pandas as pd

# Load the housing data; '?' entries in the CSV are read as missing values.
ames_housing = pd.read_csv("../../data/house-prices/full.csv", na_values='?')

target_name = "SalePrice"
# Features are every column except the sale price.
data = ames_housing.drop(columns=target_name)
# Binary target: 1 when the sale price exceeds 200,000, otherwise 0.
target = (ames_housing[target_name] > 200_000).astype(int)

In [None]:
# Display the feature table (every column except the target).
data

In [None]:
# For simplicity, keep only an arbitrary handful of features and drop
# every other column from the data.

numeric_features = ['LotArea', 'FullBath', 'HalfBath']
categorical_features = ['Neighborhood', 'HouseStyle']
# Numeric columns first, then categorical ones.
data = data[[*numeric_features, *categorical_features]]

## Crear el pipeline

In [None]:
# First step: define how each kind of column is preprocessed.

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Numeric columns: fill missing values with the median, then standardize.
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ],
)

# Categorical columns: one-hot encode; handle_unknown='ignore' lets
# transform accept categories that were not seen during fit.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [None]:
# Display the numeric preprocessing pipeline (imputer followed by scaler).
numeric_transformer

In [None]:
# Apply each transformer to its own group of columns with ColumnTransformer.

from sklearn.compose import ColumnTransformer

# Each entry is (name, transformer, columns the transformer is applied to).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
)

In [None]:
# Define the model and chain the steps in order: preprocessing first,
# then the classifier.

from sklearn.linear_model import LogisticRegression

model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', LogisticRegression()),
    ],
)

In [None]:
# Display the full pipeline: preprocessor followed by the classifier.
model

# Puntuación del modelo

In [None]:
from sklearn.model_selection import cross_validate

# Evaluate the whole pipeline with 5-fold cross-validation. No `scoring`
# argument is passed, so the estimator's default score is used.
cv_results = cross_validate(model, data, target, cv=5)
scores = cv_results["test_score"]

# Report the mean and standard deviation of the test scores.
print(
    "La precisión media de la validación cruzada es: "
    f"{scores.mean():.3f} ± {scores.std():.3f}"
)