In [None]:
import pandas as pd
import joblib
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer

# Load the "diamonds" example dataset through seaborn.
df = sns.load_dataset('diamonds')

# Feature columns, grouped by the preprocessing branch that handles them.
categorical_columns = ["cut", "color", "clarity"]
numerical_columns = ["carat", "depth", "table", "x", "y", "z"]

# Numeric branch: impute missing values with the median, then standardize.
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

# Categorical branch: one-hot encoding; handle_unknown="ignore" keeps
# transform from failing on categories that were not present during fit.
categorical_transformer = OneHotEncoder(handle_unknown="ignore")

# Route each column group to its transformer.
preprocessor = ColumnTransformer(transformers=[
    ("num", numeric_transformer, numerical_columns),
    ("cat", categorical_transformer, categorical_columns),
])

# Regression model; random_state is fixed so repeated runs are reproducible.
regressor = RandomForestRegressor(n_estimators=100, random_state=42)

# Full pipeline: preprocessing followed by the regressor.
pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", regressor),
])

# Target is "price"; every remaining column is used as a feature.
# The model is fitted on the whole dataset (no train/test split here).
y = df["price"]
X = df.drop(columns=["price"])
pipeline.fit(X, y)

# Persist the fitted pipeline (preprocessing + model) to disk.
joblib.dump(pipeline, "pipeline_regresion.joblib")

['pipeline_regresion.joblib']