In [15]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.compose import TransformedTargetRegressor # Investigar
import joblib

### 🧱 DummyEncoder: para usar get_dummies como transformador de sklearn

In [2]:
class DummyEncoder():
    """One-hot encode a DataFrame with ``pd.get_dummies`` using a fixed column layout.

    ``fit`` records the dummy columns produced by the training data.
    ``transform`` re-encodes new data and aligns it to those columns, so the
    output always has the same columns in the same order.

    NOTE(review): the class exposes ``fit``/``transform`` but has no base
    classes, so it provides no ``get_params``/``set_params``. Confirm whether
    it should inherit from ``BaseEstimator``/``TransformerMixin``.
    """

    def __init__(self):
        # Dummy column labels learned by fit(); stays None until fit() runs.
        self.columns_ = None

    def fit(self, X, y=None):
        """Learn the dummy column layout from ``X``.

        Parameters
        ----------
        X : data accepted by ``pd.get_dummies``
            Training features.
        y : ignored
            Present only so the method can be called as ``fit(X, y)``.

        Returns
        -------
        DummyEncoder
            The fitted instance itself.
        """
        self.columns_ = pd.get_dummies(X).columns
        return self

    def transform(self, X):
        """Encode ``X`` and align the result to the columns learned in ``fit``.

        Dummy columns that were seen in ``fit`` but are absent from ``X`` are
        added and filled with 0; dummy columns not seen in ``fit`` are dropped.

        Raises
        ------
        ValueError
            If ``fit`` has not been called yet.
        """
        if self.columns_ is None:
            raise ValueError(
                "This DummyEncoder instance is not fitted yet; call 'fit' before 'transform'."
            )
        # A single reindex adds the missing columns, drops the unknown ones
        # and enforces the training column order.
        return pd.get_dummies(X).reindex(columns=self.columns_, fill_value=0)

### ✅ Postprocesador como transformador de sklearn

In [25]:
class PostProcesador():
    """Post-processing helper that converts continuous predictions to integers."""

    def fit(self, y, *_):
        """No-op fit: nothing is learned and the instance itself is returned.

        Any extra positional arguments are accepted and ignored.
        """
        return self

    def round_post(self, y):
        """Round ``y`` with ``np.round`` and cast the result to ``int``."""
        rounded = np.round(y)
        return rounded.astype(int)

### 📄 Datos de ejemplo con variables categóricas

In [4]:
# Toy dataset, one row per car: two categorical features and a numeric price.
data = pd.DataFrame(
    [
        ('ford', 'rojo', 10000.60),
        ('toyota', 'azul', 15000.78),
        ('ford', 'verde', 12000.42),
        ('honda', 'azul', 13000.0),
    ],
    columns=['marca', 'color', 'precio'],
)

### ⚡ Definimos las variables X e y

In [5]:
# Features are the two categorical columns; the target is the price column.
X = data.loc[:, ['marca', 'color']]
y = data.loc[:, 'precio']

### 🧪 Construimos el pipeline

In [16]:
# Two-step pipeline: dummy-encode the input columns, then fit a linear regression.
pipeline = Pipeline(steps=[
    ('onehot', DummyEncoder()),
    ('modelo', LinearRegression()),
])

# --------------- Alternative ---------------
# NOTE(review): left commented out. It wraps the regressor in
# TransformedTargetRegressor with PostProcesador as the target transformer;
# presumably PostProcesador must first expose the transformer methods that
# wrapper expects. Verify before enabling.

#pipeline = Pipeline([
#    ('onehot', DummyEncoder()),
#    ('modelo', TransformedTargetRegressor(regressor=LinearRegression(), transformer=PostProcesador())),
#])

### 🧠 Entrenamos

In [17]:
# Fit the encoder and the regressor together on the training data.
pipeline.fit(X, y=y)

### 🔍 Predicciones de prueba

In [18]:
# New rows to score, one (marca, color) pair per row.
X_nuevo = pd.DataFrame(
    [
        ('toyota', 'rojo'),
        ('ford', 'verde'),
    ],
    columns=['marca', 'color'],
)

# Run the new rows through the fitted pipeline and print the raw predictions.
predicciones = pipeline.predict(X_nuevo)
print("Predicciones:", predicciones)

Predicciones: [12500.93 12000.42]


### 💾 Guardar el pipeline y el postprocesador completos

In [19]:
# Persist the fitted pipeline (encoder and regressor) to disk.
joblib.dump(value=pipeline, filename='modelo_regresion.pkl')

['modelo_regresion.pkl']

In [28]:
# Persist a fresh PostProcesador instance next to the pipeline file.
joblib.dump(value=PostProcesador(), filename="post.pkl")

['post.pkl']

### ⌛ Cargar el pipeline

In [21]:
# Check that the saved pipeline loads; the loaded object is shown as the cell output.
joblib.load(filename="modelo_regresion.pkl")

In [22]:
# Check that the saved post-processor loads; the loaded object is shown as the cell output.
joblib.load(filename="post.pkl")

<__main__.PostProcesador at 0x7b90d19a4110>

In [29]:
# Reload both artifacts from disk; `pipeline` is rebound to the loaded copy.
# NOTE(review): the custom classes are defined in this notebook, so presumably
# their defining cells must have been run before loading. Confirm on a fresh kernel.
pipeline = joblib.load(filename="modelo_regresion.pkl")
post = joblib.load(filename="post.pkl")

In [30]:
# Predict with the reloaded pipeline, then round the predictions to integers.
y_pred = pipeline.predict(X_nuevo)
y_final = post.round_post(y=y_pred)

# Display the raw and the rounded predictions together.
(y_pred, y_final)

(array([12500.93, 12000.42]), array([12501, 12000]))