# Laboratorio 2 - Complejidad y búsqueda de hiperparámetros
### Sergio David Ferreira - Juan David Gutierrez


## Librerías utilizadas

In [None]:
import random
import pandas as pd
import numpy as np
from sklearn import set_config
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline, FunctionTransformer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, MinMaxScaler, PolynomialFeatures
from sklearn.compose import ColumnTransformer


## Carga de datos y partición de datos
Para la carga de datos se carga el archivo del laboratorio anterior, se eliminan pacientes duplicados y outliers y valores faltantes sobre la columna target, *CVD Risk Score*

In [77]:
data = pd.read_csv('data/Datos Lab 1.csv')

# Normalizamos fechas
data['Date of Service'] = pd.to_datetime(data['Date of Service'], format='mixed')

# Eliminamos pacientes con registros duplicados manteniendo el más reciente
data = data.sort_values(['Patient ID', 'Date of Service'], ascending=[True, False])
data = data.drop_duplicates(subset='Patient ID', keep='first')

# Eliminamos registros con valores faltantes en el target
data = data.dropna(subset=['CVD Risk Score'])

# Eliminamos registros de outliers en la variable target
q1 = data['CVD Risk Score'].quantile(0.25)
q3 = data['CVD Risk Score'].quantile(0.75)
iqr = q3 - q1

limite_inferior = q1 - 1.5 * iqr
limite_superior = q3 + 1.5 * iqr

data = data[(data['CVD Risk Score'] > limite_inferior) & (data['CVD Risk Score'] < limite_superior)]

#Mostramos un sample de los datos
data.sample(3)

Unnamed: 0,Patient ID,Date of Service,Sex,Age,Weight (kg),Height (m),BMI,Abdominal Circumference (cm),Blood Pressure (mmHg),Total Cholesterol (mg/dL),...,Physical Activity Level,Family History of CVD,Height (cm),Waist-to-Height Ratio,Systolic BP,Diastolic BP,Blood Pressure Category,Estimated LDL (mg/dL),CVD Risk Score,CVD Risk Level
869,LcWw5698,2024-02-16,M,43.0,73.5,1.62,28.0,86.7,111/87,200.0,...,High,N,162.0,0.535,111.0,87.0,Hypertension Stage 1,108.0,15.15,LOW
48,FTEC4446,2022-11-05,F,73.0,66.074,2.146,27.461,76.599,112/106,197.0,...,Low,Y,195.82,0.391,112.0,106.0,Hypertension Stage 2,112.0,15.325,INTERMEDIARY
93,GjTt5411,2020-02-16,M,37.0,79.3,1.86,22.9,73.8,118/89,286.0,...,Low,N,186.0,0.397,118.0,89.0,Hypertension Stage 1,,18.2,HIGH


Ahora, separamos los datos en entrenamiento y prueba

In [78]:
target = 'CVD Risk Score'
X = data.drop(columns=[target])
y = data[target]

In [79]:
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=1)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_train.shape)

(1038, 23) (1038,)
(260, 23) (1038,)


## Construcción del pipeline

Para la construcción del pipeline se sigue el procedimiento del laboratorio anterior, por lo tanto únicamente se enuncian los pasos sin añadir justificaciones del proceso.

### Codificación de variables categóricas

In [80]:
numeric_features = [
    "Age", "Weight (kg)", "Height (m)", "Height (cm)", "BMI", "Abdominal Circumference (cm)", 
    "Total Cholesterol (mg/dL)", "HDL (mg/dL)", "Fasting Blood Sugar (mg/dL)", 
    "Waist-to-Height Ratio", "Systolic BP", "Diastolic BP", "Estimated LDL (mg/dL)"
]

ordinal_features = [
    "Physical Activity Level", "Blood Pressure Category"
]

binary_features = [
    "Sex", "Smoking Status", "Diabetes Status", "Family History of CVD"
]

binary_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(drop='if_binary', handle_unknown='ignore')),
    ('imputer', SimpleImputer(strategy=lambda x: random.randint(0, 1)))
])

ordinal_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinal', OrdinalEncoder(categories = [
        ['Low', 'Moderate', 'High'],  # Physical Activity Level
        ['Normal', 'Elevated', 'Hypertension Stage 1', 'Hypertension Stage 2']  # Blood Pressure Category
    ]))
])


### Eliminación de columnas redundantes o innecesarias

In [81]:
col_drop = ['Blood Pressure (mmHg)', 'Patient ID', 'Date of Service', 'CVD Risk Level']
def drop_columns(df):
    return df.drop(columns=col_drop, errors='ignore')

dropper = FunctionTransformer(drop_columns)

### Manejo de outliers por rangos válidos de las variables

In [82]:
rangos = {
    'Diastolic BP': (40, 125),
    'Systolic BP': (80, 190),
    'Fasting Blood Sugar (mg/dL)': (60, 200),
    'Total Cholesterol (mg/dL)': (80, 320),
    'HDL (mg/dL)': (20, 100),
    'Estimated LDL (mg/dL)': (0, 250),
    'BMI': (15, 50),
    'Weight (kg)': (30, 140),
    'Height (m)': (1.45, 2),
    'Height (cm)': (145, 200),
    'Abdominal Circumference (cm)': (60, 130),
    'Waist-to-Height Ratio': (0.35, 0.8),
    'Age': (18, 100)
}
def limpiar_rangos(df):
    for col, (min_val, max_val) in rangos.items():
        if col in df.columns:
            df.loc[
                (df[col] <= min_val) | (df[col] >= max_val),
                col
            ] = np.nan
    return df
rangos_validos = FunctionTransformer(limpiar_rangos)
numeric_transformer = Pipeline(steps=[
    ('rangos', rangos_validos)
])

### Imputación de valores faltantes
Variables sin multicolinealidad

In [83]:
clean_numeric_data = numeric_transformer.fit_transform(data)
clean_numeric_data = pd.DataFrame(clean_numeric_data, columns=numeric_features)

median_columns = ['Age', 'Diastolic BP', 'Systolic BP', 'Fasting Blood Sugar (mg/dL)', 'HDL (mg/dL)']
mean_columns = ['Weight (kg)', 'BMI']

def median_transformer_func(X):
    for col in median_columns:
        X[col] = X[col].fillna(clean_numeric_data[col].median())
    return X

def mean_transformer_func(X):
    for col in mean_columns:
        X[col] = X[col].fillna(clean_numeric_data[col].mean())
    return X

median_transformer = FunctionTransformer(median_transformer_func)
mean_transformer = FunctionTransformer(mean_transformer_func)
simple_imputer_transformer = ColumnTransformer(transformers=[
    ('median', median_transformer, median_columns),
    ('mean', mean_transformer, mean_columns)
])

Variables con alta colinealidad

In [84]:
# Waist to height ratio
temp_abdominal = clean_numeric_data['Abdominal Circumference (cm)'].fillna(clean_numeric_data['Abdominal Circumference (cm)'].mean())
k_waist_to_height_ratio = (clean_numeric_data['Waist-to-Height Ratio'] / temp_abdominal).dropna().median()

# Abdominal circumference
k_abdominal_circumference = (clean_numeric_data['Abdominal Circumference (cm)'] / clean_numeric_data['Waist-to-Height Ratio']).dropna().median()

# Total cholesterol
temp_ldl = clean_numeric_data['Estimated LDL (mg/dL)'].fillna(clean_numeric_data['Estimated LDL (mg/dL)'].mean())
k_total_cholesterol = (clean_numeric_data['Total Cholesterol (mg/dL)'] / temp_ldl).dropna().median()

# Estimated LDL
k_estimated_ldl = (clean_numeric_data['Estimated LDL (mg/dL)'] / clean_numeric_data['Total Cholesterol (mg/dL)']).dropna().median()

# Height (cm)
temp_height_m = clean_numeric_data['Height (m)'].fillna(clean_numeric_data['Height (m)'].mean())

def correlated_imputer(df):
    df['Waist-to-Height Ratio'] = df['Waist-to-Height Ratio'].fillna(k_waist_to_height_ratio * temp_abdominal)
    df['Abdominal Circumference (cm)'] = df['Abdominal Circumference (cm)'].fillna(k_abdominal_circumference * df['Waist-to-Height Ratio'])
    df['Total Cholesterol (mg/dL)'] = df['Total Cholesterol (mg/dL)'].fillna(k_total_cholesterol * temp_ldl)
    df['Estimated LDL (mg/dL)'] = df['Estimated LDL (mg/dL)'].fillna(k_estimated_ldl * df['Total Cholesterol (mg/dL)'])
    df['Height (cm)'] = df['Height (cm)'].fillna(temp_height_m * 100)
    df['Height (m)'] = df['Height (m)'].fillna(df['Height (cm)'] / 100)
    return df

corr_imputer = FunctionTransformer(correlated_imputer)
corr_features = ['Waist-to-Height Ratio', 'Abdominal Circumference (cm)', 'Total Cholesterol (mg/dL)', 'Estimated LDL (mg/dL)', 'Height (cm)', 'Height (m)']
imputer_transformer = ColumnTransformer(transformers=[
    ('simple', simple_imputer_transformer, median_columns + mean_columns),
    ('correlated', corr_imputer, corr_features)
])
numeric_transformer.steps.append(('imputer', imputer_transformer))

### Eliminación de variables con alta multicolinealidad

In [85]:
columns_vif=['Height (cm)', 'Waist-to-Height Ratio', 'Estimated LDL (mg/dL)', 'Blood Pressure Category']
def drop_high_vif(df):
    f_names = median_columns + mean_columns + corr_features
    df = pd.DataFrame(df, columns=f_names)
    return df.drop(columns=columns_vif, errors='ignore')
dropper_vif = FunctionTransformer(drop_high_vif)
numeric_transformer.steps.append(('dropper_vif', dropper_vif))

### Escalado MinMax

In [86]:
numeric_transformer.steps.append(('scaler', MinMaxScaler()))

### Construcción del pipeline final

In [87]:
set_config(display="diagram")
numeric_transformer

0,1,2
,"steps  steps: list of tuples List of (name of step, estimator) tuples that are to be chained in sequential order. To be compatible with the scikit-learn API, all steps must define `fit`. All non-last steps must also define `transform`. See :ref:`Combining Estimators ` for more details.","[('rangos', ...), ('imputer', ...), ...]"
,"transform_input  transform_input: list of str, default=None The names of the :term:`metadata` parameters that should be transformed by the pipeline before passing it to the step consuming it. This enables transforming some input arguments to ``fit`` (other than ``X``) to be transformed by the steps of the pipeline up to the step which requires them. Requirement is defined via :ref:`metadata routing `. For instance, this can be used to pass a validation set through the pipeline. You can only set this if metadata routing is enabled, which you can enable using ``sklearn.set_config(enable_metadata_routing=True)``. .. versionadded:: 1.6",
,"memory  memory: str or object with the joblib.Memory interface, default=None Used to cache the fitted transformers of the pipeline. The last step will never be cached, even if it is a transformer. By default, no caching is performed. If a string is given, it is the path to the caching directory. Enabling caching triggers a clone of the transformers before fitting. Therefore, the transformer instance given to the pipeline cannot be inspected directly. Use the attribute ``named_steps`` or ``steps`` to inspect estimators within the pipeline. Caching the transformers is advantageous when fitting is time consuming. See :ref:`sphx_glr_auto_examples_neighbors_plot_caching_nearest_neighbors.py` for an example on how to enable caching.",
,"verbose  verbose: bool, default=False If True, the time elapsed while fitting each step will be printed as it is completed.",False

0,1,2
,"func  func: callable, default=None The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function.",<function lim...001D11739A400>
,"inverse_func  inverse_func: callable, default=None The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function.",
,"validate  validate: bool, default=False Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or  sparse matrix. If the conversion is not possible an exception is  raised. .. versionchanged:: 0.22  The default of ``validate`` changed from True to False.",False
,"accept_sparse  accept_sparse: bool, default=False Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised.",False
,"check_inverse  check_inverse: bool, default=True Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20",True
,"feature_names_out  feature_names_out: callable, 'one-to-one' or None, default=None Determines the list of feature names that will be returned by the `get_feature_names_out` method. If it is 'one-to-one', then the output feature names will be equal to the input feature names. If it is a callable, then it must take two positional arguments: this `FunctionTransformer` (`self`) and an array-like of input feature names (`input_features`). It must return an array-like of output feature names. The `get_feature_names_out` method is only defined if `feature_names_out` is not None. See ``get_feature_names_out`` for more details. .. versionadded:: 1.1",
,"kw_args  kw_args: dict, default=None Dictionary of additional keyword arguments to pass to func. .. versionadded:: 0.18",
,"inv_kw_args  inv_kw_args: dict, default=None Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18",

0,1,2
,"transformers  transformers: list of tuples List of (name, transformer, columns) tuples specifying the transformer objects to be applied to subsets of the data. name : str  Like in Pipeline and FeatureUnion, this allows the transformer and  its parameters to be set using ``set_params`` and searched in grid  search. transformer : {'drop', 'passthrough'} or estimator  Estimator must support :term:`fit` and :term:`transform`.  Special-cased strings 'drop' and 'passthrough' are accepted as  well, to indicate to drop the columns or to pass them through  untransformed, respectively. columns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable  Indexes the data on its second axis. Integers are interpreted as  positional columns, while strings can reference DataFrame columns  by name. A scalar string or int should be used where  ``transformer`` expects X to be a 1d array-like (vector),  otherwise a 2d array will be passed to the transformer.  A callable is passed the input data `X` and can return any of the  above. To select multiple columns by name or dtype, you can use  :obj:`make_column_selector`.","[('simple', ...), ('correlated', ...)]"
,"remainder  remainder: {'drop', 'passthrough'} or estimator, default='drop' By default, only the specified columns in `transformers` are transformed and combined in the output, and the non-specified columns are dropped. (default of ``'drop'``). By specifying ``remainder='passthrough'``, all remaining columns that were not specified in `transformers`, but present in the data passed to `fit` will be automatically passed through. This subset of columns is concatenated with the output of the transformers. For dataframes, extra columns not seen during `fit` will be excluded from the output of `transform`. By setting ``remainder`` to be an estimator, the remaining non-specified columns will use the ``remainder`` estimator. The estimator must support :term:`fit` and :term:`transform`. Note that using this feature requires that the DataFrame columns input at :term:`fit` and :term:`transform` have identical order.",'drop'
,"sparse_threshold  sparse_threshold: float, default=0.3 If the output of the different transformers contains sparse matrices, these will be stacked as a sparse matrix if the overall density is lower than this value. Use ``sparse_threshold=0`` to always return dense. When the transformed output consists of all dense data, the stacked result will be dense, and this keyword will be ignored.",0.3
,"n_jobs  n_jobs: int, default=None Number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details.",
,"transformer_weights  transformer_weights: dict, default=None Multiplicative weights for features per transformer. The output of the transformer is multiplied by these weights. Keys are transformer names, values the weights.",
,"verbose  verbose: bool, default=False If True, the time elapsed while fitting each transformer will be printed as it is completed.",False
,"verbose_feature_names_out  verbose_feature_names_out: bool, str or Callable[[str, str], str], default=True - If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix  all feature names with the name of the transformer that generated that  feature. It is equivalent to setting  `verbose_feature_names_out=""{transformer_name}__{feature_name}""`. - If False, :meth:`ColumnTransformer.get_feature_names_out` will not  prefix any feature names and will error if feature names are not  unique. - If ``Callable[[str, str], str]``,  :meth:`ColumnTransformer.get_feature_names_out` will rename all the features  using the name of the transformer. The first argument of the callable is the  transformer name and the second argument is the feature name. The returned  string will be the new feature name. - If ``str``, it must be a string ready for formatting. The given string will  be formatted using two field names: ``transformer_name`` and ``feature_name``.  e.g. ``""{feature_name}__{transformer_name}""``. See :meth:`str.format` method  from the standard library for more info. .. versionadded:: 1.0 .. versionchanged:: 1.6  `verbose_feature_names_out` can be a callable or a string to be formatted.",True
,"force_int_remainder_cols  force_int_remainder_cols: bool, default=False This parameter has no effect. .. note::  If you do not access the list of columns for the remainder columns  in the `transformers_` fitted attribute, you do not need to set  this parameter. .. versionadded:: 1.5 .. versionchanged:: 1.7  The default value for `force_int_remainder_cols` will change from  `True` to `False` in version 1.7. .. deprecated:: 1.7  `force_int_remainder_cols` is deprecated and will be removed in 1.9.",'deprecated'

0,1,2
,"func  func: callable, default=None The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function.",<function med...001D11739A610>
,"inverse_func  inverse_func: callable, default=None The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function.",
,"validate  validate: bool, default=False Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or  sparse matrix. If the conversion is not possible an exception is  raised. .. versionchanged:: 0.22  The default of ``validate`` changed from True to False.",False
,"accept_sparse  accept_sparse: bool, default=False Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised.",False
,"check_inverse  check_inverse: bool, default=True Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20",True
,"feature_names_out  feature_names_out: callable, 'one-to-one' or None, default=None Determines the list of feature names that will be returned by the `get_feature_names_out` method. If it is 'one-to-one', then the output feature names will be equal to the input feature names. If it is a callable, then it must take two positional arguments: this `FunctionTransformer` (`self`) and an array-like of input feature names (`input_features`). It must return an array-like of output feature names. The `get_feature_names_out` method is only defined if `feature_names_out` is not None. See ``get_feature_names_out`` for more details. .. versionadded:: 1.1",
,"kw_args  kw_args: dict, default=None Dictionary of additional keyword arguments to pass to func. .. versionadded:: 0.18",
,"inv_kw_args  inv_kw_args: dict, default=None Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18",

0,1,2
,"func  func: callable, default=None The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function.",<function mea...001D11739A1F0>
,"inverse_func  inverse_func: callable, default=None The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function.",
,"validate  validate: bool, default=False Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or  sparse matrix. If the conversion is not possible an exception is  raised. .. versionchanged:: 0.22  The default of ``validate`` changed from True to False.",False
,"accept_sparse  accept_sparse: bool, default=False Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised.",False
,"check_inverse  check_inverse: bool, default=True Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20",True
,"feature_names_out  feature_names_out: callable, 'one-to-one' or None, default=None Determines the list of feature names that will be returned by the `get_feature_names_out` method. If it is 'one-to-one', then the output feature names will be equal to the input feature names. If it is a callable, then it must take two positional arguments: this `FunctionTransformer` (`self`) and an array-like of input feature names (`input_features`). It must return an array-like of output feature names. The `get_feature_names_out` method is only defined if `feature_names_out` is not None. See ``get_feature_names_out`` for more details. .. versionadded:: 1.1",
,"kw_args  kw_args: dict, default=None Dictionary of additional keyword arguments to pass to func. .. versionadded:: 0.18",
,"inv_kw_args  inv_kw_args: dict, default=None Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18",

0,1,2
,"func  func: callable, default=None The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function.",<function cor...001D11739AF00>
,"inverse_func  inverse_func: callable, default=None The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function.",
,"validate  validate: bool, default=False Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or  sparse matrix. If the conversion is not possible an exception is  raised. .. versionchanged:: 0.22  The default of ``validate`` changed from True to False.",False
,"accept_sparse  accept_sparse: bool, default=False Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised.",False
,"check_inverse  check_inverse: bool, default=True Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20",True
,"feature_names_out  feature_names_out: callable, 'one-to-one' or None, default=None Determines the list of feature names that will be returned by the `get_feature_names_out` method. If it is 'one-to-one', then the output feature names will be equal to the input feature names. If it is a callable, then it must take two positional arguments: this `FunctionTransformer` (`self`) and an array-like of input feature names (`input_features`). It must return an array-like of output feature names. The `get_feature_names_out` method is only defined if `feature_names_out` is not None. See ``get_feature_names_out`` for more details. .. versionadded:: 1.1",
,"kw_args  kw_args: dict, default=None Dictionary of additional keyword arguments to pass to func. .. versionadded:: 0.18",
,"inv_kw_args  inv_kw_args: dict, default=None Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18",

0,1,2
,"func  func: callable, default=None The callable to use for the transformation. This will be passed the same arguments as transform, with args and kwargs forwarded. If func is None, then func will be the identity function.",<function dro...001D11739BA00>
,"inverse_func  inverse_func: callable, default=None The callable to use for the inverse transformation. This will be passed the same arguments as inverse transform, with args and kwargs forwarded. If inverse_func is None, then inverse_func will be the identity function.",
,"validate  validate: bool, default=False Indicate that the input X array should be checked before calling ``func``. The possibilities are: - If False, there is no input validation. - If True, then X will be converted to a 2-dimensional NumPy array or  sparse matrix. If the conversion is not possible an exception is  raised. .. versionchanged:: 0.22  The default of ``validate`` changed from True to False.",False
,"accept_sparse  accept_sparse: bool, default=False Indicate that func accepts a sparse matrix as input. If validate is False, this has no effect. Otherwise, if accept_sparse is false, sparse matrix inputs will cause an exception to be raised.",False
,"check_inverse  check_inverse: bool, default=True Whether to check that or ``func`` followed by ``inverse_func`` leads to the original inputs. It can be used for a sanity check, raising a warning when the condition is not fulfilled. .. versionadded:: 0.20",True
,"feature_names_out  feature_names_out: callable, 'one-to-one' or None, default=None Determines the list of feature names that will be returned by the `get_feature_names_out` method. If it is 'one-to-one', then the output feature names will be equal to the input feature names. If it is a callable, then it must take two positional arguments: this `FunctionTransformer` (`self`) and an array-like of input feature names (`input_features`). It must return an array-like of output feature names. The `get_feature_names_out` method is only defined if `feature_names_out` is not None. See ``get_feature_names_out`` for more details. .. versionadded:: 1.1",
,"kw_args  kw_args: dict, default=None Dictionary of additional keyword arguments to pass to func. .. versionadded:: 0.18",
,"inv_kw_args  inv_kw_args: dict, default=None Dictionary of additional keyword arguments to pass to inverse_func. .. versionadded:: 0.18",

0,1,2
,"feature_range  feature_range: tuple (min, max), default=(0, 1) Desired range of transformed data.","(0, ...)"
,"copy  copy: bool, default=True Set to False to perform inplace row normalization and avoid a copy (if the input is already a numpy array).",True
,"clip  clip: bool, default=False Set to True to clip transformed values of held-out data to provided `feature_range`. Since this parameter will clip values, `inverse_transform` may not be able to restore the original data. .. note::  Setting `clip=True` does not prevent feature drift (a distribution  shift between training and test data). The transformed values are clipped  to the `feature_range`, which helps avoid unintended behavior in models  sensitive to out-of-range inputs (e.g. linear models). Use with care,  as clipping can distort the distribution of test data. .. versionadded:: 0.24",False


## Comparación y selección de modelos
Para evaluar distintos modelos de regresión polinomial aplicamos transformación de variables polinomiales al transformador numérico, para después ajustarlo a un modelo de regresión lineal. Evaluaremos modelos de hasta n grado.

In [None]:
pipelines = []
for deg in range(1, 4):
    numeric_transformer.steps.append(('polinomial', PolynomialFeatures(degree=deg)))
    preprocessing_transformer = ColumnTransformer(transformers=[
        ('binary', binary_transformer, binary_features),
        ('ordinal', ordinal_transformer, ordinal_features),
        ('numeric', numeric_transformer, numeric_features)
    ])
    pipeline_regresion = Pipeline(steps=[
        ('dropper', dropper),
        ('preprocesamiento', preprocessing_transformer),
        ('modelo', LinearRegression())
    ])
    pipelines.append(pipeline_regresion)