In [66]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

train = pd.read_csv("../train.csv")
test = pd.read_csv("../test.csv")
X_train, y_train = train.drop("Survived", axis="columns"), train["Survived"]
X_test, y_test = test.drop("Survived", axis="columns"), train["Survived"]

MAX_DEPTH = None
MAX_FEATURES = "sqrt"
n_trees=20

numeric_features = ["Age", "Fare"]
categorical_features = ["Embarked", "Sex"]

# Variables numériques
numeric_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", MinMaxScaler()),
    ]
)

# Variables catégorielles
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder()),
    ]
)

# Preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ("Preprocessing numerical", numeric_transformer, numeric_features),
        (
            "Preprocessing categorical",
            categorical_transformer,
            categorical_features,
        ),
    ]
)

# Pipeline
pipe = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("classifier", RandomForestClassifier(
            n_estimators=n_trees,
            max_depth=MAX_DEPTH,
            max_features=MAX_FEATURES
        )),
    ]
)

pipe

pipe.fit(X_train, y_train)

ValueError: Input y contains NaN.

In [67]:
pipe

In [60]:
pipe[0]

In [61]:
pipe[:-1]

In [31]:
pipe['preprocessor']

In [64]:
pipe['preprocessor']['Preprocessing numerical']

TypeError: ColumnTransformer is subscriptable after it is fitted

In [32]:
preprocessor = pipe.named_steps["preprocessor"]
preprocessor

In [55]:
preprocessor._transformers

[('Preprocessing numerical',
  Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                  ('scaler', MinMaxScaler())])),
 ('Preprocessing categorical',
  Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),
                  ('onehot', OneHotEncoder())]))]

In [58]:
preprocessor.named_transformers_

AttributeError: 'ColumnTransformer' object has no attribute 'transformers_'

In [57]:
preprocessor.named_transformers_["Preprocessing numerical"]

AttributeError: 'ColumnTransformer' object has no attribute 'transformers_'

In [20]:
import numpy as np

new_data = {
    "Age": [22, np.nan, 35, 28, np.nan],
    "Fare": [7.25, 8.05, np.nan, 13.00, 15.50]
}

new_data = pd.DataFrame(new_data)

In [26]:
pipe['preprocessor']['Preprocessing numerical']

In [34]:
numeric_preprocessor = preprocessor.named_transformers_["Preprocessing numerical"]

# Appliquer la transformation aux nouvelles données (uniquement les colonnes numériques)
new_data_transformed = numeric_preprocessor.transform(new_data[numeric_features])

# Convertir en DataFrame avec les noms des colonnes d'origine
new_data_transformed = pd.DataFrame(new_data_transformed, columns=numeric_features)

print(new_data_transformed)


AttributeError: 'ColumnTransformer' object has no attribute 'transformers_'