### Handling Missing Values - Imputation within ML Pipelines
**Description**: Implement a machine learning pipeline that includes imputation and a classifier.

In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Create a sample DataFrame with missing values and a target variable
data_pipeline = {'numerical_feature_1': [1, 2, None, 4, 5, None, 7, 8],
                 'numerical_feature_2': [2.1, 3.5, 1.8, 4.2, None, 2.9, 5.1, 3.8],
                 'categorical_feature': ['A', None, 'B', 'A', 'C', 'B', None, 'A'],
                 'target': [0, 1, 0, 1, 0, 1, 0, 1]}
df_pipeline = pd.DataFrame(data_pipeline)

# Separate features (X) and target (y)
X = df_pipeline.drop('target', axis=1)
y = df_pipeline['target']

# Identify numerical and categorical features
numerical_features_pipeline = X.select_dtypes(include=['number']).columns
categorical_features_pipeline = X.select_dtypes(include=['object']).columns

# Create preprocessing pipelines for numerical and categorical features
numerical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', pd.get_dummies) # Using pandas get_dummies within the pipeline
])

# Combine preprocessing steps using ColumnTransformer (not strictly needed for this simple case)
# from sklearn.compose import ColumnTransformer
# preprocessor = ColumnTransformer([
#     ('num', numerical_pipeline, numerical_features_pipeline),
#     ('cat', categorical_pipeline, categorical_features_pipeline)
# ])

# Create the full machine learning pipeline
model_pipeline = Pipeline([
    ('preprocessor_num', numerical_pipeline),
    ('preprocessor_cat', categorical_pipeline),
    ('classifier', LogisticRegression())
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Preprocess the training data (handling categorical features separately for now)
X_train_numerical = numerical_pipeline.fit_transform(X_train[numerical_features_pipeline])
X_train_categorical = categorical_pipeline.fit_transform(X_train[categorical_features_pipeline])
X_train_processed = pd.concat([pd.DataFrame(X_train_numerical, columns=numerical_features_pipeline),
                               pd.DataFrame(X_train_categorical, columns=pd.get_dummies(X_train[categorical_features_pipeline]).columns)], axis=1)

# Train the classifier
model_pipeline.fit(X_train_processed, y_train)

# Preprocess the testing data
X_test_numerical = numerical_pipeline.transform(X_test[numerical_features_pipeline])
X_test_categorical = categorical_pipeline.transform(X_test[categorical_features_pipeline])
X_test_processed = pd.concat([pd.DataFrame(X_test_numerical, columns=numerical_features_pipeline),
                              pd.DataFrame(X_test_categorical, columns=pd.get_dummies(X_test[categorical_features_pipeline]).columns)], axis=1)

# Make predictions on the test set
y_pred = model_pipeline.predict(X_test_processed)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of the pipeline: {accuracy:.2f}")

print("\nPipeline Steps:")
for name, step in model_pipeline.steps:
    print(f"- {name}: {step}")

AttributeError: This 'Pipeline' has no attribute 'fit_transform'