    Task: Complete Pipeline for a Dataset
1. Objective: Build a complex pipeline with multiple transformations.
2. Steps:
    - Load a sample dataset.
    - Define a transformation pipeline with both imputation and scaling.

In [1]:
# Write your code from here
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

def create_full_pipeline(impute_strategy='mean', scale=True):
    """
    Build an unfitted sklearn Pipeline: imputation first, then optional scaling.

    Args:
        impute_strategy (str): Forwarded to SimpleImputer as ``strategy``
            ('mean', 'median', 'most_frequent', or 'constant'). No
            ``fill_value`` is passed, so SimpleImputer's own default is used
            when the strategy is 'constant'.
        scale (bool): When truthy, a StandardScaler step is placed after the
            imputer; otherwise the pipeline contains the imputer only.

    Returns:
        sklearn.pipeline.Pipeline: Pipeline whose first step is named
        'imputer' and whose optional second step is named 'scaler'.
    """
    # The imputer is always present and always runs first.
    imputer_step = ('imputer', SimpleImputer(strategy=impute_strategy))

    # The scaler is an optional tail; an empty list leaves the pipeline imputer-only.
    trailing_steps = [('scaler', StandardScaler())] if scale else []

    return Pipeline([imputer_step, *trailing_steps])

def apply_pipeline(df, features, impute_strategy='mean', scale=True):
    """
    Apply the full pipeline to specified features in the dataframe.

    The input dataframe is never modified: a copy is returned in which the
    selected columns hold the imputed (and optionally scaled) values. The
    pipeline is fitted on the very data it transforms.

    Args:
        df (pd.DataFrame): Input dataframe
        features (str or iterable of str): Column name(s) to transform. A
            single column name is treated as a one-element list; an empty
            collection leaves every column untouched.
        impute_strategy (str): Imputation strategy
        scale (bool): Whether to scale after imputation

    Returns:
        pd.DataFrame: Copy of ``df`` with transformed features replacing originals

    Raises:
        ValueError: If the pipeline returns a different number of columns
            than were requested, so the result cannot be written back
            column-for-column.
    """
    # Normalise to a plain list: a bare string would select a 1-D Series
    # (which sklearn rejects) and a tuple would be read by pandas as one key.
    features = [features] if isinstance(features, str) else list(features)

    df_transformed = df.copy()
    if not features:
        # Nothing to transform; sklearn estimators refuse zero-column input,
        # so short-circuit instead of fitting a pipeline on an empty frame.
        return df_transformed

    pipeline = create_full_pipeline(impute_strategy, scale)
    transformed_data = pipeline.fit_transform(df[features])

    # Guard the write-back: if the pipeline changed the column count, the
    # assignment below would fail with an opaque length-mismatch error.
    n_returned = transformed_data.shape[1]
    if n_returned != len(features):
        raise ValueError(
            f"Pipeline returned {n_returned} column(s) for {len(features)} "
            f"requested feature(s); a feature with no observed values was "
            f"probably dropped by the imputer."
        )

    df_transformed[features] = transformed_data
    return df_transformed

# Example usage: impute and standardize the numeric columns of a small toy dataset
if __name__ == "__main__":
    # None marks the missing entries the imputer is expected to fill in.
    data = dict(
        age=[25, None, 47, 51, 62],
        income=[50000, 60000, None, 90000, 120000],
        gender=['M', 'F', 'F', 'M', 'F'],
    )
    df = pd.DataFrame(data)

    # Only the numeric columns go through the pipeline; 'gender' is left as is.
    numerical_features = ['age', 'income']

    df_processed = apply_pipeline(
        df,
        numerical_features,
        impute_strategy='mean',
        scale=True,
    )
    print(df_processed)



        age    income gender
0 -1.767461 -1.224745      M
1  0.000000 -0.816497      F
2  0.062381  0.000000      F
3  0.395080  0.408248      M
4  1.310001  1.632993      F


In [2]:
# Task: Imputation Function








# Scaling Function









# Combined Transformation Function







