    Task: Complete Pipeline for a Dataset
1. Objective: Build a complex pipeline with multiple transformations.
2. Steps:
    - Load a sample dataset.
    - Define a transformation pipeline with both imputation and scaling.

In [None]:
# Write your code from here
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

# --- Sample data -----------------------------------------------------------
# Three numeric columns, each containing at least one missing value (NaN),
# so that the imputation step has something to do.
data = {
    'Age': [25, 30, np.nan, 45, 50],
    'Income': [50000, np.nan, 60000, 65000, np.nan],
    'Score': [200, 250, 300, np.nan, 400],
}
df = pd.DataFrame(data=data)

# --- Columns handled by the numeric branch ---------------------------------
numeric_features = ['Age', 'Income', 'Score']

# --- Numeric branch: fill gaps first, then standardize ---------------------
# Order matters: the imputer runs before the scaler, so the scaler is fitted
# on data that no longer contains NaNs.
numeric_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# --- Route the listed columns through the numeric branch -------------------
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_pipeline, numeric_features),
    ]
)

# --- Fit on the sample data and transform it in one go ---------------------
transformed_data = preprocessor.fit_transform(df)

# --- Wrap the resulting array in a labelled DataFrame and display it -------
transformed_df = pd.DataFrame(data=transformed_data, columns=numeric_features)
print(transformed_df)



In [None]:
# Task: Imputation Function


import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

def impute_missing_values(df: pd.DataFrame, numeric_columns) -> pd.DataFrame:
    """Return a copy of ``df`` with missing values in ``numeric_columns`` mean-imputed.

    Args:
        df: Input frame. It is not modified; all work happens on a copy so the
            caller's data (or a slice of a larger frame) is left untouched.
        numeric_columns: Column labels of ``df`` to impute. An empty
            collection means there is nothing to impute.

    Returns:
        A new DataFrame. Each listed column has been passed through
        ``SimpleImputer(strategy='mean')``; all other columns are copied
        unchanged.

    Note:
        NOTE(review): a column consisting entirely of missing values has no
        mean. Depending on the scikit-learn version and settings the imputer
        may drop such a column, in which case the assignment below fails on a
        shape mismatch -- confirm the desired handling.
    """
    result = df.copy()

    # scikit-learn estimators reject input with zero features, so bail out
    # early instead of surfacing an unrelated validation error.
    if len(numeric_columns) == 0:
        return result

    imputer = SimpleImputer(strategy='mean')
    result[numeric_columns] = imputer.fit_transform(result[numeric_columns])
    return result






# Scaling Function


from sklearn.preprocessing import StandardScaler

def scale_features(df: pd.DataFrame, numeric_columns) -> pd.DataFrame:
    """Return a copy of ``df`` with ``numeric_columns`` standardized.

    Args:
        df: Input frame. It is not modified; all work happens on a copy so the
            caller's data (or a slice of a larger frame) is left untouched.
        numeric_columns: Column labels of ``df`` to scale. An empty
            collection means there is nothing to scale.

    Returns:
        A new DataFrame. Each listed column has been replaced by the output
        of a freshly fitted ``StandardScaler``; all other columns are copied
        unchanged.
    """
    result = df.copy()

    # scikit-learn estimators reject input with zero features, so bail out
    # early instead of surfacing an unrelated validation error.
    if len(numeric_columns) == 0:
        return result

    scaler = StandardScaler()
    result[numeric_columns] = scaler.fit_transform(result[numeric_columns])
    return result







# Combined Transformation Function




def transform_dataset(df, numeric_columns):
    """Impute and then scale the given columns of ``df``.

    Runs ``impute_missing_values`` followed by ``scale_features`` on the same
    ``numeric_columns`` and returns the result of the scaling step.
    Imputation comes first so that the scaling step receives the imputed
    values. Whether ``df`` itself is modified is determined by those two
    helpers.
    """
    imputed = impute_missing_values(df, numeric_columns)
    return scale_features(imputed, numeric_columns)



