### Ensuring Feature Consistency Between Training & Inference Pipelines

**Task 1**: Consistent Feature Preparation
- Step 1: Write a function for data preprocessing and imputation shared by both training and inference pipelines.
- Step 2: Demonstrate consistent application on both datasets.

In [None]:
# write your code from here
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

def create_preprocessing_pipeline():
    """Build the unfitted preprocessing pipeline used for training and inference.

    The pipeline has two steps, in order: ``'imputer'`` (a ``SimpleImputer``
    with the mean strategy) followed by ``'scaler'`` (a ``StandardScaler``).

    Returns:
        The unfitted ``Pipeline``. If construction raises, the exception is
        swallowed and the string ``"Error: <message>"`` is returned instead.
    """
    try:
        steps = [
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler()),
        ]
        return Pipeline(steps)
    except Exception as exc:
        return f"Error: {str(exc)}"

def preprocess_train_infer(train_df, infer_df):
    """Fit the shared preprocessing on training data and apply it to both sets.

    The pipeline from ``create_preprocessing_pipeline`` is fitted on
    ``train_df`` only and then reused, already fitted, to transform
    ``infer_df``, so both outputs are produced with the same learned
    parameters.

    Args:
        train_df: Training features as a pandas DataFrame.
        infer_df: Inference features as a pandas DataFrame. It must carry the
            same feature columns as ``train_df``; a different column order is
            accepted and aligned to the training order by name.

    Returns:
        A ``(train_processed, infer_processed)`` tuple of DataFrames that keep
        the training column labels and each input's original row index.
        On invalid input or any failure a descriptive string is returned
        instead of raising.
    """
    try:
        if not isinstance(train_df, pd.DataFrame) or not isinstance(infer_df, pd.DataFrame):
            return "Both inputs must be pandas DataFrames."
        if train_df.shape[1] != infer_df.shape[1]:
            return "Train and inference data must have the same number of features."

        feature_names = list(train_df.columns)
        if list(infer_df.columns) != feature_names:
            # Equal column counts do not guarantee equal features. Realign by
            # name only when that is unambiguous (unique labels, same set);
            # otherwise report the mismatch instead of transforming the wrong
            # columns positionally.
            if (len(set(feature_names)) != len(feature_names)
                    or set(infer_df.columns) != set(feature_names)):
                return "Train and inference data must have the same feature columns."
            infer_df = infer_df.reindex(columns=train_df.columns)

        pipeline = create_preprocessing_pipeline()
        # Fit on training data only; inference data reuses the fitted state.
        train_values = pipeline.fit_transform(train_df)
        infer_values = pipeline.transform(infer_df)

        # Keep each frame's own index so rows can still be joined back to
        # labels or identifiers after preprocessing.
        train_processed = pd.DataFrame(
            train_values, columns=train_df.columns, index=train_df.index
        )
        infer_processed = pd.DataFrame(
            infer_values, columns=train_df.columns, index=infer_df.index
        )
        return train_processed, infer_processed
    except Exception as e:
        return f"Error: {str(e)}"


**Task 2**: Pipeline Integration
- Step 1: Use sklearn pipelines to encapsulate the preprocessing steps.
- Step 2: Configure identical pipelines for both model training and inference.

In [None]:
# write your code from here
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

def build_model_pipeline():
    """Build the unfitted end-to-end pipeline: preprocessing plus classifier.

    Steps, in order: ``'imputer'`` (``SimpleImputer`` with the mean strategy),
    ``'scaler'`` (``StandardScaler``) and ``'classifier'``
    (``LogisticRegression`` with default settings).

    Returns:
        The unfitted ``Pipeline``. If construction raises, the exception is
        swallowed and the string ``"Error: <message>"`` is returned instead.
    """
    try:
        steps = [
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler()),
            ('classifier', LogisticRegression()),
        ]
        return Pipeline(steps)
    except Exception as exc:
        return f"Error: {str(exc)}"

def train_and_infer(train_df, train_labels, infer_df):
    """Train the model pipeline on labelled data and predict on inference data.

    A single pipeline from ``build_model_pipeline`` is fitted on the training
    data and then used for prediction, so inference goes through the same
    fitted preprocessing steps as training.

    Args:
        train_df: Training features as a pandas DataFrame.
        train_labels: Labels for the training rows; ``len(train_labels)`` must
            equal ``len(train_df)``.
        infer_df: Inference features as a pandas DataFrame. It must carry the
            same feature columns as ``train_df``; a different column order is
            accepted and aligned to the training order by name.

    Returns:
        Whatever ``pipeline.predict`` returns for ``infer_df``. On invalid
        input or any failure a descriptive string is returned instead of
        raising.
    """
    try:
        if not (isinstance(train_df, pd.DataFrame) and isinstance(infer_df, pd.DataFrame)):
            return "Inputs must be pandas DataFrames."
        if train_df.shape[1] != infer_df.shape[1]:
            return "Train and inference data must have the same number of features."
        if len(train_df) != len(train_labels):
            return "Number of training samples and labels must match."

        feature_names = list(train_df.columns)
        if list(infer_df.columns) != feature_names:
            # Equal column counts do not guarantee equal features. Realign by
            # name only when that is unambiguous (unique labels, same set);
            # otherwise report the mismatch instead of predicting on the
            # wrong columns.
            if (len(set(feature_names)) != len(feature_names)
                    or set(infer_df.columns) != set(feature_names)):
                return "Train and inference data must have the same feature columns."
            infer_df = infer_df.reindex(columns=train_df.columns)

        pipeline = build_model_pipeline()
        pipeline.fit(train_df, train_labels)
        predictions = pipeline.predict(infer_df)
        return predictions
    except Exception as e:
        return f"Error: {str(e)}"


**Task 3**: Saving and Loading Preprocessing Models
- Step 1: Save the transformation model after fitting it to the training data.
- Step 2: Load and apply the saved model during inference.

In [None]:
# write your code from here
import joblib
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pandas as pd

def build_pipeline():
    """Return an unfitted pipeline of mean imputation, scaling and a classifier.

    Steps, in order: ``'imputer'`` (``SimpleImputer`` with the mean strategy),
    ``'scaler'`` (``StandardScaler``) and ``'classifier'``
    (``LogisticRegression`` with default settings). Any exception raised
    during construction propagates to the caller.
    """
    steps = [
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
        ('classifier', LogisticRegression()),
    ]
    return Pipeline(steps)

def train_save_pipeline(train_df, train_labels, filepath):
    """Fit a fresh pipeline on the training data and persist it with joblib.

    Args:
        train_df: Training features passed to ``Pipeline.fit``.
        train_labels: Training labels passed to ``Pipeline.fit``.
        filepath: Destination handed to ``joblib.dump``.

    Returns:
        ``"Pipeline saved to <filepath>"`` on success, otherwise
        ``"Error during training/saving: <message>"``. Exceptions are never
        propagated.
    """
    try:
        # Pipeline.fit returns the fitted pipeline itself, so it can be chained.
        fitted = build_pipeline().fit(train_df, train_labels)
        joblib.dump(fitted, filepath)
        outcome = f"Pipeline saved to {filepath}"
    except Exception as exc:
        outcome = f"Error during training/saving: {str(exc)}"
    return outcome

def load_and_predict(infer_df, filepath):
    """Load a previously saved pipeline and predict on the inference data.

    Args:
        infer_df: Inference features; must be a pandas DataFrame.
        filepath: Location of a pipeline previously written with
            ``joblib.dump``.

    Returns:
        Whatever ``pipeline.predict`` returns for ``infer_df``. If
        ``infer_df`` is not a DataFrame, or loading/prediction fails, a
        descriptive string is returned instead of raising.
    """
    # Validate the cheap precondition first so invalid input never triggers
    # deserialization of the saved file.
    if not isinstance(infer_df, pd.DataFrame):
        return "Inference data must be a pandas DataFrame."
    try:
        # NOTE(security): joblib.load is pickle-based and can execute
        # arbitrary code while loading; only use files from a trusted source.
        pipeline = joblib.load(filepath)
        predictions = pipeline.predict(infer_df)
        return predictions
    except Exception as e:
        return f"Error during loading/prediction: {str(e)}"
