### Ensuring Feature Consistency Between Training & Inference Pipelines:

**Task 1**: Consistent Feature Preparation
- Step 1: Write a function for data preprocessing and imputation shared by both training and inference pipelines.
- Step 2: Demonstrate consistent application on both datasets.

In [4]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import unittest

class PreprocessingPipeline:
    """Median-impute and standardize numeric features.

    The same fitted instance is meant to be applied to both the training
    data and the inference data, so that both go through the identical
    imputation values, scaling parameters and column order.
    """

    def __init__(self):
        self.imputer = SimpleImputer(strategy='median')
        self.scaler = StandardScaler()
        # Column names, in training order; stays None until fit() succeeds.
        self.columns = None

    def fit(self, df: pd.DataFrame) -> "PreprocessingPipeline":
        """Learn imputation and scaling parameters from ``df``.

        Args:
            df: Non-empty DataFrame whose columns are all numeric.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If ``df`` is not a DataFrame, is empty, or has a
                column with no observed (non-NaN) values.
            TypeError: If any column is not numeric.
        """
        self._validate_input(df)

        # A column without any observed value has no median. SimpleImputer
        # drops such columns, which would leave the output narrower than
        # the recorded column list, so reject them up front.
        all_missing = df.columns[df.isna().all().to_numpy()].tolist()
        if all_missing:
            raise ValueError(
                f"Columns with no observed values cannot be imputed: {all_missing}"
            )

        imputed = self.imputer.fit_transform(df)
        # The scaler is fitted on imputed values, matching what transform()
        # feeds it later.
        self.scaler.fit(imputed)
        self.columns = df.columns.tolist()
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply the fitted imputation and scaling to ``df``.

        Columns are reordered to the order seen during fit; columns that
        were not present during fit are dropped from the result.

        Args:
            df: Non-empty, all-numeric DataFrame containing every column
                seen during fit.

        Returns:
            A DataFrame with the fitted columns, carrying the same index
            as ``df``.

        Raises:
            RuntimeError: If the pipeline has not been fitted.
            ValueError: If ``df`` is not a DataFrame, is empty, or lacks a
                fitted column.
            TypeError: If any column of ``df`` is not numeric.
        """
        self._validate_input(df, require_columns=True)
        ordered = df[self.columns]  # Ensure consistent column order
        imputed = self.imputer.transform(ordered)
        scaled = self.scaler.transform(imputed)
        # Keep the caller's index so the result can be aligned or joined
        # back to the original rows.
        return pd.DataFrame(scaled, columns=self.columns, index=df.index)

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fit on ``df`` and return its transformed version."""
        return self.fit(df).transform(df)

    def _validate_input(self, df: pd.DataFrame, require_columns=False):
        """Check that ``df`` is usable by fit() or transform().

        Args:
            df: Candidate input.
            require_columns: When true, also require that the pipeline is
                fitted and that ``df`` contains every fitted column.

        Raises:
            ValueError: Not a DataFrame, empty, or missing fitted columns.
            RuntimeError: ``require_columns`` is true but the pipeline has
                not been fitted.
            TypeError: A column is not numeric.
        """
        if not isinstance(df, pd.DataFrame):
            raise ValueError("Input must be a pandas DataFrame.")
        if df.empty:
            raise ValueError("Input DataFrame is empty.")
        if require_columns:
            if self.columns is None:
                # Without this guard the column check would be skipped and
                # the caller would hit an opaque KeyError further down.
                raise RuntimeError(
                    "PreprocessingPipeline is not fitted yet; "
                    "call fit() before transform()."
                )
            missing = set(self.columns) - set(df.columns)
            if missing:
                raise ValueError(f"Missing columns in input data: {missing}")
        if not all(np.issubdtype(dtype, np.number) for dtype in df.dtypes):
            raise TypeError("All columns must be numeric.")

# ========== Example Usage ==========
if __name__ == "__main__":
    # The pipeline is fitted once on the training frame and then reused,
    # unchanged, on the test frame.
    pipeline = PreprocessingPipeline()

    # Both frames contain missing values so the imputation step is exercised.
    train_df = pd.DataFrame(
        {'feature1': [1, 2, np.nan, 4], 'feature2': [10, 15, 10, np.nan]}
    )
    test_df = pd.DataFrame(
        {'feature1': [3, np.nan], 'feature2': [12, 11]}
    )

    train_processed = pipeline.fit_transform(train_df)
    test_processed = pipeline.transform(test_df)

    # Report both results, training data first.
    for heading, processed in (
        ("Processed Train Data:\n", train_processed),
        ("Processed Test Data:\n", test_processed),
    ):
        print(heading, processed)

# ========== Unit Tests ==========
class TestPreprocessingPipeline(unittest.TestCase):
    """Unit tests for PreprocessingPipeline: output shape and input rejection."""

    def setUp(self):
        # Each test starts from a pipeline already fitted on the training frame.
        self.train_df = pd.DataFrame(
            {'feature1': [1, 2, np.nan, 4], 'feature2': [10, 15, 10, np.nan]}
        )
        self.test_df = pd.DataFrame(
            {'feature1': [3, np.nan], 'feature2': [12, 11]}
        )
        self.pipeline = PreprocessingPipeline()
        self.pipeline.fit(self.train_df)

    def test_transform_shapes_match(self):
        # Transforming must keep the row and column counts of the input.
        transformed = self.pipeline.transform(self.test_df)
        self.assertEqual(transformed.shape, self.test_df.shape)

    def test_empty_dataframe(self):
        # Fitting on a frame with no data is rejected with ValueError.
        no_data = pd.DataFrame()
        self.assertRaises(ValueError, self.pipeline.fit, no_data)

    def test_invalid_dtype(self):
        # String-valued columns are rejected with TypeError.
        text_frame = pd.DataFrame({'feature1': ['a', 'b'], 'feature2': ['c', 'd']})
        self.assertRaises(TypeError, self.pipeline.fit, text_frame)

    def test_missing_column(self):
        # A frame lacking a fitted column is rejected with ValueError.
        partial_frame = pd.DataFrame({'feature1': [1, 2]})
        self.assertRaises(ValueError, self.pipeline.transform, partial_frame)

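# Uncomment the line below to run the unit tests directly. Inside a notebook,
# argv=[''] keeps unittest from parsing the kernel's command-line arguments and
# exit=False keeps it from shutting the kernel down when the tests finish.
# unittest.main(argv=[''], exit=False)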


Processed Train Data:
    feature1  feature2
0 -1.147079 -0.577350
1 -0.229416  1.732051
2 -0.229416 -0.577350
3  1.605910 -0.577350
Processed Test Data:
    feature1  feature2
0  0.688247   0.34641
1 -0.229416  -0.11547


**Task 2**: Pipeline Integration
- Step 1: Use sklearn pipelines to encapsulate the preprocessing steps.
- Step 2: Configure identical pipelines for both model training and inference.

In [5]:
# write your code from here

**Task 3**: Saving and Loading Preprocessing Models
- Step 1: Save the transformation model after fitting it to the training data.
- Step 2: Load and apply the saved model during inference.

In [6]:
# write your code from here