    Task: Complete Pipeline for a Dataset
1. Objective: Build a complex pipeline with multiple transformations.
2. Steps:
    - Load a sample dataset.
    - Define a transformation pipeline with both imputation and scaling.

In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Cell 2: Load dataset and check for issues
try:
    data = load_diabetes()
    X, y = data.data, data.target
    
    if X is None or X.size == 0:
        raise ValueError("Dataset is empty.")
    
    print("Dataset loaded successfully.")
    print("Shape:", X.shape)
    
except Exception as e:
    print("Error loading dataset:", e)

# Cell 3: Introduce missing values artificially (for demo)
rng = np.random.RandomState(42)
missing_mask = rng.rand(*X.shape) < 0.1  # 10% missing values
X[missing_mask] = np.nan
print("Missing values introduced.")

# Cell 4: Define transformation pipeline function
def build_pipeline():
    """
    Returns a pipeline with:
    - Mean imputation
    - Standard scaling
    """
    return Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

# Cell 5: Apply the pipeline with error handling
try:
    pipeline = build_pipeline()
    X_transformed = pipeline.fit_transform(X)
    print("Pipeline executed successfully.")
    print("Transformed shape:", X_transformed.shape)
    print("Preview of transformed data:\n", X_transformed[:5])
except Exception as e:
    print("Pipeline execution failed:", e)

# Cell 6: Function to test pipeline (basic tests)
def test_pipeline():
    try:
        # Test shape and no missing values
        assert X_transformed.shape == X.shape, "Shape mismatch after transformation"
        assert not np.isnan(X_transformed).any(), "NaN values found in transformed data"
        print("All tests passed.")
    except AssertionError as e:
        print("Test failed:", e)

# Cell 7: Run tests
test_pipeline()

Dataset loaded successfully.
Shape: (442, 10)
Missing values introduced.
Pipeline executed successfully.
Transformed shape: (442, 10)
Preview of transformed data:
 [[ 8.35080170e-01  1.08350996e+00  1.42833065e+00  4.67357604e-01
  -1.00112790e+00 -7.71537921e-01  7.51356314e-18 -3.49217275e-02
   4.33613304e-01 -3.71984034e-01]
 [ 9.61916718e-18 -1.03013508e+00 -1.14835330e+00 -5.94943174e-01
  -2.11460865e-01 -4.31149808e-01  1.72829366e+00 -8.56609433e-01
  -1.52231970e+00 -2.05133645e+00]
 [ 1.88249997e+00  1.08350996e+00  1.03569309e+00 -1.39671412e-01
  -1.03149971e+00 -7.57922396e-01 -7.38049321e-01 -3.49217275e-02
   5.57626093e-02  6.34938491e-17]
 [-1.98489622e+00 -1.03013508e+00  4.93706764e-18 -8.22579055e-01
   2.44116269e-01  5.28744668e-01 -8.23095630e-01 -9.65689469e-18
   4.95241358e-01 -1.85389321e-01]
 [ 1.09943383e-01 -1.03013508e+00  4.93706764e-18  4.67357604e-01
   6.18854152e-02  3.24511801e-01  1.97460086e-01 -3.49217275e-02
  -7.16709693e-01 -1.02506553e+00]]


In [5]:
import unittest
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

def build_pipeline():
    return Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

class TestPipeline(unittest.TestCase):
    def test_pipeline_execution(self):
        X = np.array([[1, 2], [np.nan, 3], [7, 6]])
        pipe = build_pipeline()
        X_trans = pipe.fit_transform(X)
        self.assertEqual(X_trans.shape, (3, 2))
        self.assertFalse(np.isnan(X_trans).any())

if __name__ == '__main__':
    unittest.main()

usage: ipykernel_launcher.py [-h] [-v] [-q] [--locals] [-f] [-c] [-b]
                             [-k TESTNAMEPATTERNS]
                             [tests ...]
ipykernel_launcher.py: error: argument -f/--failfast: ignored explicit argument '/home/vscode/.local/share/jupyter/runtime/kernel-v3737b002486cd363514cda598eafd05d98a39f9ad.json'


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
