### Scaling & Feature Selection in a Pipeline
**Description**: Create a pipeline that includes feature scaling, variance threshold selection, and a classification model.

In [None]:
# write your code from here

In [1]:
import numpy as np
import pandas as pd
import unittest
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# ---------------------------------------------
# Step 1: Load Data
# ---------------------------------------------
def load_data():
    try:
        data = load_breast_cancer()
        X = pd.DataFrame(data.data, columns=data.feature_names)
        y = data.target
        return train_test_split(X, y, test_size=0.2, random_state=42)
    except Exception as e:
        print(f"Error loading data: {e}")
        raise

# ---------------------------------------------
# Step 2: Create ML Pipeline
# ---------------------------------------------
def create_pipeline(threshold=0.01):
    try:
        pipeline = Pipeline([
            ('scaler', StandardScaler()),
            ('selector', VarianceThreshold(threshold=threshold)),
            ('classifier', LogisticRegression(max_iter=1000))
        ])
        return pipeline
    except Exception as e:
        print(f"Error creating pipeline: {e}")
        raise

# ---------------------------------------------
# Step 3: Train and Evaluate
# ---------------------------------------------
def train_and_evaluate(X_train, X_test, y_train, y_test):
    try:
        pipeline = create_pipeline()
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {acc:.2f}")
        return acc, pipeline
    except Exception as e:
        print(f"Training/Evaluation error: {e}")
        raise

# ---------------------------------------------
# Step 4: Unit Tests
# ---------------------------------------------
class TestPipeline(unittest.TestCase):

    def test_data_split(self):
        X_train, X_test, y_train, y_test = load_data()
        self.assertEqual(X_train.shape[0], 455)
        self.assertEqual(X_test.shape[0], 114)

    def test_pipeline_accuracy(self):
        X_train, X_test, y_train, y_test = load_data()
        acc, _ = train_and_evaluate(X_train, X_test, y_train, y_test)
        self.assertGreater(acc, 0.8, "Accuracy should be > 80%")

    def test_feature_selection(self):
        X_train, X_test, y_train, y_test = load_data()
        _, pipeline = train_and_evaluate(X_train, X_test, y_train, y_test)
        selected_features = pipeline.named_steps['selector'].get_support(indices=True)
        self.assertTrue(len(selected_features) < X_train.shape[1], "Feature selection should reduce features")

# ---------------------------------------------
# Step 5: Execute Full Workflow
# ---------------------------------------------
if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_data()
    train_and_evaluate(X_train, X_test, y_train, y_test)
    unittest.main(argv=[''], exit=False)


.F.
FAIL: test_feature_selection (__main__.TestPipeline)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_12511/59666647.py", line 74, in test_feature_selection
    self.assertTrue(len(selected_features) < X_train.shape[1], "Feature selection should reduce features")
AssertionError: False is not true : Feature selection should reduce features

----------------------------------------------------------------------
Ran 3 tests in 0.113s

FAILED (failures=1)


Accuracy: 0.97
Accuracy: 0.97
Accuracy: 0.97
