### Scaling & Feature Selection in a Pipeline
**Description**: Create a pipeline that includes feature scaling, variance threshold selection, and a classification model.

In [1]:
# write your code from here

import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.exceptions import NotFittedError


# -------------------------------
# Utility: Validate Input Data
# -------------------------------
def validate_input(df, feature_cols, target_col):
    """Check that ``df`` has data and contains every required column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to validate.
    feature_cols : str or iterable of column labels
        Feature column name(s). Any iterable (list, tuple, ``pd.Index``) is
        accepted; a bare string is treated as a single column name.
    target_col : column label
        Name of the target column.

    Returns
    -------
    bool
        ``True`` when validation passes.

    Raises
    ------
    ValueError
        If ``df`` is empty or any required column is absent from it.
    """
    if df.empty:
        raise ValueError("❌ DataFrame is empty.")

    # Normalise to a plain list first: `feature_cols + [target_col]` only works
    # when feature_cols is a list (a tuple raises TypeError, and a pd.Index
    # attempts element-wise addition rather than concatenation).
    if isinstance(feature_cols, str):
        feature_cols = [feature_cols]
    required = [*feature_cols, target_col]

    # dict.fromkeys de-duplicates while keeping request order, so the error
    # message lists missing columns deterministically.
    present = set(df.columns)
    missing_cols = [col for col in dict.fromkeys(required) if col not in present]
    if missing_cols:
        raise ValueError(f"❌ Missing required columns: {missing_cols}")
    return True


# -------------------------------
# Pipeline Builder
# -------------------------------
def build_pipeline(variance_threshold=0.01):
    """Build a select -> scale -> classify scikit-learn pipeline.

    Parameters
    ----------
    variance_threshold : float, default=0.01
        Passed to ``VarianceThreshold``; applied to the *raw* (unscaled)
        feature values.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with the steps ``'variance_selector'``,
        ``'scaler'`` and ``'classifier'``.
    """
    # Variance selection must run BEFORE standardisation: StandardScaler
    # rescales every non-constant feature to unit variance, so a selector
    # placed after it could only ever drop exactly-constant columns and the
    # threshold value would have no effect.
    pipeline = Pipeline(steps=[
        ('variance_selector', VarianceThreshold(threshold=variance_threshold)),
        ('scaler', StandardScaler()),
        ('classifier', LogisticRegression())
    ])
    return pipeline


# -------------------------------
# Example Dataset
# -------------------------------
# Toy dataset: seven samples, three features and a binary target.
target = 'target'
data = dict(
    feature1=[1] * 7,  # constant column -> zero variance
    feature2=list(range(10, 80, 10)),
    feature3=[5, 3, 6, 2, 1, 9, 8],
    target=[0, 1, 0, 1, 1, 0, 1],
)

df = pd.DataFrame(data)
# Every column other than the target is a model input.
features = [col for col in df.columns if col != target]

# -------------------------------
# Main Execution
# -------------------------------
try:
    validate_input(df, features, target)

    X = df[features]
    y = df[target]

    # A fixed random_state keeps the train/test partition reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    model_pipeline = build_pipeline(variance_threshold=0.01)
    model_pipeline.fit(X_train, y_train)

    y_pred = model_pipeline.predict(X_test)

    print("✅ Accuracy:", accuracy_score(y_test, y_pred))
    print("✅ Classification Report:\n", classification_report(y_test, y_pred))

# NotFittedError is a subclass of ValueError, so it has to be caught first;
# listed after `except ValueError` its handler could never run.
except NotFittedError:
    print("❌ Model was not fitted properly.")
except ValueError as ve:
    print(ve)

✅ Accuracy: 0.6666666666666666
✅ Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.50      1.00      0.67         1

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3

