### Feature Selection - Using Mutual Information
**Description**: Score each feature by its mutual information with the target, keep the top-scoring features, and train a classifier on only those features.

In [None]:
# write your code from here

In [2]:
import unittest
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# -------------------------------------------
# Step 1: Load and prepare dataset
# -------------------------------------------
def load_data():
    """Load the Iris dataset and split it into training and test sets.

    The features are wrapped in a DataFrame whose columns are the Iris
    feature names; the target array is used as the labels. 20% of the
    rows are held out for testing, with a fixed ``random_state`` of 42.

    Returns:
        The result of ``train_test_split``: ``X_train, X_test, y_train,
        y_test`` in that order.

    Raises:
        Exception: Any error raised while loading or splitting is printed
            and then re-raised unchanged.
    """
    try:
        dataset = load_iris()
        features = pd.DataFrame(data=dataset.data, columns=dataset.feature_names)
        labels = dataset.target
        splits = train_test_split(
            features,
            labels,
            test_size=0.2,
            random_state=42,
        )
    except Exception as exc:
        print(f"Data loading failed: {exc}")
        raise
    else:
        return splits

# -------------------------------------------
# Step 2: Create pipeline with mutual info
# -------------------------------------------
def create_pipeline(k=2, random_state=42):
    """Build a pipeline that selects features by mutual information, then classifies.

    Args:
        k: Number of top-scoring features kept by ``SelectKBest``.
        random_state: Seed used both for the mutual-information estimate
            and for the random forest, so repeated runs select the same
            features and train the same model.

    Returns:
        An unfitted ``Pipeline`` with the steps ``'feature_selection'``
        and ``'classifier'``.

    Raises:
        Exception: Any error raised while constructing the pipeline is
            printed and then re-raised unchanged.
    """
    # Local import keeps this block self-contained.
    from functools import partial

    try:
        # mutual_info_classif adds small random noise to continuous features;
        # without a seed the scores (and so the selected features) can vary
        # between runs even though the classifier itself is seeded.
        score_func = partial(mutual_info_classif, random_state=random_state)
        pipeline = Pipeline([
            ('feature_selection', SelectKBest(score_func=score_func, k=k)),
            ('classifier', RandomForestClassifier(random_state=random_state))
        ])
        return pipeline
    except Exception as e:
        print(f"Pipeline creation failed: {e}")
        raise

# -------------------------------------------
# Step 3: Train and evaluate the model
# -------------------------------------------
def train_and_evaluate(X_train, X_test, y_train, y_test, k=2):
    """Fit the feature-selection pipeline and report its test accuracy.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.
        k: Number of features the pipeline's selector keeps. Defaults to
            2, the value that was previously hard-coded.

    Returns:
        The accuracy score of the fitted pipeline's predictions on
        ``X_test`` compared with ``y_test``.

    Raises:
        Exception: Any error raised during pipeline creation, fitting,
            prediction, or scoring is printed and then re-raised unchanged.
    """
    try:
        pipeline = create_pipeline(k=k)
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        print(f"Accuracy: {acc:.2f}")
        return acc
    except Exception as e:
        print(f"Training or evaluation failed: {e}")
        raise

# -------------------------------------------
# Step 4: Unit Tests
# -------------------------------------------
class TestMutualInfoPipeline(unittest.TestCase):
    """Unit tests for the data split, pipeline training, and feature selection."""

    def test_data_load(self):
        """The split yields 120 training rows and 30 test rows."""
        train_X, test_X, _train_y, _test_y = load_data()
        self.assertEqual(len(train_X), 120)
        self.assertEqual(len(test_X), 30)

    def test_pipeline_training(self):
        """The trained pipeline scores better than 50% accuracy."""
        train_X, test_X, train_y, test_y = load_data()
        score = train_and_evaluate(train_X, test_X, train_y, test_y)
        self.assertGreater(score, 0.5, "Accuracy should be above 50%")

    def test_feature_selection(self):
        """The selector keeps exactly the requested number of features."""
        train_X, _, train_y, _ = load_data()
        fitted = create_pipeline(k=2)
        fitted.fit(train_X, train_y)
        selector = fitted.named_steps['feature_selection']
        kept_indices = selector.get_support(indices=True)
        self.assertEqual(len(kept_indices), 2)

# -------------------------------------------
# Step 5: Execute full workflow
# -------------------------------------------
if __name__ == "__main__":
    # Entry point: run the workflow once, then run the unit tests.
    try:
        # Load the train/test split and print the pipeline's accuracy.
        X_train, X_test, y_train, y_test = load_data()
        train_and_evaluate(X_train, X_test, y_train, y_test)
        # argv=[''] gives unittest a placeholder program name so it does not
        # parse the host process's command-line arguments; exit=False stops
        # unittest from calling sys.exit() after the tests finish.
        unittest.main(argv=[''], exit=False)
    except Exception as err:
        # Top-level boundary: report the failure rather than propagate it.
        print(f"Execution failed: {err}")


..

Accuracy: 1.00


.
----------------------------------------------------------------------
Ran 3 tests in 0.308s

OK


Accuracy: 1.00
