<a href="https://colab.research.google.com/github/priyansuapk/ml-lab-exam/blob/main/backward.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Convert to DataFrame for easier manipulation
df = pd.DataFrame(X, columns=iris.feature_names)
df['target'] = y

# Function to perform Backward Elimination
def backward_elimination(X, y, model, metric_func):
    features = X.columns
    best_f1_score = 0
    best_features = features.tolist()

    # Start with all features
    while len(features) > 2:
        f1_scores = []

        # Test the model performance by removing one feature at a time
        for feature in features:
            X_temp = X.drop(columns=[feature])
            X_train, X_test, y_train, y_test = train_test_split(X_temp, y, test_size=0.3, random_state=42)

            # Train the model
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # Calculate F1 score
            score = metric_func(y_test, y_pred, average='weighted')
            f1_scores.append((feature, score))

        # Sort by F1 score and select the feature with the lowest score
        f1_scores.sort(key=lambda x: x[1])  # Sort by F1 score
        worst_feature, worst_score = f1_scores[0]

        # Remove the worst feature
        features = features.drop(worst_feature)

        # Update the best F1 score and best feature set
        if worst_score > best_f1_score:
            best_f1_score = worst_score
            best_features = features.tolist()

    return best_features, best_f1_score

# Initialize model and F1 score metric
model = LogisticRegression(max_iter=200)
metric_func = f1_score

# Perform Backward Elimination to find the optimal 2 features
best_features, best_f1_score = backward_elimination(df.drop(columns='target'), y, model, metric_func)

# Output the best features and their F1 score
print("Best 2 Features:", best_features)
print("Best F1 Score:", best_f1_score)


Best 2 Features: ['sepal width (cm)', 'petal width (cm)']
Best F1 Score: 1.0
