In [39]:
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import random

In [40]:
# Load the Iris dataset bundle and expose it as pandas objects:
# X holds the measurements, one named column per feature; y holds the targets.
data = load_iris()
y = pd.Series(data.target)
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Quick preview of the first rows of the feature table.
print(X.head())

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2


In [41]:

def evaluate_feature_subset(X_subset, y, test_size=0.3, random_state=42):
    """Score a feature subset by the hold-out accuracy of a decision tree.

    The rows are split once into a training and a test partition, a
    ``DecisionTreeClassifier`` is fitted on the training partition, and the
    accuracy of its predictions on the test partition is returned.

    Parameters
    ----------
    X_subset : feature table restricted to the candidate columns.
    y : target values, row-aligned with ``X_subset``.
    test_size : forwarded to ``train_test_split``; defaults to the previously
        hard-coded 0.3.
    random_state : seed used for both the split and the tree; defaults to the
        previously hard-coded 42. Keeping it fixed means every candidate
        subset of the same data is scored on the same split.

    Returns
    -------
    The value of ``accuracy_score`` for the test partition.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X_subset, y, test_size=test_size, random_state=random_state
    )

    model = DecisionTreeClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

In [42]:

def generate_initial_subset(features):
    """Draw a random starting subset containing about half of ``features``.

    Parameters
    ----------
    features : sequence of candidate feature names.

    Returns
    -------
    A new list of ``len(features) // 2`` distinct features sampled without
    replacement, but never fewer than one when ``features`` is non-empty.
    An empty ``features`` yields an empty list.
    """
    # Integer halving gives 0 for a single feature; clamp to 1 so the search
    # never starts from an empty subset, and to len(features) so an empty
    # input still returns [] instead of raising.
    k = min(len(features), max(1, len(features) // 2))
    return random.sample(features, k=k)

In [43]:
def generate_neighbors(current_subset, all_features):
    """List the subsets that differ from ``current_subset`` by one feature.

    Parameters
    ----------
    current_subset : list of the currently selected features (not modified).
    all_features : iterable of every candidate feature.

    Returns
    -------
    A list of new lists: first every subset obtained by appending one
    not-yet-selected feature (in ``all_features`` order), then every subset
    obtained by removing one selected feature (in ``current_subset`` order).
    Removals that would leave the subset empty are omitted, because an empty
    feature set cannot be used to fit a model.
    """
    # Grow moves: add each feature that is not selected yet.
    neighbors = [
        current_subset + [feature]
        for feature in all_features
        if feature not in current_subset
    ]

    # Shrink moves: only when at least one feature would remain afterwards.
    if len(current_subset) > 1:
        for feature in current_subset:
            new_subset = current_subset.copy()
            new_subset.remove(feature)
            neighbors.append(new_subset)

    return neighbors

In [44]:
def hill_climbing_feature_selection(X, y, max_iterations=100):
    """Greedy hill-climbing search for a high-accuracy feature subset.

    Starts from the subset returned by ``generate_initial_subset`` and, on each
    iteration, scores every neighbor produced by ``generate_neighbors`` with
    ``evaluate_feature_subset``. The search moves to the best neighbor whose
    score is strictly higher than the current one, and stops when no neighbor
    improves or after ``max_iterations`` moves.

    Parameters
    ----------
    X : DataFrame whose columns are the candidate features.
    y : target values, row-aligned with ``X``.
    max_iterations : upper bound on the number of moves.

    Returns
    -------
    ``(subset, score)``: the final list of column names and its score.

    Raises
    ------
    ValueError
        If ``X`` has no columns to select from.
    """
    all_features = X.columns.tolist()
    if not all_features:
        raise ValueError("X must contain at least one feature column")

    current_subset = generate_initial_subset(all_features)
    if not current_subset:
        # Guard against an empty starting subset (a frame with zero selected
        # columns cannot be fitted): fall back to the first available column.
        current_subset = all_features[:1]
    current_score = evaluate_feature_subset(X[current_subset], y)

    for _ in range(max_iterations):
        best_neighbor = None
        best_score = current_score

        for neighbor in generate_neighbors(current_subset, all_features):
            if not neighbor:
                # Removing the last remaining feature gives an empty subset,
                # which cannot be evaluated; skip it instead of crashing.
                continue
            neighbor_score = evaluate_feature_subset(X[neighbor], y)
            # Strict '>' means ties never trigger a move, so the loop cannot
            # cycle between equally scored subsets.
            if neighbor_score > best_score:
                best_neighbor = neighbor
                best_score = neighbor_score

        if best_neighbor is None:
            # Local optimum: no neighbor beats the current subset.
            break

        current_subset = best_neighbor
        current_score = best_score

    return current_subset, current_score

In [45]:
# The search starts from a randomly sampled subset; seed the global RNG so the
# selected subset and its score are the same on every Restart & Run All.
random.seed(42)
best_subset, best_score = hill_climbing_feature_selection(X, y)

# Baseline: the same evaluation using every feature.
full_score = evaluate_feature_subset(X, y)

# NOTE: the subset is chosen by maximizing accuracy on the same held-out split
# that is reported here, so its score is optimistically biased compared with
# the all-features baseline.
print("Best feature subset:", best_subset)
print("Accuracy with best subset:", best_score)
print("Accuracy with all features:", full_score)

Best feature subset: ['petal length (cm)', 'sepal length (cm)', 'petal width (cm)']
Accuracy with best subset: 1.0
Accuracy with all features: 1.0
