In [2]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import random

In [8]:
class FeatureSelectionEnv:
    """Toggle-based feature-selection environment scored by cross-validation.

    The state is a binary mask with one entry per column of ``X`` (1 means the
    feature is currently selected). Each action flips one entry of the mask,
    and the reward is the mean ``cross_val_score`` (``cv=5``) of a
    ``StandardScaler`` + RBF ``SVC`` pipeline fitted on the selected columns.
    """

    def __init__(self, X, y):
        """Store the data and start with no features selected.

        Args:
            X: 2-D feature matrix; ``X.shape[1]`` is taken as the number of
                features. Objects exposing ``.iloc`` (e.g. pandas DataFrames)
                and objects supporting ``X[:, cols]`` are both accepted.
            y: Targets, passed to ``cross_val_score`` unchanged.
        """
        self.X = X
        self.y = y
        self.n_features = X.shape[1]
        self.state = [0] * self.n_features  # Start with no features selected
        self.selected_features = []
        self.classifier = make_pipeline(StandardScaler(), SVC(kernel='rbf'))

    def reset(self):
        """Clear the selection and return a copy of the all-zero mask."""
        self.state = [0] * self.n_features
        self.selected_features = []
        # Hand out a copy so callers can neither observe later mutations of
        # the internal mask nor corrupt it by editing the returned list.
        return list(self.state)

    def _select_columns(self):
        """Return the columns of ``X`` listed in ``selected_features``."""
        cols = self.selected_features
        if hasattr(self.X, "iloc"):
            # Positional column selection for DataFrame-like inputs, where
            # ``X[:, cols]`` is not valid indexing.
            return self.X.iloc[:, cols]
        return self.X[:, cols]

    def step(self, action):
        """Toggle one feature and score the resulting subset.

        Args:
            action: Index of the feature to flip, in ``[0, n_features)``.

        Returns:
            A ``(state, reward, done)`` tuple: ``state`` is a copy of the
            updated mask, ``reward`` is the mean cross-validation score as a
            ``float`` (``0.0`` when no feature is selected), and ``done`` is
            ``True`` once every feature is selected.

        Raises:
            ValueError: If ``action`` is outside ``[0, n_features)``.
        """
        if action < 0 or action >= self.n_features:
            raise ValueError("Invalid action")

        self.state[action] = 1 - self.state[action]  # Toggle feature selection
        self.selected_features = [i for i, flag in enumerate(self.state) if flag == 1]

        if not self.selected_features:
            # Nothing to fit a classifier on; an empty subset scores zero.
            reward = 0.0
        else:
            scores = cross_val_score(
                self.classifier, self._select_columns(), self.y, cv=5
            )
            reward = float(scores.mean())

        done = len(self.selected_features) == self.n_features
        # Return a snapshot rather than the live list: otherwise every state a
        # caller stores (e.g. in a transition buffer) changes on the next step.
        return list(self.state), reward, done

    def show(self):
        """Print the indices of the currently selected features."""
        print(f"Selected features: {self.selected_features}")

In [9]:
# Fetch the Iris dataset and split it into a feature matrix and label vector
data = load_iris()
X = data.data
y = data.target

In [10]:
# Preview five randomly drawn rows of the Iris data, labels included
df_iris = pd.DataFrame(X, columns=data.feature_names).assign(target=y)
df_iris.sample(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
13,4.3,3.0,1.1,0.1,0
14,5.8,4.0,1.2,0.2,0
92,5.8,2.6,4.0,1.2,1
27,5.2,3.5,1.5,0.2,0
146,6.3,2.5,5.0,1.9,2


In [11]:
# Build the feature-selection environment on the Iris features and labels
env = FeatureSelectionEnv(X=X, y=y)

In [13]:
# Example usage: a random policy toggles one feature per step until the
# environment reports `done` (every feature selected) or the step budget is
# exhausted.
rng = random.Random(42)  # dedicated seeded generator -> reproducible run
max_steps = 100  # each step runs a cross-validation, so bound the random walk

state = env.reset()
done = False
steps_taken = 0
while not done and steps_taken < max_steps:
    action = rng.randint(0, env.n_features - 1)  # randint is inclusive on both ends
    state, reward, done = env.step(action)
    steps_taken += 1
    env.show()
    print(f"Reward: {reward}")

Selected features: [2]
Reward: 0.9533333333333334
Selected features: [1, 2]
Reward: 0.9400000000000001
Selected features: [0, 1, 2]
Reward: 0.9066666666666668
Selected features: [0, 1]
Reward: 0.8066666666666666
Selected features: [0, 1, 3]
Reward: 0.96
Selected features: [1, 3]
Reward: 0.9533333333333334
Selected features: [1, 2, 3]
Reward: 0.9666666666666666
Selected features: [0, 1, 2, 3]
Reward: 0.9666666666666666
