## Import Libraries

In [15]:
import numpy as np
import pandas as pd
import time
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SequentialFeatureSelector

## Load and Split the Dataset

In [16]:
# Load the Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

In [17]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Implement TD-learning for Feature Selection using PyTorch

In [18]:
class FeatureSelectionEnv:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.n_features = X.shape[1]
        self.state = [0] * self.n_features  # Start with no features selected
        self.selected_features = []
        self.classifier = make_pipeline(StandardScaler(), SVC(kernel='rbf'))
    
    def reset(self):
        self.state = [0] * self.n_features
        self.selected_features = []
        return self.state
    
    def step(self, action):
        if action < 0 or action >= self.n_features:
            raise ValueError("Invalid action")
        
        self.state[action] = 1 - self.state[action]  # Toggle feature selection
        self.selected_features = [i for i, val in enumerate(self.state) if val == 1]
        
        if len(self.selected_features) == 0:
            reward = 0
        else:
            X_selected = self.X[:, self.selected_features]
            scores = cross_val_score(self.classifier, X_selected, self.y, cv=5)
            reward = scores.mean()
        
        return self.state, reward

In [19]:
# Initialize the environment
env = FeatureSelectionEnv(X_train, y_train)

In [20]:
# TD-learning algorithm using PyTorch
class TDLearningAgent(nn.Module):
    def __init__(self, n_features):
        super(TDLearningAgent, self).__init__()
        self.fc = nn.Linear(n_features, n_features)
    
    def forward(self, x):
        return self.fc(x)

In [21]:
n_features = X_train.shape[1]
agent = TDLearningAgent(n_features)
optimizer = optim.Adam(agent.parameters(), lr=0.01)
criterion = nn.MSELoss()

In [22]:
alpha = 0.025
gamma = 0.5
n_episodes = 5

In [23]:
start_time_td = time.time()

for episode in range(n_episodes):
    state = env.reset()
    state_tensor = torch.FloatTensor(state).unsqueeze(0)
    for t in range(n_features):
        action = np.random.choice(n_features)
        next_state, reward = env.step(action)
        next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
        
        q_values = agent(state_tensor)
        next_q_values = agent(next_state_tensor)
        
        target = q_values.clone()
        target[0, action] = q_values[0, action] + alpha * (reward + gamma * torch.max(next_q_values).item() - q_values[0, action])
        
        loss = criterion(q_values, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        state = next_state
        state_tensor = next_state_tensor

end_time_td = time.time()

In [24]:
# Select features based on the learned Q-values
selected_features_td = [i for i, val in enumerate(env.state) if val == 1]

## Evaluate the TD-learning Selected Features

In [25]:
# Evaluate on training set
X_train_selected = X_train[:, selected_features_td]
scores_train_td = cross_val_score(env.classifier, X_train_selected, y_train, cv=5)
accuracy_train_td = scores_train_td.mean()

In [26]:
# Evaluate on test set
X_test_selected = X_test[:, selected_features_td]
scores_test_td = cross_val_score(env.classifier, X_test_selected, y_test, cv=5)
accuracy_test_td = scores_test_td.mean()

print(f"TD-learning - Training Accuracy: {accuracy_train_td}")
print(f"TD-learning - Test Accuracy: {accuracy_test_td}")
print(f"TD-learning - Running Time: {end_time_td - start_time_td} seconds")

TD-learning - Training Accuracy: 0.9626373626373625
TD-learning - Test Accuracy: 0.9739130434782609
TD-learning - Running Time: 5.696887254714966 seconds


## Forward and Backward Feature Selection for Comparison

In [27]:
# Forward Feature Selection
start_time_forward = time.time()
sfs_forward = SequentialFeatureSelector(env.classifier, n_features_to_select=len(selected_features_td), direction='forward', cv=5)
sfs_forward.fit(X_train, y_train)
end_time_forward = time.time()
selected_features_forward = sfs_forward.get_support(indices=True)

In [28]:
# Backward Feature Selection
start_time_backward = time.time()
sfs_backward = SequentialFeatureSelector(env.classifier, n_features_to_select=len(selected_features_td), direction='backward', cv=5)
sfs_backward.fit(X_train, y_train)
end_time_backward = time.time()
selected_features_backward = sfs_backward.get_support(indices=True)

In [29]:
# Evaluate Forward Feature Selection
X_train_forward = X_train[:, selected_features_forward]
scores_train_forward = cross_val_score(env.classifier, X_train_forward, y_train, cv=5)
accuracy_train_forward = scores_train_forward.mean()

X_test_forward = X_test[:, selected_features_forward]
scores_test_forward = cross_val_score(env.classifier, X_test_forward, y_test, cv=5)
accuracy_test_forward = scores_test_forward.mean()

print(f"Forward Selection - Training Accuracy: {accuracy_train_forward}")
print(f"Forward Selection - Test Accuracy: {accuracy_test_forward}")
print(f"Forward Selection - Running Time: {end_time_forward - start_time_forward} seconds")

Forward Selection - Training Accuracy: 0.9802197802197803
Forward Selection - Test Accuracy: 0.9739130434782609
Forward Selection - Running Time: 9.96624493598938 seconds


In [30]:
# Evaluate Backward Feature Selection
X_train_backward = X_train[:, selected_features_backward]
scores_train_backward = cross_val_score(env.classifier, X_train_backward, y_train, cv=5)
accuracy_train_backward = scores_train_backward.mean()

X_test_backward = X_test[:, selected_features_backward]
scores_test_backward = cross_val_score(env.classifier, X_test_backward, y_test, cv=5)
accuracy_test_backward = scores_test_backward.mean()

print(f"Backward Selection - Training Accuracy: {accuracy_train_backward}")
print(f"Backward Selection - Test Accuracy: {accuracy_test_backward}")
print(f"Backward Selection - Running Time: {end_time_backward - start_time_backward} seconds")

Backward Selection - Training Accuracy: 0.9846153846153847
Backward Selection - Test Accuracy: 0.9478260869565217
Backward Selection - Running Time: 13.678672313690186 seconds


## Neural Network-based Feature Selection

In [31]:
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

In [32]:
# Train the neural network
input_dim = X_train.shape[1]
model = SimpleNN(input_dim)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train).unsqueeze(1)

start_time_nn = time.time()

n_epochs = 50
for epoch in range(n_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

end_time_nn = time.time()

In [33]:
# Select features based on the weights of the first layer
weights = model.fc1.weight.data.abs().sum(dim=0)
selected_features_nn = weights.topk(len(selected_features_td)).indices.numpy()

In [34]:
# Evaluate Neural Network-based Feature Selection
X_train_nn = X_train[:, selected_features_nn]
scores_train_nn = cross_val_score(env.classifier, X_train_nn, y_train, cv=5)
accuracy_train_nn = scores_train_nn.mean()

X_test_nn = X_test[:, selected_features_nn]
scores_test_nn = cross_val_score(env.classifier, X_test_nn, y_test, cv=5)
accuracy_test_nn = scores_test_nn.mean()

print(f"Neural Network - Training Accuracy: {accuracy_train_nn}")
print(f"Neural Network - Test Accuracy: {accuracy_test_nn}")
print(f"Neural Network - Running Time: {end_time_nn - start_time_nn} seconds")

Neural Network - Training Accuracy: 0.9538461538461538
Neural Network - Test Accuracy: 0.9391304347826086
Neural Network - Running Time: 0.11250686645507812 seconds


## Compare Results

In [35]:
# Compare the results
print("Comparison of Methods:")
print(f"TD-learning - Training Accuracy: {accuracy_train_td}, Test Accuracy: {accuracy_test_td}, Running Time: {end_time_td - start_time_td} seconds")
print(f"Forward Selection - Training Accuracy: {accuracy_train_forward}, Test Accuracy: {accuracy_test_forward}, Running Time: {end_time_forward - start_time_forward} seconds")
print(f"Backward Selection - Training Accuracy: {accuracy_train_backward}, Test Accuracy: {accuracy_test_backward}, Running Time: {end_time_backward - start_time_backward} seconds")
print(f"Neural Network - Training Accuracy: {accuracy_train_nn}, Test Accuracy: {accuracy_test_nn}, Running Time: {end_time_nn - start_time_nn} seconds")

Comparison of Methods:
TD-learning - Training Accuracy: 0.9626373626373625, Test Accuracy: 0.9739130434782609, Running Time: 5.696887254714966 seconds
Forward Selection - Training Accuracy: 0.9802197802197803, Test Accuracy: 0.9739130434782609, Running Time: 9.96624493598938 seconds
Backward Selection - Training Accuracy: 0.9846153846153847, Test Accuracy: 0.9478260869565217, Running Time: 13.678672313690186 seconds
Neural Network - Training Accuracy: 0.9538461538461538, Test Accuracy: 0.9391304347826086, Running Time: 0.11250686645507812 seconds
