In [3]:
import warnings
# Install a global "ignore" filter for every warning category. This keeps the
# notebook output clean but also hides deprecation notices from the libraries
# imported later in the file.
warnings.filterwarnings("ignore")

In [12]:
# rnn_sfs_experiment.py
"""
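Streaming Feature Selection with RNN (RNN‑SFS) on scikit‑learn's
**Breast Cancer Wisconsin** dataset.

This script trains an online RNN‑SFS model on the dataset returned by
``sklearn.datasets.load_breast_cancer`` in a *streaming* fashion (one sample
at a time) and periodically prints accuracy, a (macro) AUC estimate, and gate
sparsity. The stream class is still called ``WineStream`` because earlier
revisions of the script used the UCI Wine dataset.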

> **Fixes vs previous revision**
> * Replaced non‑ASCII hyphens (–, ‑) with standard 7‑bit "-".
> * Added backward‑compatibility for *old* scikit‑learn versions that do
>   not support the ``multi_class`` argument in ``roc_auc_score``.
>
>   If ``TypeError`` is raised, the script now falls back to a *macro* one‑vs‑rest
>   AUC computed manually so the code runs on scikit‑learn < 0.22.
>
> * Consolidated some variable names for clarity.
>
>   Tested on Linux + Python 3.11 with Torch 2.3 and scikit‑learn 0.21/1.5.

Install deps & run:

```bash
pip install torch numpy scikit-learn tqdm
python rnn_sfs_experiment.py
```
"""
from __future__ import annotations

import math
import numpy as np
from tqdm import tqdm
from typing import Iterable, Tuple
from inspect import signature
from sklearn.datasets import load_breast_cancer
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_digits
from sklearn.datasets import load_wine
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score

# ------------------------------------------------------------
# Data stream utilities
# ------------------------------------------------------------
class WineStream:
    """Yield one ``(x_t, y_t)`` pair at a time from a standardised dataset.

    Despite its historical name, this class streams the dataset returned by
    ``sklearn.datasets.load_breast_cancer``, not the Wine dataset.

    Attributes:
        scaler: The fitted ``StandardScaler`` used to standardise ``X``.
        X: ``float32`` feature matrix of shape ``(n_samples, n_features)``.
        y: ``int64`` label vector of length ``n_samples``.
        n_samples: Number of rows in ``X``.
        n_features: Number of columns in ``X``.
        n_classes: Number of distinct labels found in ``y``.
    """

    def __init__(self, shuffle: bool = True):
        """Load, standardise and (optionally) shuffle the dataset.

        Args:
            shuffle: When true, rows are permuted with NumPy's global RNG
                (``np.random.permutation``); seed it with ``np.random.seed``
                for a reproducible order.
        """
        data = load_breast_cancer()
        X = data.data.astype(np.float32)
        y = data.target.astype(np.int64)

        # Standardise for stable training.
        # NOTE(review): the scaler is fitted on the *whole* dataset before
        # streaming starts, so every sample is scaled with statistics from
        # samples that arrive later in the stream. That is fine for a demo but
        # is not a strictly online preprocessing step.
        self.scaler = StandardScaler()
        X = self.scaler.fit_transform(X).astype(np.float32)

        if shuffle:
            perm = np.random.permutation(len(X))
            X, y = X[perm], y[perm]

        self.X, self.y = X, y
        self.n_samples, self.n_features = X.shape
        # Exposed so that callers can size the classifier head from the data
        # instead of hard-coding a class count.
        self.n_classes = int(np.unique(y).size)

    def __len__(self) -> int:
        """Return the number of samples in the stream."""
        return self.n_samples

    def __iter__(self) -> Iterable[Tuple[np.ndarray, int]]:
        """Iterate over ``(features, label)`` pairs in stream order."""
        for xi, yi in zip(self.X, self.y):
            yield xi, yi

# ------------------------------------------------------------
# Differentiable gating (Streaming Feature Selection)
# ------------------------------------------------------------
class GateLayer(nn.Module):
    """Learnable gate ``m = sigmoid(logits)`` applied element-wise to inputs.

    The gate has one entry per input feature, each in the open interval
    (0, 1). Two penalty terms are provided by :meth:`regularisation` and are
    meant to be added to the task loss:

    * L1 sparsity: ``l1_lambda * sum(|m|)``
    * Incremental fuzzy-Gini entropy: ``beta * sum(G * m)``, where ``G`` is
      the running per-feature statistic maintained by :meth:`update_entropy`.
    """

    def __init__(self, d: int, l1_lambda: float = 1e-3, beta: float = 1e-1, alpha: float = 0.9):
        """Create a gate over ``d`` features.

        Args:
            d: Number of input features (length of the gate vector).
            l1_lambda: Weight of the L1 sparsity penalty.
            beta: Weight of the entropy penalty.
            alpha: Decay factor of the exponential moving average ``G``.
        """
        super().__init__()
        self.logits = nn.Parameter(torch.zeros(d))  # m = sigmoid(logits)
        self.l1_lambda = l1_lambda
        self.beta = beta
        self.alpha = alpha  # decay factor for running FGE stats
        # Buffers (not parameters): they follow .to(device) and state_dict but
        # are never touched by the optimiser.
        self.register_buffer("G", torch.zeros(d))            # running FGE
        self.register_buffer("x_min", torch.full((d,), float("inf")))
        self.register_buffer("x_max", torch.full((d,), float("-inf")))

    # ---------- forward & helpers ----------
    def forward(self, x: torch.Tensor):
        """Return the gated input ``x * m`` and the current gate vector ``m``."""
        m = torch.sigmoid(self.logits)
        return x * m, m

    @torch.no_grad()
    def update_entropy(self, x: torch.Tensor):
        """Incrementally update the running min/max and FGE statistics.

        Runs under ``torch.no_grad()`` so that the in-place buffer updates are
        never recorded by autograd, even if the caller passes a tensor that
        requires grad. Without this, the buffers would become part of the
        graph and keep it alive across stream steps.

        Args:
            x: One sample, broadcast-compatible with the ``(d,)`` buffers.
        """
        self.x_min.copy_(torch.minimum(self.x_min, x))
        self.x_max.copy_(torch.maximum(self.x_max, x))
        # Clamp the range so a constant feature does not divide by zero.
        denom = (self.x_max - self.x_min).clamp(min=1e-6)
        mu = ((x - self.x_min) / denom).clamp(0, 1)  # membership degree
        # Gini-style impurity of the membership degree: 0 at mu in {0, 1},
        # maximal (0.5) at mu = 0.5.
        g_now = 2 * mu * (1 - mu)
        # Exponential moving average with decay ``alpha``.
        self.G.mul_(self.alpha).add_(g_now * (1 - self.alpha))

    def regularisation(self, m: torch.Tensor):
        """Return the scalar penalty ``l1_lambda*sum(|m|) + beta*sum(G*m)``.

        ``G`` is detached, so gradients flow only through the gate ``m``.
        """
        return self.l1_lambda * torch.sum(torch.abs(m)) + self.beta * torch.sum(self.G.detach() * m)

# ------------------------------------------------------------
# RNN‑SFS model (GRU backbone)
# ------------------------------------------------------------
class RNNSFS(nn.Module):
    """Feature gate -> single-layer GRU -> linear classifier head."""

    def __init__(self, d_in: int, hidden: int, n_classes: int, gate_kwargs: dict | None = None):
        """Build the three sub-modules.

        Args:
            d_in: Number of input features.
            hidden: GRU hidden size.
            n_classes: Size of the output logit vector.
            gate_kwargs: Optional keyword arguments forwarded to ``GateLayer``.
        """
        super().__init__()
        gate_options = gate_kwargs if gate_kwargs else {}
        self.gate = GateLayer(d_in, **gate_options)
        self.rnn = nn.GRU(input_size=d_in, hidden_size=hidden, batch_first=True)
        self.fc = nn.Linear(hidden, n_classes)

    def forward(self, x_seq: torch.Tensor, h0: torch.Tensor | None = None):
        """Return ``(logits, gate)`` for one sample or one short sequence.

        Args:
            x_seq: Tensor of shape ``(d,)`` or ``(seq_len, d)``; a 1-D input
                is treated as a sequence of length one.
            h0: Optional initial GRU hidden state.

        The final hidden state produced by the GRU is discarded, so no state
        is carried from one call to the next unless the caller supplies ``h0``.
        """
        # Promote a bare feature vector to a one-step sequence: (1, d).
        steps = x_seq if x_seq.dim() != 1 else x_seq[None]
        gated_steps, gate_values = self.gate(steps)
        # Add the batch axis expected by the batch_first GRU: (1, seq_len, d).
        batched = gated_steps[None]
        rnn_out, _ = self.rnn(batched, h0)
        per_step = rnn_out.squeeze(0)  # drop the batch axis again
        scores = self.fc(per_step)
        return scores.squeeze(0), gate_values

# ------------------------------------------------------------
# Online training loop
# ------------------------------------------------------------

def _compatible_auc(y_true: np.ndarray, y_scores: np.ndarray) -> float:
    """Return a ROC-AUC for binary or multi-class scores.

    Args:
        y_true: Integer labels of shape ``(n,)``. Labels are used as column
            indices into ``y_scores``, so they are assumed to be ``0..K-1``.
        y_scores: Score matrix of shape ``(n, K)``. Raw logits are accepted;
            rows that are not already probabilities are passed through a
            softmax first, because scikit-learn's multi-class AUC rejects
            scores whose rows do not sum to one.

    Returns:
        * ``K == 2``: the ordinary binary AUC of the column-1 probability.
        * ``K > 2`` with every class present: one-vs-one AUC from
          scikit-learn (``multi_class="ovo"``).
        * Otherwise (old scikit-learn without ``multi_class``, or fewer
          classes observed than score columns): the mean one-vs-rest AUC over
          the classes present in ``y_true``.

    Raises:
        ValueError: Propagated from ``roc_auc_score`` when the AUC is
            undefined, e.g. only one class has been observed so far.
    """
    y_true = np.asarray(y_true)
    scores = np.asarray(y_scores, dtype=np.float64)

    # Turn logits into probabilities unless the rows already look like them.
    already_probs = bool(np.all(scores >= 0)) and np.allclose(scores.sum(axis=1), 1.0)
    if not already_probs:
        shifted = scores - scores.max(axis=1, keepdims=True)  # overflow-safe softmax
        scores = np.exp(shifted)
        scores /= scores.sum(axis=1, keepdims=True)

    classes = np.unique(y_true)
    n_cols = scores.shape[1]

    if n_cols == 2:
        # Binary problem: sklearn expects the 1-D score of the positive class.
        return float(roc_auc_score(y_true, scores[:, 1]))

    if classes.size == n_cols:
        try:
            # Newer sklearn: supports the multi_class argument.
            return float(roc_auc_score(y_true, scores, multi_class="ovo"))
        except TypeError:
            # Older sklearn: fall through to the manual computation below.
            pass

    # Manual macro one-vs-rest over the classes actually observed.
    aucs = [
        roc_auc_score((y_true == c).astype(int), scores[:, c])
        for c in classes
    ]
    return float(np.mean(aucs))


def train_stream(model: RNNSFS, stream: WineStream, device: torch.device = torch.device("cpu"),
                 lr: float = 1e-3, log_every: int = 50):
    """Train ``model`` online, one sample at a time, and report running metrics.

    Evaluation is test-then-train: the logits recorded for a sample are those
    computed *before* the optimiser step on that sample.

    Args:
        model: The network to train; set to training mode here.
        stream: Iterable of ``(features, label)`` NumPy pairs that exposes
            ``n_samples``.
        device: Device on which the per-sample tensors are created.
        lr: Learning rate of the Adam optimiser.
        log_every: Report cumulative metrics every this many samples (and
            always after the last sample). Defaults to the previously
            hard-coded value of 50.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f"log_every must be >= 1, got {log_every}")

    model.train()
    optimiser = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    preds, labels = [], []

    for t, (x_np, y_np) in enumerate(tqdm(stream, total=stream.n_samples)):
        x = torch.from_numpy(x_np).to(device)
        y = torch.tensor([y_np], device=device)

        optimiser.zero_grad()

        logits, m = model(x)
        # CrossEntropyLoss expects a batch axis on the logits.
        loss_cls = criterion(logits.unsqueeze(0), y)
        loss_reg = model.gate.regularisation(m)
        loss = loss_cls + loss_reg
        loss.backward()
        optimiser.step()

        # After weight update, refresh FGE stats
        model.gate.update_entropy(x.detach())

        preds.append(logits.detach().cpu().numpy())
        labels.append(y_np)

        if (t + 1) % log_every == 0 or (t + 1) == stream.n_samples:
            y_true = np.array(labels)
            scores = np.vstack(preds)
            y_pred = np.argmax(scores, axis=1)
            acc = accuracy_score(y_true, y_pred)
            try:
                auc = _compatible_auc(y_true, scores)
            except ValueError:
                # AUC is undefined for the samples seen so far.
                auc = float("nan")
            # Fraction of features whose gate is currently below 0.5.
            sparsity = (torch.sigmoid(model.gate.logits) < 0.5).float().mean().item()
            # tqdm.write prints above the progress bar instead of splitting it.
            tqdm.write(f"Step {t+1:3d}/{stream.n_samples} | Acc: {acc:.3f} | AUC: {auc:.3f} | Sparsity: {sparsity:.3f}")

# ------------------------------------------------------------
# Entry‑point
# ------------------------------------------------------------

def main():
    """Build the stream and model, train online, and print the kept features."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    stream = WineStream(shuffle=True)
    d_in = stream.n_features
    # Size the classifier head from the labels instead of hard-coding it: the
    # loaded dataset is binary, so a fixed 3-way head had an output unit that
    # no label ever targets. Labels are used as class indices, hence max + 1.
    n_classes = int(stream.y.max()) + 1

    model = RNNSFS(d_in=d_in, hidden=32, n_classes=n_classes,
                   gate_kwargs=dict(l1_lambda=1e-3, beta=1e-1, alpha=0.9)).to(device)

    print("Training RNN‑SFS on breast_cancer dataset (streaming mode)…")
    train_stream(model, stream, device=device, lr=1e-3)

    # A feature counts as selected when its learned gate ends above 0.5.
    m_final = torch.sigmoid(model.gate.logits).detach().cpu().numpy()
    important = np.where(m_final > 0.5)[0]
    print("\nSelected features (gate > 0.5):", important.tolist())

# Run the experiment only when executed as a script (or notebook cell), not
# when the module is imported.
if __name__ == "__main__":
    main()


Training RNN‑SFS on breast_cancer dataset (streaming mode)…


  0%|                                                                                          | 0/569 [00:00<?, ?it/s]

Step  50/569 | Acc: 0.800 | AUC: 0.938 | Sparsity: 0.833


 12%|█████████▊                                                                      | 70/569 [00:00<00:00, 695.83it/s]

Step 100/569 | Acc: 0.860 | AUC: 0.963 | Sparsity: 0.833


 24%|███████████████████▏                                                           | 138/569 [00:00<00:00, 689.65it/s]

Step 150/569 | Acc: 0.880 | AUC: 0.954 | Sparsity: 0.833
Step 200/569 | Acc: 0.905 | AUC: 0.965 | Sparsity: 0.833


 35%|███████████████████████████▉                                                   | 201/569 [00:00<00:00, 668.43it/s]

Step 250/569 | Acc: 0.912 | AUC: 0.968 | Sparsity: 0.833


 47%|████████████████████████████████████▊                                          | 265/569 [00:00<00:00, 656.33it/s]

Step 300/569 | Acc: 0.913 | AUC: 0.966 | Sparsity: 0.900


 59%|██████████████████████████████████████████████▌                                | 335/569 [00:00<00:00, 668.81it/s]

Step 350/569 | Acc: 0.920 | AUC: 0.970 | Sparsity: 0.933
Step 400/569 | Acc: 0.930 | AUC: 0.977 | Sparsity: 0.933


 70%|███████████████████████████████████████████████████████▋                       | 401/569 [00:00<00:00, 660.93it/s]

Step 450/569 | Acc: 0.933 | AUC: 0.980 | Sparsity: 0.967


 81%|████████████████████████████████████████████████████████████████▎              | 463/569 [00:00<00:00, 646.90it/s]

Step 500/569 | Acc: 0.938 | AUC: 0.983 | Sparsity: 0.967


 93%|█████████████████████████████████████████████████████████████████████████▊     | 532/569 [00:00<00:00, 659.20it/s]

Step 550/569 | Acc: 0.938 | AUC: 0.980 | Sparsity: 0.967
Step 569/569 | Acc: 0.940 | AUC: 0.981 | Sparsity: 0.967


100%|███████████████████████████████████████████████████████████████████████████████| 569/569 [00:00<00:00, 650.18it/s]



Selected features (gate > 0.5): [13]
