PCovR-Inspired Feature Selection 
==============================

In [None]:
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge as LR

from matplotlib import pyplot as plt
from matplotlib import cm

from tqdm.notebook import tqdm

import numpy as np

from skcosmo.feature_selection import PCovCUR, PCovFPS

# Colormap used to colour each curve by its mixing parameter: the plotting
# cells below call it as `cmap(m)`.
cmap = cm.brg

For this example, we will use the Boston housing dataset provided by scikit-learn.

In [None]:
# Load the feature matrix X and the target values y in one call.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this cell presumably needs an older scikit-learn -- confirm the
# pinned version before relying on Restart & Run All.
X, y = load_boston(return_X_y=True)

# Give the targets an explicit second axis: one row per sample in X.
n_samples = X.shape[0]
y = y.reshape(n_samples, -1)

## Feature Selection with CUR + PCovR
First, let's demonstrate CUR feature selection, and show how the ten features chosen with mixing parameters of 0.0, 0.2, 0.4, 0.6, and 0.8 perform.

In [None]:
# Number of features to select, and the ridge regressor used to score them.
n = 10
lr = LR()

# x-axis of the plot: how many of the selected features are used (1..n).
feature_counts = range(1, n + 1)

for m in np.arange(0, 1.0, 0.2, dtype=np.float32):
    # Ordered indices of the n features picked by PCov-CUR at this mixing value.
    selector = PCovCUR(mixing=m, n_to_select=n)
    idx = selector.fit(X, y).selected_idx_

    # Fit on the first `n_used` selected features and score on the same data,
    # so each entry is a training-set score for a growing feature subset.
    r2_scores = []
    for n_used in feature_counts:
        X_subset = X[:, idx[:n_used]]
        r2_scores.append(lr.fit(X_subset, y).score(X_subset, y))

    # One curve per mixing parameter, coloured by its value.
    plt.semilogy(
        feature_counts,
        np.array(r2_scores),
        label=m,
        c=cmap(m),
        marker="o",
    )

plt.xlabel("Number of Features Selected")
plt.ylabel(r"$R^2$")
plt.legend(title="Mixing \nParameter")
plt.show()

### Non-iterative feature selection with CUR + PCovR
Computing a non-iterative CUR is more efficient, although it can result in poorer performance for larger datasets. You can also use a greater number of eigenvectors to compute the feature importance by varying `k`, but for optimal results `k` should not exceed the number of targets.

In [None]:
# Number of features to select, the scoring regressor, and a fixed mixing value.
n = 10
lr = LR()
m = 0.0

# Select the same number of features with the default (iterative) CUR and with
# the non-iterative variant, so the two orderings can be compared directly.
idx = PCovCUR(mixing=m, n_to_select=n).fit(X, y).selected_idx_
idx_non_it = PCovCUR(mixing=m, iterative=False, n_to_select=n).fit(X, y).selected_idx_

feature_counts = range(1, n + 1)

# (selected indices, legend label, marker) for each curve, in plotting order.
curves = [
    (idx, "Iterative", "o"),
    (idx_non_it, "Non-Iterative", "s"),
]

for selected, curve_label, curve_marker in curves:
    # Training-set score using only the first `n_used` selected features.
    r2_scores = []
    for n_used in feature_counts:
        X_subset = X[:, selected[:n_used]]
        r2_scores.append(lr.fit(X_subset, y).score(X_subset, y))

    plt.loglog(
        feature_counts,
        np.array(r2_scores),
        label=curve_label,
        marker=curve_marker,
    )

plt.xlabel("Number of Features Selected")
plt.ylabel(r"$R^2$")
plt.legend()
plt.show()

## Feature Selection with FPS + PCovR
Next, let's look at FPS. We'll initialize the selection with the first index chosen by CUR at m = 1, which is feature index 9.

In [None]:
# Number of features to select, and the ridge regressor used to score them.
n = 10
lr = LR()

mixing_values = np.arange(0, 1.0, 0.2, dtype=np.float32)

for m in mixing_values:
    # PCov-FPS selection at this mixing value, initialized with index 9.
    fps = PCovFPS(mixing=m, initialize=9, n_to_select=n)
    idx = fps.fit(X, y).selected_idx_

    # Column subsets made of the first 1, 2, ..., n selected features.
    subsets = (X[:, idx[: ni + 1]] for ni in range(n))

    # Fit and score on the same data: one training-set score per subset size.
    r2_scores = np.array([lr.fit(X_sub, y).score(X_sub, y) for X_sub in subsets])

    plt.semilogy(
        range(1, n + 1),
        r2_scores,
        label=m,
        c=cmap(m),
        marker="o",
    )

plt.xlabel("Number of Features Selected")
plt.ylabel(r"$R^2$")
plt.legend(title="Mixing \nParameter")
plt.show()