In [None]:
import datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import special_ortho_group
from hisel.select import HSICSelector as Selector

In [None]:
# Problem dimensions: x has dim_x features, of which only dim_z feed the
# dim_y-dimensional target y (see the transform built in the next cell).
dim_x = 10
dim_y = 3
dim_z = 5

# Settings passed to the selector's regularization_curve call.
batch_size = 10_000
minibatch_size = 250
number_of_epochs = 3

# Number of synthetic (x, y) pairs to draw.
num_of_samples = 10_000

# Seed NumPy's global generator so that a fresh "Restart & Run All" draws the
# same transform and the same samples every time. scipy.stats falls back to
# this global generator when no random_state is supplied.
# NOTE(review): whether the selector itself draws from this generator is not
# visible from this notebook; confirm against hisel.
random_seed = 0
np.random.seed(random_seed)

In [None]:
# Build a random linear map from the dim_x inputs to the dim_y targets that
# reads only dim_z of the input coordinates.

# Keep the first dim_y rows of a random dim_z x dim_z special-orthogonal matrix.
rotation = special_ortho_group.rvs(dim_z)
transform_tilde = rotation[:dim_y]

# [I | 0] has shape (dim_z, dim_x). np.random.permutation shuffles along the
# first axis, so shuffling the transpose and transposing back permutes the
# columns, i.e. it picks which dim_z input features are the relevant ones.
identity_padded = np.hstack((np.eye(dim_z), np.zeros((dim_z, dim_x - dim_z))))
A = np.random.permutation(identity_padded.T).T

transform = transform_tilde @ A

In [None]:
# Draw the inputs uniformly on [0, 1) and push every row through the linear
# map: y_i = transform @ x_i.
x_samples = np.random.uniform(size=(num_of_samples, dim_x))

# A single (num_of_samples, dim_x) @ (dim_x, dim_y) product handles all rows at
# once, so there is no need to materialise num_of_samples stacked copies of
# `transform` for a batched mat-vec. The result has the same
# (num_of_samples, dim_y) shape and the same values up to float rounding.
y_samples = x_samples @ transform.T

In [None]:
# Build the feature selector (hisel's HSICSelector, imported as Selector) on
# the synthetic inputs and targets generated above.
projector = Selector(x_samples, y_samples)

In [None]:
# Run the selector's regularization_curve with the batch, minibatch and epoch
# settings from the configuration cell. The result is kept for the
# "Regularisation curve" plots further down.
# NOTE(review): lasso_path() and ordered_features are read right after this
# call and presumably depend on it having run; confirm against hisel.
curve = projector.regularization_curve(
    batch_size=batch_size,
    minibatch_size=minibatch_size,
    number_of_epochs=number_of_epochs
)

In [None]:
# Fetch the lasso paths from the selector; the bare name on the last line makes
# the notebook display the result.
# NOTE(review): later cells use paths.iloc and paths.plot, so this is
# presumably a pandas DataFrame with one column per feature; confirm.
paths = projector.lasso_path()
paths

#### Sorted features by decreasing importance

In [None]:
# Report the feature ranking exposed by the selector's ordered_features attribute.
print(f'Sorted features by decreasing importance: {projector.ordered_features}')

### Test selection

In [None]:
# Ground truth: the columns of A that contain a 1 are the features that
# actually feed y; every other feature is pure noise.
expected_features = np.flatnonzero(np.sum(A, axis=0))
noise_features = set(range(dim_x)).difference(expected_features.tolist())

# Work on a plain NumPy copy of the last row of `paths` so that sorting and
# indexing are purely positional, independent of the frame's column labels.
final_coefs = np.asarray(paths.iloc[-1, :])
selected_features = np.argsort(final_coefs)[::-1][:dim_z]

# .tolist() yields built-in ints, so the printed lists stay readable under
# NumPy 2 (which would otherwise show np.int64(...) for every entry).
print(f'Expected features: {sorted(expected_features.tolist())}')
print(f'Selected features: {sorted(selected_features.tolist())}')

# Every relevant feature must end strictly above every noise feature. That is
# equivalent to comparing the weakest relevant value with the strongest noise
# value. There is nothing to compare when no noise feature exists
# (dim_x == dim_z).
if noise_features:
    weakest_expected = final_coefs[expected_features].min()
    strongest_noise = final_coefs[sorted(noise_features)].max()
    assert weakest_expected > strongest_noise, (
        f'Selection failed: weakest expected value {weakest_expected} is not '
        f'above strongest noise value {strongest_noise}'
    )
print('Selection was correct!')

## Regularisation curve

#### Cumulative beta

In [None]:
# Plot the regularisation curve against a 1-based position index.
curve_positions = np.arange(len(curve)) + 1
plt.plot(curve_positions, curve)

#### Absolute beta

In [None]:
# Plot the magnitude of the change between consecutive points of the curve.
# The increment curve[k] - curve[k-1] (0-based) is drawn at x = k.
increments = np.diff(curve)
increment_positions = np.arange(1, len(curve))
plt.plot(increment_positions, np.abs(increments))

## Lasso paths

In [None]:
# Plot the lasso paths with the object's own plot method, using figsize=(10, 5).
paths.plot(figsize=(10, 5))