In [None]:
import numpy as np

from afqinsight.datasets import make_classification, make_group_classification
from keras_ssg_lasso import sgl

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold

from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.core import Dense, Activation
from keras.models import Sequential
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasClassifier

# First try without sparse groups

In [None]:
# Settings for the synthetic binary classification problem (no group structure).
n_samples = 50
n_features = 20
n_informative = 4
n_redundant = 0
n_repeated = 0

# Noise-free labels (flip_y=0.0) and widely separated classes (class_sep=10.0).
# useful_indices=True makes the generator return a third value, `idx`
# (presumably a per-feature flag for the informative features -- it is printed
# under the "important" column further down; confirm against afqinsight).
X, y, idx = make_classification(
    n_samples=n_samples, n_features=n_features,
    n_informative=n_informative, n_redundant=n_redundant, n_repeated=n_repeated,
    flip_y=0.00, class_sep=10.0, n_classes=2,
    useful_indices=True, random_state=42,
)

In [None]:
# Show the third value returned by make_classification (useful_indices=True).
print(idx)

In [None]:
# Two-column label matrix for the 2-unit softmax output:
# column 0 holds y and column 1 holds 1 - y.
y_class = np.transpose(np.array([y, 1-y], dtype=np.int32))

# Hold out 15% of the samples, stratified on the original 1-D labels.
# random_state pins the split so a Restart & Run All reproduces the same
# train/test partition (the data generator above is already seeded).
x_train, x_test, y_train, y_test = train_test_split(
    X, y_class, test_size=0.15, stratify=y, random_state=42
)

In [None]:
def create_classification_model(lambda_=0.1):
    """Build and compile a single-layer softmax classifier with an L1 penalty.

    The network is one Dense layer with 2 softmax units that reads
    ``n_features`` inputs (``n_features`` is a notebook-level global).

    Parameters
    ----------
    lambda_ : float, default 0.1
        Value passed to ``regularizers.l1`` for the kernel regularizer.

    Returns
    -------
    keras.models.Sequential
        Model compiled with categorical cross-entropy loss, an Adam optimizer
        (lr=0.05) and the accuracy metric.
    """
    softmax_layer = Dense(
        2,
        input_dim=n_features,
        activation='softmax',
        kernel_regularizer=regularizers.l1(lambda_),
    )
    model = Sequential([softmax_layer])
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=0.05),
        metrics=['accuracy'],
    )
    return model

# Wrap the builder in the scikit-learn adapter so GridSearchCV can drive it;
# verbose=0 silences per-epoch output during the search.
model = KerasClassifier(build_fn=create_classification_model, verbose=0)

In [None]:
# Search space: a fixed epoch count and 20 log-spaced L1 strengths in [1e-4, 1e4].
# batch_size is not searched (candidate values were [32, 64]).
epochs = [100]
lambdas = np.logspace(-4, 4, 20)
param_grid = {'epochs': epochs, 'lambda_': lambdas}

# 5-fold cross-validated search using all available cores.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=1)

In [None]:
# Run the cross-validated search over param_grid on the training split only.
grid_result = grid.fit(x_train, y_train)

In [None]:
# Take the `.model` attribute of the best estimator found by the search
# (presumably the underlying Keras model held by the KerasClassifier wrapper).
best_model = grid.best_estimator_.model

In [None]:
# Stop training once val_loss has not improved by at least 1e-3 for 3 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=3, verbose=1, mode='auto')
# Write weights to disk only when the monitored quantity improves.
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best model

# Train on the training split only. Fitting on the full (X, y_class) would
# include the x_test rows that are used below for validation and prediction,
# which leaks the held-out data into training and makes val_loss, early
# stopping and the checkpoint selection meaningless.
best_model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    callbacks=[monitor, checkpointer],
    verbose=1,
    epochs=1000,
)
best_model.load_weights('best_weights.hdf5') # load weights from best model
pred = best_model.predict(x_test)

In [None]:
# Display the test-set predictions thresholded at 0.5 (boolean array).
pred > 0.5

In [None]:
# First weight array of the model (presumably the Dense kernel, one row per
# feature; it is indexed as beta_hat[:, 0] below -- confirm).
beta_hat = best_model.get_weights()[0]

In [None]:
# Per-feature table: index, first-column coefficient, the matching `idx` entry,
# and whether |coef| exceeds the 1e-2 threshold. The header now states 10^-2 so
# it matches the threshold actually applied in the loop (it previously said 10^-3).
print('##.   coef  important  > 10^-2')
print('--- ------- --------- ---------')
for i, (b, beta) in enumerate(zip(idx, beta_hat[:, 0])):
    print('{:02d}. {:+6.4f}   {:5s}     {:5s}'.format(i, beta, str(b), str(abs(beta) > 1e-2)))

# Do it again for a sparse group classification problem

In [None]:
# Settings for the synthetic sparse-group problem:
# 10 groups of 20 features each, 2 informative groups, 5 informative features
# inside each informative group.
n_samples = 50
n_groups = 10
n_informative_groups = 2
n_features_per_group = 20
n_informative_per_group = 5
n_redundant_per_group = 0
n_repeated_per_group = 0

# random_state is fixed (as in the non-group section) so that the generated
# data, and everything derived from it, is reproducible across re-runs.
X, y, idx = make_group_classification(
    n_samples=n_samples,
    n_groups=n_groups,
    n_informative_groups=n_informative_groups,
    n_features_per_group=n_features_per_group,
    n_informative_per_group=n_informative_per_group,
    n_redundant_per_group=n_redundant_per_group,
    n_repeated_per_group=n_repeated_per_group,
    n_classes=2,
    n_clusters_per_class=2,
    flip_y=0.0,
    class_sep=10.0,
    shuffle=True,
    useful_indices=True,
    random_state=42,
)

In [None]:
# Two-column label matrix (column 0 is y, column 1 is 1 - y).
# NOTE(review): nothing visible in this section reads y_class -- the split and
# the fits below use the 1-D labels `y`; kept so notebook state is unchanged.
y_class = np.transpose(np.array([y, 1-y], dtype=np.int32))

# Hold out 15% of the samples, stratified on the labels. random_state pins the
# split so a Restart & Run All reproduces the same train/test partition.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, stratify=y, random_state=42
)

In [None]:
# Group label per feature: n_features_per_group copies of 0.0, then of 1.0, ...
# up to n_groups - 1 (float array, laid out contiguously by group).
groups = np.concatenate(
    [np.full(n_features_per_group, float(group_id)) for group_id in range(n_groups)]
)
# Array of ones with the same shape and dtype as `groups`.
ind_sparse = np.ones(groups.shape, dtype=groups.dtype)

In [None]:
def create_classification_model(alpha=0.1, lambda_=0.1):
    """Build an SSGL logistic-regression estimator and return its ``.model``.

    Note: this redefines (shadows) the builder of the same name defined
    earlier in the notebook.

    The estimator is configured from notebook-level globals: the input
    dimension is ``n_groups * n_features_per_group`` and the group layout comes
    from ``groups`` / ``ind_sparse``.

    Parameters
    ----------
    alpha : float, default 0.1
        Forwarded unchanged as ``alpha`` to ``sgl.SSGL_LogisticRegression``.
    lambda_ : float, default 0.1
        Forwarded unchanged as ``lambda_`` to ``sgl.SSGL_LogisticRegression``.

    Returns
    -------
    The ``model`` attribute of the constructed estimator (presumably the
    underlying Keras model -- confirm against keras_ssg_lasso).
    """
    ssgl_estimator = sgl.SSGL_LogisticRegression(
        dim_input=n_groups * n_features_per_group,
        n_classes=2,
        groups=groups,
        indices_sparse=ind_sparse,
        n_epochs=500,
        alpha=alpha,
        lambda_=lambda_,
        optimizer='adam',
        validation_split=0.0,
        early_stopping_patience=0,
        verbose=True,
    )
    return ssgl_estimator.model

# scikit-learn adapter around the SSGL builder; verbose=0 silences per-epoch
# output during the grid search.
model = KerasClassifier(build_fn=create_classification_model, verbose=0)

In [None]:
# Search space: a fixed epoch count, three alpha values and 20 log-spaced
# lambda_ values in [1e-4, 1e4].
# batch_size is not searched (candidate values were [32, 64]).
epochs = [50]
alphas = np.array([0.05, 0.5, 0.95])
lambdas = np.logspace(-4, 4, 20)
param_grid = {'epochs': epochs, 'alpha': alphas, 'lambda_': lambdas}

# 3-fold stratified CV repeated 3 times; both AUC and accuracy are recorded,
# and the final refit uses the configuration with the best AUC.
cv_generator = RepeatedStratifiedKFold(n_splits=3, n_repeats=3)
scoring = {'AUC': 'roc_auc', 'Accuracy': 'accuracy'}
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=cv_generator,
    scoring=scoring,
    refit='AUC',
    n_jobs=-1,
    verbose=5,
)

In [None]:
# Run the cross-validated search over param_grid on the training split only.
grid_result = grid.fit(x_train, y_train)

In [None]:
# Take the `.model` attribute of the best estimator found by the search.
best_model = grid.best_estimator_.model
# Stop training once val_loss has not improved by at least 1e-3 for 3 epochs.
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=3, verbose=1, mode='auto')
# Write weights to disk only when the monitored quantity improves.
checkpointer = ModelCheckpoint(filepath="best_weights.hdf5", verbose=0, save_best_only=True) # save best model

# Train on the training split only. Fitting on the full (X, y) would include
# the x_test rows that are used below for validation and prediction, leaking
# the held-out data into training and invalidating val_loss, early stopping
# and the checkpoint selection.
best_model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    callbacks=[monitor, checkpointer],
    verbose=1,
    epochs=1000,
)
best_model.load_weights('best_weights.hdf5') # load weights from best model
pred = best_model.predict(x_test)

In [None]:
# Show the hyper-parameter combination selected by the grid search (refit='AUC').
print(grid.best_params_)

In [None]:
# First weight array of the model (presumably the per-feature coefficient
# matrix; it is indexed as beta_hat[:, 0] below -- confirm).
beta_hat = best_model.get_weights()[0]

In [None]:
# Per-feature table: index, first-column coefficient, the matching `idx` entry,
# and whether |coef| is at least 1e-2.
row_format = '{:03d}. {:+6.4f}   {:5s}     {:5s}'
print('##.    coef  important  > 10^-2')
print('---- ------- --------- ---------')
for i, (b, beta) in enumerate(zip(idx, beta_hat[:, 0])):
    exceeds_threshold = abs(beta) >= 1e-2
    print(row_format.format(i, beta, str(b), str(exceeds_threshold)))

In [None]:
# Threshold the test-set predictions at 0.5 to get boolean predictions.
y_pred = pred > 0.5

In [None]:
# Print each held-out label next to its thresholded prediction for a visual check.
for yt, yp in zip(y_test, y_pred):
    print(yt, yp)