To prepare the required environment using conda/mamba, run the following commands:

```bash
mamba create -n autokeras
mamba activate autokeras
mamba install python xarray dask netCDF4 bottleneck scikit-learn tqdm nilearn seaborn matplotlib
pip install autokeras -U
```

In [19]:

import autokeras as ak

import tensorflow as tf


import numpy as np

from tqdm import tqdm

from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from python.acnets.pipeline import Connectivity, ConnectivityVectorizer

from IPython.display import clear_output

In [2]:
atlas = 'dosenbach2010'
kind = 'chatterjee'
aggregate_networks=True

subjects = Connectivity().transform(None).coords['subject'].values
groups = np.array([s[:4] for s in subjects])

X = subjects.reshape(-1, 1)
y = groups

factor_name = 'networks' if aggregate_networks else 'regions'

from sklearn.preprocessing import LabelEncoder
y_encoder = LabelEncoder()
y = y_encoder.fit_transform(y)


prep_pipe  = Pipeline([
    ('connectivity', Connectivity(atlas, kind, aggregate_networks)),
    ('vectorize', ConnectivityVectorizer()),
])

X = prep_pipe.fit_transform(X, y)
feature_names = prep_pipe.get_feature_names_out()

In [3]:
# DEBUG X, y = make_classification(32, 160, n_classes=2)

In [61]:
cv = StratifiedShuffleSplit(n_splits=2, test_size=0.2)

val_scores = []


for train, test in tqdm(cv.split(X, y), total=cv.get_n_splits(X, y)):
    
    auto_model = ak.AutoModel(
        inputs=ak.StructuredDataInput(
            column_names=feature_names.tolist(),
            column_types={feat: 'numerical' for feat in feature_names},
            name=f'{atlas}_{factor_name}_{kind}'),
        outputs=ak.ClassificationHead(metrics=['accuracy'], loss=tf.keras.losses.BinaryCrossentropy()),
        project_name=f'tmp/autokeras/{atlas}_{factor_name}_{kind}_{np.random.randint(1e6)}',
        tuner='bayesian',
        overwrite=True,
        max_trials=5,
        objective='val_accuracy'
    )

    history = auto_model.fit(
        X[train], y[train],
        validation_data=(X[test], y[test]),
        callbacks=[tf.keras.callbacks.EarlyStopping('val_loss', patience=10, restore_best_weights=True)],
        epochs=1000,
        batch_size=32,
        verbose=1)

    # y_pred = auto_model.predict(X[test])
    # score = accuracy_score(y[test], y_pred)
    loss, val_score = auto_model.evaluate(X[test], y[test], batch_size=32)

    val_scores.append(val_score)
    print('val_accuracy: ', val_score)

clear_output()
print('val_scores:', val_scores)

val_scores: [1.0, 0.8571428656578064]


In [63]:
print('cross-validated val_accuracy:', np.mean(val_scores))

print('one of the fitted model:', auto_model.export_model().summary())

cross-validated val_accuracy: 0.9285714328289032
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 21)]              0         
                                                                 
 multi_category_encoding (Mu  (None, 21)               0         
 ltiCategoryEncoding)                                            
                                                                 
 dense (Dense)               (None, 1024)              22528     
                                                                 
 re_lu (ReLU)                (None, 1024)              0         
                                                                 
 dense_1 (Dense)             (None, 1)                 1025      
                                                                 
 classification_head_1 (Acti  (None, 1)                0         
 vation)    