In [None]:
import pylab as p
import numpy as np
import pandas as pd
import seaborn as sns
import pyspikelib.utils as spkutil
import pyspikelib.mpladeq as mpladeq

from pathlib import Path

# Global plot styling: seaborn "ticks" theme with enlarged fonts, followed by
# the project's own matplotlib tweaks.
sns.set(style='ticks', font_scale=1.7)
mpladeq.beautify_mpl()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Seed NumPy's global RNG so the random index draw further down is repeatable.
RANDOM_SEED = 15
np.random.seed(RANDOM_SEED)

In [None]:
# Directory holding the parquet dumps, relative to the notebook location.
datapath = Path('../data')

# Load the wake recording first, then the sleep recording.
wake_data, sleep_data = (
    spkutil.load_parquet(datapath / filename)
    for filename in ('wake.parq', 'sleep.parq')
)

In [None]:
# Split each recording into two parts with ratio=0.5, then collect the four
# subsets under the keys used throughout the rest of the notebook.
wake_train, wake_test = spkutil.split_by_spikes(wake_data, ratio=0.5)
sleep_train, sleep_test = spkutil.split_by_spikes(sleep_data, ratio=0.5)

data = {
    'wake_train': wake_train,
    'wake_test': wake_test,
    'sleep_train': sleep_train,
    'sleep_test': sleep_test,
}

In [None]:
# Visual sanity check: first 1000 values of one wake-train series (entry 5).
example_series = data['wake_train']['series'][5]
p.plot(example_series[:1000])

mpladeq.prettify()
p.xlabel('ISI # in the spike train')
p.ylabel('ISI value, ms')

In [None]:
# Cropping parameters passed to crop_isi_samples for every subset.
# NOTE(review): step_size == window_size presumably yields non-overlapping
# windows -- confirm against spkutil.crop_isi_samples.
window_size = 100
step_size = 100
total_samples = 5000

crop_kwargs = dict(window_size=window_size,
                   step_size=step_size,
                   total_samples=total_samples)

# Apply identical cropping to all train/test subsets.
crop_data = {
    name: spkutil.crop_isi_samples(subset, **crop_kwargs)
    for name, subset in data.items()
}

In [None]:
# Shape of the cropped 'series' array for every subset.
[(name, cropped['series'].shape) for name, cropped in crop_data.items()]

In [None]:
# Turn each subset's cropped series into a feature table using the
# 'distribution_features' set; n_jobs=24 is the worker count handed to the helper.
tsdata = {
    name: spkutil.tsfresh_vectorize(crop_data[name]['series'],
                                    feature_dict='distribution_features',
                                    n_jobs=24)
    for name in data
}

In [None]:
# Peek at the first ten rows of the wake-train feature table.
wake_train_features = tsdata['wake_train']
wake_train_features.head(10)

In [None]:
# Number of feature rows drawn per class for both the train and the test set.
samples = 4500

# Draw row positions WITHOUT replacement: np.random.choice samples with
# replacement by default, which would put duplicated rows into the train and
# test sets. The same positions are reused for all four feature tables.
indices = np.random.choice(total_samples, samples, replace=False)

# Wake rows come first (label 0), sleep rows second (label 1).
X_train = pd.concat([tsdata['wake_train'].iloc[indices, :], tsdata['sleep_train'].iloc[indices, :]])
y_train = np.array([0] * indices.shape[0] + [1] * indices.shape[0])

X_test = pd.concat([tsdata['wake_test'].iloc[indices, :], tsdata['sleep_test'].iloc[indices, :]])
y_test = np.array([0] * indices.shape[0] + [1] * indices.shape[0])

In [None]:
# Preprocess the training features; the call also returns a scaler object.
X_train, scaler = spkutil.preprocess_tsfresh_features(
    X_train, remove_low_variance=True
)

# Reuse the scaler returned for the training set when preprocessing the test
# set; the second return value is not needed here.
X_test, _ = spkutil.preprocess_tsfresh_features(
    X_test, scaler=scaler, remove_low_variance=True
)

# NOTE(review): presumably restricts both tables to the columns they share --
# confirm against spkutil.train_test_common_features.
X_train, X_test = spkutil.train_test_common_features(X_train, X_test)

In [None]:
# Column names of the preprocessed training table; passed later as the
# predictor list (x=...) when training the AutoML model.
embed_features = X_train.columns.values

In [None]:
import h2o
from h2o.automl import H2OAutoML

# Start (or connect to) the H2O backend. Per the H2O docs an integer
# max_mem_size is read as gigabytes and nthreads=-1 means "use all cores".
h2o.init(max_mem_size=12, nthreads=-1)

In [None]:
# Attach the label vector as a 'target' column and upload the combined table
# to H2O. The feature index is reset so it lines up with the fresh 0..n-1
# index of the label Series during the column-wise concat.
train_features = X_train.reset_index(drop=True)
train_target = pd.Series(y_train, name='target')
X_y_train_h = h2o.H2OFrame(
    pd.concat([train_features, train_target], axis='columns')
)

In [None]:
# Convert the target column to a factor (categorical) column in place.
target_factor = X_y_train_h['target'].asfactor()
X_y_train_h['target'] = target_factor

In [None]:
# Upload the test features (index reset, no label column) to H2O.
test_features = X_test.reset_index(drop=True)
X_test_h = h2o.H2OFrame(test_features)

# Summarise the training frame.
X_y_train_h.describe()

In [None]:
# AutoML settings: 60-second runtime limit, max_models left as None, fixed seed.
automl_settings = {
    'max_runtime_secs': 60,
    'max_models': None,
    'seed': 42,
}
aml = H2OAutoML(**automl_settings)

In [None]:
# Fit AutoML on the uploaded training frame, using every retained feature
# column as a predictor and 'target' as the response.
aml.train(x=list(embed_features),
          y='target',
          training_frame=X_y_train_h)

# The leaderboard is an H2O frame; pull its 'model_id' column into a plain list.
lb = aml.leaderboard
model_ids = list(lb['model_id'].as_data_frame().iloc[:, 0])
out_path = "."

# Save every leaderboard model to out_path, overwriting existing files.
for m_id in model_ids:
    mdl = h2o.get_model(m_id)
    h2o.save_model(model=mdl, path=out_path, force=True)

# Export the leaderboard next to the models. The path is built with pathlib
# (imported at the top of the notebook) because `os` is never imported and
# the previous os.path.join call raised NameError.
h2o.export_file(lb, str(Path(out_path) / 'aml_leaderboard.h2o'), force=True)