In [12]:
import sys

from pathlib import Path
from addict import Dict
from copy import deepcopy

sys.path.append('../../')

In [13]:
import numpy as np
import pandas as pd
import pylab as plt
import seaborn as sns

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import GroupShuffleSplit
from sklearn.ensemble import RandomForestClassifier

from examples.utils.config import Config
from examples.utils.dataset_adapters import retina_dataset
from spikebench import TrainNormalizeTransform
from spikebench.utils import simple_undersampling
from spikebench.train_encoders import ISIShuffleTransform
from spikebench.mpladeq import prettify, beautify_mpl, boxplot

import tensorflow as tf
import tensorflow.keras as keras

tf.random.Generator = None 

import sktime_dl
from sktime_dl.deeplearning import CNNClassifier

from viz_utils import PlotLearningCurveCallback

beautify_mpl()

In [14]:
config_dict = {
    'seed': 0,
    'window': 50,
    'step': 50,
    'train_subsample_factor': 0.7,
    'test_subsample_factor': 0.7,
    'delimiter': None,
    'dataset': '../../data/retina/mode_paper_data',
    'state': 'randomly_moving_bar',
}

config = Config(config_dict)
np.random.seed(config.seed)

In [15]:
retinal_spikes = retina_dataset(config.dataset)[config.state]

In [16]:
shuffler = ISIShuffleTransform()
retinal_spikes_shuffled = shuffler.transform(
    deepcopy(retinal_spikes), format='pandas', delimiter=config.delimiter
)

In [17]:
group_split = GroupShuffleSplit(n_splits=1, test_size=0.5)
X = np.hstack([retinal_spikes.series.values, retinal_spikes_shuffled.series.values])
y = np.hstack(
    [np.ones(retinal_spikes.shape[0]), np.zeros(retinal_spikes_shuffled.shape[0])]
)
groups = np.hstack(
    [retinal_spikes.groups.values, retinal_spikes_shuffled.groups.values]
)

for train_index, test_index in group_split.split(X, y, groups):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

X_train = pd.DataFrame({'series': X_train, 'groups': groups[train_index]})
X_test = pd.DataFrame({'series': X_test, 'groups': groups[test_index]})

In [18]:
for train_index, test_index in group_split.split(X, y, groups):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

X_train = pd.DataFrame({'series': X_train, 'groups': groups[train_index]})
X_test = pd.DataFrame({'series': X_test, 'groups': groups[test_index]})

In [19]:
normalizer = TrainNormalizeTransform(
    window=config.window, 
    step=config.step, 
    n_samples=None
)
X_train, y_train = normalizer.transform(X_train, y_train, delimiter=config.delimiter)
X_test, y_test = normalizer.transform(X_test, y_test, delimiter=config.delimiter)

In [20]:
print('Dataset size: train {}, test {}'.format(X_train.shape, X_test.shape))
print('Average target: train {}, test {}'.format(y_train.mean(), y_test.mean()))

Dataset size: train (33584, 50), test (38924, 50)
Average target: train 0.5, test 0.5


In [21]:
from spikebench.utils import simple_undersampling

Xs_train, ys_train = simple_undersampling(
    pd.DataFrame(X_train), y_train, subsample_size=0.9
)
Xs_test, ys_test = simple_undersampling(
    pd.DataFrame(X_test), y_test, subsample_size=0.9
)

Xs_train.shape, Xs_test.shape, ys_train.mean(), ys_test.mean()

((30225, 50), (35031, 50), 0.5002481389578164, 0.4953612514629899)

In [22]:
baseline_forest = RandomForestClassifier(n_estimators=100, max_depth=5, n_jobs=-1)
baseline_forest.fit(Xs_train, ys_train)

RandomForestClassifier(max_depth=5, n_jobs=-1)

In [23]:
accuracy_score(ys_test, baseline_forest.predict(Xs_test)), \
roc_auc_score(ys_test, baseline_forest.predict_proba(Xs_test)[:, 1])

(0.5271616568182467, 0.5441622808583683)

In [24]:
ce_loss = tf.keras.losses.BinaryCrossentropy(
    from_logits=False, label_smoothing=0, reduction="auto", name="binary_crossentropy"
)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

network = CNNClassifier(nb_epochs=1000,
                        batch_size=1024,
                        verbose=False,
                        loss=ce_loss,
                        optimizer=optimizer,
                        callbacks=[PlotLearningCurveCallback(update_freq=50)])

network.fit(Xs_train, ys_train,
            validation_X=Xs_test,
            validation_y=ys_test)

CNNClassifier(batch_size=1024,
              callbacks=[<viz_utils.PlotLearningCurveCallback object at 0x7fbfd6f3dda0>],
              loss=<tensorflow.python.keras.losses.BinaryCrossentropy object at 0x7fbfd6f3dac8>,
              nb_epochs=1000,
              optimizer=<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x7fbfd6f3de10>,
              random_state=RandomState(MT19937) at 0x7FBFD7279EB8)