In [1]:
import sys
from pathlib import Path
from addict import Dict

sys.path.append('../../')
sys.path.append('/disk/neuro/sktime-dl')

In [2]:
import numpy as np
import pandas as pd
import pylab as plt
import seaborn as sns
import plotly.express as px

from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import GroupShuffleSplit
from sklearn.ensemble import RandomForestClassifier

from examples.utils.config import Config
from examples.utils.dataset_adapters import retina_dataset
from pyspikelib import TrainNormalizeTransform
from pyspikelib.utils import simple_undersampling
from pyspikelib.mpladeq import prettify, beautify_mpl, boxplot

import tensorflow as tf
import tensorflow.keras as keras

tf.random.Generator = None 

import sktime_dl
from sktime_dl.deeplearning import CNNClassifier

beautify_mpl()

In [3]:
config_dict = {
    'seed': 0,
    'window': 50,
    'step': 50,
    'train_subsample_factor': 0.7,
    'test_subsample_factor': 0.7,
    'delimiter': None,
    'dataset': '../../data/retina/mode_paper_data',
    'fstate': 'randomly_moving_bar',
    'mstate': 'white_noise_checkerboard',
}

config = Config(config_dict)
np.random.seed(config.seed)

In [4]:
retinal_spike_data = retina_dataset(config.dataset)

In [5]:
group_split = GroupShuffleSplit(n_splits=1, test_size=0.5)
X = np.hstack(
    [
        retinal_spike_data[config.fstate].series.values,
        retinal_spike_data[config.mstate].series.values,
    ]
)
y = np.hstack(
    [
        np.ones(retinal_spike_data[config.fstate].shape[0]),
        np.zeros(retinal_spike_data[config.mstate].shape[0]),
    ]
)
groups = np.hstack(
    [
        retinal_spike_data[config.fstate].groups.values,
        retinal_spike_data[config.mstate].groups.values,
    ]
)

In [6]:
for train_index, test_index in group_split.split(X, y, groups):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

X_train = pd.DataFrame({'series': X_train, 'groups': groups[train_index]})
X_test = pd.DataFrame({'series': X_test, 'groups': groups[test_index]})

In [7]:
normalizer = TrainNormalizeTransform(
    window=config.window, 
    step=config.step, 
    n_samples=None
)
X_train, y_train = normalizer.transform(X_train, y_train, delimiter=config.delimiter)
X_test, y_test = normalizer.transform(X_test, y_test, delimiter=config.delimiter)

In [8]:
print('Dataset size: train {}, test {}'.format(X_train.shape, X_test.shape))
print('Average target: train {}, test {}'.format(y_train.mean(), y_test.mean()))

Dataset size: train (23187, 50), test (23908, 50)
Average target: train 0.7639194376159054, test 0.7755144721432157


In [9]:
import tensorflow.keras as keras
import matplotlib.pyplot as plt
from IPython.display import clear_output
import plotly.graph_objects as go


class PlotLearning(keras.callbacks.Callback):
    
    def __init__(self, *args, update_freq=1, **kwargs):
        super().__init__(*args, **kwargs)
        self.update_freq = update_freq
        self.loss_log_freq = 10
    
    def on_train_begin(self, logs={}):
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.acc = []
        self.val_acc = []
        self.fig = plt.figure()
        
        self.logs = []

    def on_epoch_end(self, epoch, logs={}):
        
        self.i += 1
        
        if (self.i % self.loss_log_freq) == 0:
            self.logs.append(logs)
            self.x.append(self.i)
            self.losses.append(logs.get('loss'))
            self.val_losses.append(logs.get('val_loss'))
            self.acc.append(logs.get('accuracy'))
            self.val_acc.append(logs.get('val_accuracy'))
        
        if (self.i % self.update_freq) == 0:
            clear_output(wait=True)

            df = pd.DataFrame({'epoch': self.x + self.x, 
                               'loss': self.losses + self.val_losses,
                               'dataset': ['train'] * len(self.losses) + ['val'] * len(self.val_losses)})
            fig = px.line(df, x='epoch', 
                          y='loss', title='loss',
                          color='dataset',
                          height=400, width=800)

            df2 = pd.DataFrame({'epoch': self.x + self.x, 
                                'accuracy': self.acc + self.val_acc,
                                'dataset': ['train'] * len(self.acc) + ['val'] * len(self.val_acc)})
            fig2 = px.line(df2, x='epoch', 
                           y='accuracy', title='accuracy',
                           color='dataset',
                           height=400, width=800)
            fig.show()
            fig2.show()

In [10]:
from pyspikelib.utils import simple_undersampling

Xs_train, ys_train = simple_undersampling(
    pd.DataFrame(X_train), y_train, subsample_size=0.9
)
Xs_test, ys_test = simple_undersampling(
    pd.DataFrame(X_test), y_test, subsample_size=0.9
)

Xs_train.shape, Xs_test.shape, ys_train.mean(), ys_test.mean()

((9853, 50), (9660, 50), 0.49781792347508375, 0.4997929606625259)

In [11]:
baseline_forest = RandomForestClassifier(n_estimators=100, max_depth=5, n_jobs=-1)
baseline_forest.fit(Xs_train, ys_train)

RandomForestClassifier(max_depth=5, n_jobs=-1)

In [12]:
accuracy_score(ys_test, baseline_forest.predict(Xs_test)), \
roc_auc_score(ys_test, baseline_forest.predict_proba(Xs_test)[:, 1])

(0.6875776397515528, 0.8242741105279907)

In [14]:
ce_loss = tf.keras.losses.BinaryCrossentropy(
    from_logits=False, label_smoothing=0, reduction="auto", name="binary_crossentropy"
)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

network = CNNClassifier(nb_epochs=1000,
                        batch_size=1024,
                        verbose=False,
                        loss=ce_loss,
                        optimizer=optimizer,
                        callbacks=[PlotLearning(update_freq=50)])

network.fit(Xs_train, ys_train,
            validation_X=Xs_test,
            validation_y=ys_test)

CNNClassifier(batch_size=1024,
              callbacks=[<__main__.PlotLearning object at 0x7f74af14d208>],
              loss=<tensorflow.python.keras.losses.BinaryCrossentropy object at 0x7f74af14d400>,
              nb_epochs=1000,
              optimizer=<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x7f74af14d438>,
              random_state=RandomState(MT19937) at 0x7F74AF1E2468)

<Figure size 720x576 with 0 Axes>