In [None]:
import os
import csv
import yaml
import wandb
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.layers import Dense, GRU, Dropout, Conv1D, MaxPooling1D, Flatten, GlobalMaxPooling1D
from keras.models import Sequential
from keras import layers, optimizers
from sklearn.model_selection import train_test_split
from wandb.keras import WandbCallback

In [None]:
# %pip install wandb  # uncomment and run once if wandb is missing (%pip installs into this kernel's environment)

In [None]:
def read_sequential_preprocessing(folder):
    """Load the pickled train/test artefacts and the YAML settings stored in ``folder``.

    Parameters
    ----------
    folder : str or path-like
        Directory containing ``X_train.pickle``, ``X_test.pickle``,
        ``y_train.pickle``, ``y_test.pickle``, ``train_indexes.pickle``,
        ``test_indexes.pickle`` and ``metadata.yaml``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, train_indexes, test_indexes, settings)``
        where the first six items are whatever objects were pickled and
        ``settings`` is the parsed content of ``metadata.yaml``.

    Raises
    ------
    OSError
        If any of the expected files cannot be opened.
    """
    # NOTE(review): pickle.load can execute arbitrary code and yaml.full_load
    # resolves more tags than yaml.safe_load -- only point this at trusted folders.
    artefact_names = (
        'X_train', 'X_test', 'y_train', 'y_test', 'train_indexes', 'test_indexes',
    )

    artefacts = []
    for name in artefact_names:
        with open(os.path.join(folder, f'{name}.pickle'), 'rb') as f:
            artefacts.append(pickle.load(f))

    # os.path.join avoids the doubled separator the hand-built path used to contain.
    with open(os.path.join(folder, 'metadata.yaml')) as file:
        settings = yaml.full_load(file)

    return (*artefacts, settings)

## Keras model

In [None]:
def generator(x, y, rows, moving_window_seconds, hz, step, batch_size, shuffle = False):
    """Endlessly yield ``(samples, targets)`` batches of look-back windows.

    For every selected entry ``row`` of ``rows`` the sample is the block
    ``x.iloc[row - lookback + 1 : row + 1]`` and the target is ``y[row]``,
    with ``lookback = moving_window_seconds * hz``.

    Parameters
    ----------
    x : object exposing ``.iloc`` and ``.shape``
        Feature table; windows are taken positionally via ``x.iloc``.
    y : array-like
        Targets, indexed as ``y[row]``; ``y.shape[1]`` sets the target width.
    rows : sequence of int
        Positions in ``x`` / ``y`` at which a window ends.
        NOTE(review): each entry is assumed to be >= ``lookback - 1``; smaller
        values make ``iloc`` wrap around to the end of ``x`` -- confirm upstream.
    moving_window_seconds, hz : int
        Multiplied together to give the window length in rows.
    step : any
        Accepted for interface compatibility; not used by this function.
    batch_size : int
        Number of windows per batch.
    shuffle : bool, default False
        If True, each batch is drawn uniformly at random (with replacement)
        from ``rows``. Otherwise ``rows`` is walked in order in chunks of
        ``batch_size``, restarting from the beginning when fewer than
        ``batch_size`` entries remain.

    Yields
    ------
    samples : numpy.ndarray, shape ``(n, lookback, x.shape[-1])``
    targets : numpy.ndarray, shape ``(n, y.shape[1])``
        ``n`` equals ``batch_size`` except when ``rows`` holds fewer than
        ``batch_size`` entries in sequential mode, in which case ``n == len(rows)``.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``rows`` is empty.
    """
    lookback = moving_window_seconds * hz

    n_rows = len(rows)
    if n_rows == 0:
        raise ValueError('rows must contain at least one window index')

    # Converted once so random selection does not rebuild an array per batch.
    row_array = np.asarray(rows)

    i = 0
    while True:
        if shuffle:
            # randint's upper bound is exclusive, so n_rows (not n_rows - 1)
            # is required for the last window to be selectable.
            my_rows = row_array[np.random.randint(0, n_rows, size=batch_size)]
        else:
            # Wrap only when a full batch no longer fits; the previous
            # ">= len(rows) - 1" test skipped the final full batch.
            if i + batch_size > n_rows:
                i = 0

            my_rows = row_array[i:i + batch_size]
            i += batch_size

        # Fresh buffers per batch: a batch handed out earlier is never
        # overwritten, and a short batch is not padded with all-zero rows.
        samples = np.zeros((len(my_rows), lookback, x.shape[-1]))
        targets = np.zeros((len(my_rows), y.shape[1]))

        for j, row in enumerate(my_rows):
            indices = range(row - lookback + 1, row + 1)
            samples[j] = x.iloc[indices]
            targets[j] = y[row]

        yield samples, targets

In [None]:
# Candidate preprocessed datasets; uncomment an entry to include it in the load loop below.
folders = [
    # './data/sensor/sequential_1hz_5sec',
    # './data/sensor/sequential_2hz_5sec',
    # './data/sensor/sequential_5hz_5sec',
    # './data/sensor/sequential_10hz_5sec',
    # './data/sensor/sequential_20hz_5sec',
    # './data/sensor/sequential_1hz_10sec',
    # './data/sensor/sequential_2hz_10sec',
    # './data/sensor/sequential_5hz_10sec',
    # './data/sensor/sequential_10hz_10sec',
    # './data/sensor/sequential_20hz_10sec',
    # './data/sensor/sequential_1hz_20sec',
    # './data/sensor/sequential_2hz_20sec',
    # './data/sensor/sequential_5hz_20sec',
    # './data/sensor/sequential_10hz_20sec',
    # './data/sensor/sequential_20hz_20sec',
    './data/sensor/sequential_position_5hz_2sec',
]

# Every iteration rebinds the same names, so only the last folder's data stays loaded afterwards.
for my_folder in folders:
    (
        X_train, X_test, y_train, y_test,
        train_indexes, test_indexes, settings,
    ) = read_sequential_preprocessing(my_folder)

    print(X_train.shape)

In [None]:
# Smoke test: build a shuffled training generator from the currently loaded data and pull one batch.
train_generator = generator(
    X_train,
    y_train,
    train_indexes,
    settings['MOVING_WINDOW_SIZE'],
    settings['HZ'],
    settings['STEP_SIZE'],
    batch_size=128,
    shuffle=True,
)

# t holds the sample windows, ts the matching targets.
t, ts = next(train_generator)

In [None]:
# Shapes of the sample batch and the target batch, one per line.
print(t.shape, ts.shape, sep='\n')

In [None]:
def create_CNN_model(convolution_size = 3, second_convolutional_layer = False, dropout = 0,
                     n_features = None, n_classes = 4):
    """Build an (uncompiled) 1D-convolutional softmax classifier.

    Layer order: ``Conv1D(32)`` -> [``MaxPooling1D(3)`` -> ``Conv1D(32)``] ->
    ``GlobalMaxPooling1D`` -> [``Dropout``] -> ``Dense(n_classes, softmax)``.

    Parameters
    ----------
    convolution_size : int, default 3
        Kernel size used by every ``Conv1D`` layer.
    second_convolutional_layer : bool, default False
        When truthy, a max-pooling layer and a second convolution are added.
    dropout : float, default 0
        Dropout rate applied before the output layer; no ``Dropout`` layer is
        added when it is not greater than 0.
    n_features : int, optional
        Size of the last input axis. Defaults to ``X_train.shape[-1]`` read
        from the notebook's global ``X_train``.
    n_classes : int, default 4
        Number of units in the softmax output layer.

    Returns
    -------
    keras.models.Sequential
    """
    if n_features is None:
        # Fallback keeps the original behaviour of sizing the input from the
        # globally loaded training data.
        n_features = X_train.shape[-1]

    model = Sequential()
    model.add(Conv1D(32, convolution_size, activation = 'relu', input_shape = (None, n_features)))

    # Truthiness test instead of "is False": 0 or numpy.False_ coming from a
    # hyper-parameter grid must not switch the second block on.
    if second_convolutional_layer:
        model.add(MaxPooling1D(3))
        model.add(Conv1D(32, convolution_size, activation = 'relu'))

    model.add(GlobalMaxPooling1D())

    if dropout > 0:
        model.add(Dropout(dropout))
    model.add(Dense(n_classes, activation = 'softmax'))

    return model

In [None]:
def create_RNN_model(first_layer_size = 32, added_dense_layers = 0, recurrent_dropout = 0, dropout = 0,
                     n_features = None, n_classes = 4):
    """Build an (uncompiled) GRU-based softmax classifier.

    Layer order: ``GRU(first_layer_size)`` -> optional hidden ``Dense`` layers
    -> ``Dense(n_classes, softmax)``.

    Parameters
    ----------
    first_layer_size : int, default 32
        Number of GRU units.
    added_dense_layers : {0, 1, 2}, default 0
        Number of hidden ReLU ``Dense`` layers between the GRU and the output:
        none, one of 128 units, or two of 512 and 64 units.
    recurrent_dropout, dropout : float, default 0
        Passed straight to the ``GRU`` layer.
    n_features : int, optional
        Size of the last input axis. Defaults to ``X_train.shape[-1]`` read
        from the notebook's global ``X_train``.
    n_classes : int, default 4
        Number of units in the softmax output layer.

    Returns
    -------
    keras.models.Sequential

    Raises
    ------
    ValueError
        If ``added_dense_layers`` is not 0, 1 or 2. Previously such a value
        silently produced a model without any output layer.
    """
    hidden_units_by_depth = {0: (), 1: (128,), 2: (512, 64)}
    if added_dense_layers not in hidden_units_by_depth:
        raise ValueError(
            f'added_dense_layers must be 0, 1 or 2, got {added_dense_layers!r}'
        )

    if n_features is None:
        # Fallback keeps the original behaviour of sizing the input from the
        # globally loaded training data.
        n_features = X_train.shape[-1]

    model = Sequential()
    model.add(GRU(first_layer_size, dropout=dropout, recurrent_dropout=recurrent_dropout, input_shape=(None, n_features)))

    # Only the first layer of a Sequential model needs input_shape, so the
    # Dense layers no longer carry the (unused) argument.
    for units in hidden_units_by_depth[added_dense_layers]:
        model.add(Dense(units, activation = 'relu'))
    model.add(Dense(n_classes, activation = 'softmax'))

    return model

In [None]:
# Log in to Weights & Biases before the training cells below open runs with wandb.init.
wandb.login()

In [None]:
# CNN sweep: for each window length, load the data, build the batch generators
# and train one model per hyper-parameter combination, logging each to its own wandb run.
for seconds in [10]:
    X_train, X_test, y_train, y_test, train_indexes, test_indexes, settings = read_sequential_preprocessing(
        folder = f'./data/sensor/sequential_index_10hz_{seconds}sec'
    )

    print(X_train.shape)
    print(y_train.shape)
    print(len(train_indexes))
    print(X_test.shape)
    print(y_test.shape)
    print(len(test_indexes))

    settings['BATCH_SIZE'] = 128

    train_generator = generator(
        x = X_train,
        y = y_train,
        rows = train_indexes,
        moving_window_seconds = settings['MOVING_WINDOW_SIZE'],
        hz = settings['HZ'],
        step = settings['STEP_SIZE'],
        batch_size = settings['BATCH_SIZE'],
        shuffle=True
    )

    test_generator = generator(
        x = X_test,
        y = y_test,
        rows = test_indexes,
        moving_window_seconds = settings['MOVING_WINDOW_SIZE'],
        hz = settings['HZ'],
        step = settings['STEP_SIZE'],
        batch_size = settings['BATCH_SIZE'],
        shuffle=False
    )

    # The generators emit one sample per entry of the index lists, so the
    # number of batches per pass comes from the indexes, not from the raw rows.
    train_steps = len(train_indexes) // settings['BATCH_SIZE']
    print(f'training steps: {train_steps}')
    test_steps = len(test_indexes) // settings['BATCH_SIZE']
    print(f'test steps: {test_steps}')

    learning_rate = 0.00002
    split = 'index'
    for dropout in [0.2, 0.5, 0.7]:
        for convolution_size in [3]:
            for second_convolutional_layer in [False]:
                # dropout has to be forwarded: create_CNN_model defaults it to 0,
                # which made every run of this sweep train the same model.
                model = create_CNN_model(convolution_size, second_convolutional_layer, dropout)

                config={
                    "architecture": "CNN",
                    "moving_window_size": settings['MOVING_WINDOW_SIZE'],
                    "hz": settings['HZ'],
                    "step_size": settings['STEP_SIZE'],
                    "aggregation": settings['AGGREGATION'],
                    "features": settings['FEATURES'],
                    'batch size': settings['BATCH_SIZE'],
                    "epochs": 20,
                    "layers": len(model.layers),
                    "train-test split": split,
                    "convolution_size": convolution_size,
                    "learning_rate": learning_rate,
                    "second_convolutional_layer": second_convolutional_layer,
                    "dropout": dropout
                }

                run = wandb.init(
                    project="CDL1",
                    entity="cdl1",
                    tags=[split, 'CNN', 'sequence length'],
                    name=config['architecture'],
                    config=config
                )

                # try/finally so the wandb run is closed even when training fails.
                try:
                    # compile model
                    model.compile(
                        loss = 'categorical_crossentropy',
                        optimizer = optimizers.RMSprop(learning_rate=learning_rate),
                        metrics = ['accuracy']
                    )

                    model.fit(
                        train_generator,
                        steps_per_epoch=train_steps,
                        epochs = config['epochs'],
                        validation_data = test_generator,
                        validation_steps=test_steps,
                        callbacks=[WandbCallback()]
                    )
                finally:
                    run.finish()

In [None]:
# RNN sweep: for each train/test split, load the data, build the batch generators
# and train one GRU model per hyper-parameter combination, logging each to its own wandb run.
for split in ['index']: # , 'position', 'user']:
    X_train, X_test, y_train, y_test, train_indexes, test_indexes, settings = read_sequential_preprocessing(
        folder = f'./data/sensor/sequential_{split}_10hz_2sec'
    )

    print(X_train.shape)
    print(y_train.shape)
    print(len(train_indexes))
    print(X_test.shape)
    print(y_test.shape)
    print(len(test_indexes))

    settings['BATCH_SIZE'] = 128

    train_generator = generator(
        x = X_train,
        y = y_train,
        rows = train_indexes,
        moving_window_seconds = settings['MOVING_WINDOW_SIZE'],
        hz = settings['HZ'],
        step = settings['STEP_SIZE'],
        batch_size = settings['BATCH_SIZE'],
        shuffle=True
    )

    test_generator = generator(
        x = X_test,
        y = y_test,
        rows = test_indexes,
        moving_window_seconds = settings['MOVING_WINDOW_SIZE'],
        hz = settings['HZ'],
        step = settings['STEP_SIZE'],
        batch_size = settings['BATCH_SIZE'],
        shuffle=False
    )

    # The generators emit one sample per entry of the index lists, so the
    # number of batches per pass comes from the indexes, not from the raw rows.
    train_steps = len(train_indexes) // settings['BATCH_SIZE']
    print(f'training steps: {train_steps}')
    test_steps = len(test_indexes) // settings['BATCH_SIZE']
    print(f'test steps: {test_steps}')

    for learning_rate in [0.00002]: # list(np.geomspace(2e-8,2e-2,num=9)) # format(2e-05, 'f')
        for first_layer_size in [64]:
            for added_dense_layers in [0]:
                for recurrent_dropout in [0.7]:
                    for dropout in [0.5]:

                        model = create_RNN_model(
                            first_layer_size = first_layer_size,
                            added_dense_layers = added_dense_layers,
                            recurrent_dropout = recurrent_dropout,
                            dropout = dropout
                        )

                        config={
                            "architecture": "RNN",
                            "moving_window_size": settings['MOVING_WINDOW_SIZE'],
                            "hz": settings['HZ'],
                            "step_size": settings['STEP_SIZE'],
                            # "test_proportion": settings['TEST_PROPORTION'],
                            "aggregation": settings['AGGREGATION'],
                            "features": settings['FEATURES'],
                            'batch size': settings['BATCH_SIZE'],
                            "epochs": 30,
                            "layers": len(model.layers),
                            "first_layer_size": first_layer_size,
                            "added_dense_layers": added_dense_layers,
                            "recurrent_dropout": recurrent_dropout,
                            "dropout": dropout,
                            "train-test split": split,
                            "learning_rate": learning_rate
                        }

                        run = wandb.init(
                            project="CDL1",
                            entity="cdl1",
                            tags=[split, 'dropout 3'],
                            name=config['architecture'],
                            config=config
                        )

                        # try/finally so the wandb run is closed even when training fails.
                        try:
                            # compile model
                            model.compile(
                                loss = 'categorical_crossentropy',
                                optimizer = optimizers.RMSprop(learning_rate=learning_rate),
                                metrics = ['accuracy']
                            )

                            model.fit(
                                train_generator,
                                steps_per_epoch=train_steps,
                                epochs = config['epochs'],
                                validation_data = test_generator,
                                validation_steps=test_steps,
                                callbacks=[WandbCallback()]
                            )
                        finally:
                            run.finish()