In [None]:
import os
import csv
import yaml
import wandb
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.layers import Dense, GRU, Dropout, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential
# from src import read_nz_file, read_jg_file, update_meta_data, split_df, aggregate_files, add_moving_window
from sklearn.model_selection import train_test_split
from wandb.keras import WandbCallback

In [None]:
# !pip install wandb

In [None]:
def read_preprocessing(folder, file_type: str = 'parquet'):
    """Load a preprocessed train/test split and its metadata from ``folder``.

    Parameters
    ----------
    folder : str or path-like
        Directory containing ``X_train``, ``X_test``, ``y_train`` and
        ``y_test`` files (``.parquet`` or ``.pickle``) and ``metadata.yaml``.
    file_type : str, default 'parquet'
        ``'parquet'`` reads the four files with pandas and keeps only the
        ``'y'`` column of the two target files; ``'pickle'`` unpickles the
        four files as stored.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, settings)`` where ``settings``
        is the parsed content of ``metadata.yaml``.

    Raises
    ------
    ValueError
        If ``file_type`` is neither ``'parquet'`` nor ``'pickle'``.
    """
    # Fail fast: without this check an unknown file_type skips both branches
    # and only surfaces later as an unrelated error.
    if file_type not in ('parquet', 'pickle'):
        raise ValueError(
            f"Unsupported file_type {file_type!r}; expected 'parquet' or 'pickle'."
        )

    if file_type == 'parquet':
        X_train = pd.read_parquet(f'{folder}/X_train.parquet')
        X_test = pd.read_parquet(f'{folder}/X_test.parquet')
        y_train = pd.read_parquet(f'{folder}/y_train.parquet')['y']
        y_test = pd.read_parquet(f'{folder}/y_test.parquet')['y']
    else:
        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # file; only point this at folders produced by trusted preprocessing.
        with open(f'{folder}/X_train.pickle', 'rb') as f:
            X_train = pickle.load(f)
        with open(f'{folder}/X_test.pickle', 'rb') as f:
            X_test = pickle.load(f)
        with open(f'{folder}/y_train.pickle', 'rb') as f:
            y_train = pickle.load(f)
        with open(f'{folder}/y_test.pickle', 'rb') as f:
            y_test = pickle.load(f)

    # SECURITY NOTE: yaml.full_load can construct arbitrary Python objects;
    # kept for compatibility with existing metadata files, but the file must
    # come from a trusted source.
    with open(f'{folder}/metadata.yaml') as file:
        settings = yaml.full_load(file)

    return X_train, X_test, y_train, y_test, settings

In [None]:
def read_sequential_preprocessing(folder):
    """Load the pickled sequential dataset stored in ``folder``.

    Reads, in this order, ``X_train``, ``X_test``, ``y_train``, ``y_test``,
    ``train_indexes`` and ``test_indexes`` (each ``<name>.pickle``) and then
    parses ``metadata.yaml``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, train_indexes, test_indexes,
        settings)``.
    """
    def _unpickle(stem):
        # One pickle file per artefact, named after the artefact itself.
        with open(f'{folder}/{stem}.pickle', 'rb') as handle:
            return pickle.load(handle)

    artefact_names = (
        'X_train', 'X_test', 'y_train', 'y_test', 'train_indexes', 'test_indexes',
    )
    artefacts = [_unpickle(stem) for stem in artefact_names]

    with open(f'{folder}//metadata.yaml') as handle:
        settings = yaml.full_load(handle)

    return (*artefacts, settings)

## Keras model

In [None]:
# X_train, X_test, y_train, y_test, settings = read_preprocessing(file_type='pickle', folder = './data/sensor/NN_5hz_5sec')

# my_train_indexes = [i for i in range(0, y_train.shape[0] - 1)]
# random.shuffle(my_train_indexes)

# my_test_indexes = [i for i in range(0, y_test.shape[0] - 1)]
# random.shuffle(my_test_indexes)

# X_train = X_train.take(my_train_indexes, 0)
# y_train = y_train.take(my_train_indexes, 0)
# X_test = X_test.take(my_test_indexes, 0)
# y_test = y_test.take(my_test_indexes, 0)

In [None]:
def generator(x, y, rows, moving_window_seconds, hz, step, batch_size, shuffle = False):
    """Endlessly yield ``(samples, targets)`` batches of look-back windows.

    For each selected row index ``r`` the sample is the ``lookback`` rows of
    ``x`` ending at ``r`` (inclusive), where
    ``lookback = moving_window_seconds * hz``, and the target is ``y[r]``.

    Parameters
    ----------
    x : pandas.DataFrame
        Feature table; windows are taken positionally via ``.iloc``.
    y : 2-D array
        Targets, indexed as ``y[row]``; ``y.shape[1]`` is the target width.
    rows : sequence of int
        Positional indexes of the last row of each window.
    moving_window_seconds, hz : int
        Window length in seconds and sampling rate; their product is the
        number of rows per window.
    step :
        Not used by this function; kept so existing callers keep working.
    batch_size : int
        Number of windows per batch.
    shuffle : bool, default False
        If True, each batch is ``batch_size`` indexes drawn uniformly with
        replacement from ``rows``. Otherwise ``rows`` is walked in order and
        the walk restarts from the beginning once fewer than ``batch_size``
        rows remain.

    Yields
    ------
    (samples, targets) : tuple of numpy.ndarray
        Shapes ``(n, lookback, x.shape[-1])`` and ``(n, y.shape[1])``.
        ``n`` equals ``batch_size`` except in sequential mode when ``rows``
        holds fewer than ``batch_size`` entries, where ``n == len(rows)``.
        Fresh arrays are allocated for every batch.

    Raises
    ------
    ValueError
        If ``rows`` is empty.
    IndexError
        If a window would start before the first row or end past the last
        row of ``x``.
    """
    lookback = moving_window_seconds * hz

    row_array = np.asarray(rows)
    n_rows = len(row_array)
    if n_rows == 0:
        raise ValueError('rows must contain at least one index')

    i = 0
    while True:
        if shuffle:
            # randint's upper bound is exclusive, so use n_rows (not n_rows - 1)
            # to keep the last row reachable and to support a single row.
            my_rows = row_array[np.random.randint(0, n_rows, size=batch_size)]
        else:
            # Wrap only when a full batch no longer fits, so the final full
            # batch is not skipped.
            if i + batch_size > n_rows:
                i = 0

            my_rows = row_array[i:i + batch_size]
            i += batch_size

        # Allocate per batch: a consumer that queues batches must not see a
        # previously yielded array being overwritten by the next one.
        samples = np.zeros((len(my_rows), lookback, x.shape[-1]))
        targets = np.zeros((len(my_rows), y.shape[1]))

        for j, row in enumerate(my_rows):
            row = int(row)
            start = row - lookback + 1
            # Negative positions would silently wrap around to the end of x.
            if start < 0 or row >= len(x):
                raise IndexError(
                    f'window for row {row} (lookback {lookback}) is outside x '
                    f'with {len(x)} rows'
                )
            samples[j] = x.iloc[start:row + 1].to_numpy()
            targets[j] = y[row]

        yield samples, targets

In [None]:
# All sequential dataset variants: window lengths of 5, 10 and 20 seconds,
# each at 1, 2, 5, 10 and 20 Hz (seconds vary slowest, Hz fastest).
folders = [
    f'./data/sensor/sequential_{hz}hz_{seconds}sec'
    for seconds in (5, 10, 20)
    for hz in (1, 2, 5, 10, 20)
]

# Load every variant in turn and report the shape of its training features.
# The variables from the last folder stay in the notebook namespace afterwards.
for my_folder in folders:
    (
        X_train, X_test, y_train, y_test,
        train_indexes, test_indexes, settings,
    ) = read_sequential_preprocessing(my_folder)

    print(X_train.shape)

# print(y_train.shape)
# print(len(train_indexes))
# print(X_test.shape)
# print(y_test.shape)
# print(len(test_indexes))

In [None]:
# Build a shuffled training-batch generator for the dataset currently loaded
# in the notebook namespace, driven by the window/batch values in its metadata.
generator_kwargs = dict(
    moving_window_seconds=settings['MOVING_WINDOW_SIZE'],
    hz=settings['HZ'],
    step=settings['STEP_SIZE'],
    batch_size=settings['BATCH_SIZE'],
)
train_generator = generator(
    X_train, y_train, train_indexes, shuffle=True, **generator_kwargs
)

# Draw one batch (samples, targets) to inspect.
t, ts = next(train_generator)

In [None]:
# Shapes of the sample batch and of its targets, one per line.
print(t.shape, ts.shape, sep='\n')

In [None]:
# Show the metadata dict (parsed from metadata.yaml) of the most recently
# loaded dataset folder as the cell output.
settings

In [None]:
def create_RNN_model(first_layer_size = 32, added_dense_layers = 0, recurrent_dropout = True,
                     n_features = None, n_classes = 4):
    """Build an (uncompiled) GRU classifier as a Keras ``Sequential`` model.

    Parameters
    ----------
    first_layer_size : int, default 32
        Number of units in the GRU layer.
    added_dense_layers : {0, 1, 2}, default 0
        Hidden ReLU Dense layers between the GRU and the output layer:
        0 -> none, 1 -> one layer of 128 units, 2 -> layers of 512 and 64 units.
    recurrent_dropout : bool, default True
        If True the GRU uses ``dropout=0.2`` and ``recurrent_dropout=0.2``;
        otherwise both are 0.
    n_features : int, optional
        Number of input features per time step. Defaults to
        ``X_train.shape[-1]`` of the notebook-level ``X_train``.
    n_classes : int, default 4
        Number of units in the softmax output layer.

    Returns
    -------
    keras.models.Sequential
        Model accepting inputs shaped ``(batch, time_steps, n_features)``.

    Raises
    ------
    ValueError
        If ``added_dense_layers`` is not 0, 1 or 2 (previously this silently
        returned a model without an output layer).
    """
    hidden_units_by_option = {0: (), 1: (128,), 2: (512, 64)}
    if added_dense_layers not in hidden_units_by_option:
        raise ValueError(
            f'added_dense_layers must be 0, 1 or 2, got {added_dense_layers!r}'
        )

    if n_features is None:
        # Backward-compatible default: infer from the globally loaded dataset.
        n_features = X_train.shape[-1]

    dropout_rate = 0.2 if recurrent_dropout else 0.0

    model = Sequential()
    model.add(GRU(
        first_layer_size,
        dropout=dropout_rate,
        recurrent_dropout=dropout_rate,
        input_shape=(None, n_features),
    ))

    # Only the first layer of a Sequential model needs an input shape, so the
    # Dense layers are declared without one.
    for units in hidden_units_by_option[added_dense_layers]:
        model.add(Dense(units, activation = 'relu'))
    model.add(Dense(n_classes, activation = 'softmax'))

    return model

In [None]:
# Authenticate with Weights & Biases before the training cell calls wandb.init.
wandb.login()

In [None]:
# Dataset variants to train on; uncomment entries to include them in the sweep.
folders = [
    # './data/sensor/sequential_1hz_1sec',
    # './data/sensor/sequential_1hz_2sec',
    # './data/sensor/sequential_2hz_1sec',
    # './data/sensor/sequential_2hz_2sec',
    # './data/sensor/sequential_5hz_1sec',
    './data/sensor/sequential_5hz_2sec',
    # './data/sensor/sequential_10hz_1sec',
    # './data/sensor/sequential_10hz_2sec',
    # './data/sensor/sequential_20hz_1sec',
    # './data/sensor/sequential_20hz_2sec',
    # './data/sensor/sequential_1hz_5sec',
    # './data/sensor/sequential_2hz_5sec',
    # './data/sensor/sequential_5hz_5sec',
    # './data/sensor/sequential_10hz_5sec',
    # './data/sensor/sequential_20hz_5sec',
    # './data/sensor/sequential_1hz_10sec',
    # './data/sensor/sequential_2hz_10sec',
    # './data/sensor/sequential_5hz_10sec',
    # './data/sensor/sequential_10hz_10sec',
    # './data/sensor/sequential_20hz_10sec',
    # './data/sensor/sequential_1hz_20sec',
    # './data/sensor/sequential_2hz_20sec',
    # './data/sensor/sequential_5hz_20sec',
    # './data/sensor/sequential_10hz_20sec',
    # './data/sensor/sequential_20hz_20sec',
]

# For every selected dataset, train one model per combination of GRU size,
# number of extra Dense layers and dropout setting, logging each as a W&B run.
for my_folder in folders:
    X_train, X_test, y_train, y_test, train_indexes, test_indexes, settings = read_sequential_preprocessing(
        folder = my_folder
    )

    print(X_train.shape)
    print(y_train.shape)
    print(len(train_indexes))
    print(X_test.shape)
    print(y_test.shape)
    print(len(test_indexes))

    # Overrides whatever batch size the metadata file carries.
    settings['BATCH_SIZE'] = 128

    train_generator = generator(
        x = X_train,
        y = y_train,
        rows = train_indexes,
        moving_window_seconds = settings['MOVING_WINDOW_SIZE'],
        hz = settings['HZ'],
        step = settings['STEP_SIZE'],
        batch_size = settings['BATCH_SIZE'],
        shuffle=True
    )

    test_generator = generator(
        x = X_test,
        y = y_test,
        rows = test_indexes,
        moving_window_seconds = settings['MOVING_WINDOW_SIZE'],
        hz = settings['HZ'],
        step = settings['STEP_SIZE'],
        batch_size = settings['BATCH_SIZE'],
        shuffle=False
    )

    # The generators emit one window per entry of *_indexes, so an epoch is
    # sized by the number of indexes, not by the number of raw rows in X.
    # Sizing by len(X_test) made validation wrap around and count the first
    # windows more than once.
    train_steps = max(1, len(train_indexes) // settings['BATCH_SIZE'])
    print(f'training steps: {train_steps}')
    test_steps = max(1, len(test_indexes) // settings['BATCH_SIZE'])
    print(f'test steps: {test_steps}')

    for first_layer_size in [128, 256]:
        for added_dense_layers in [0, 1]:
            for recurrent_dropout in [False, True]:

                model = create_RNN_model(
                    first_layer_size = first_layer_size,
                    added_dense_layers = added_dense_layers,
                    recurrent_dropout = recurrent_dropout
                )

                config={
                    "architecture": "RNN",
                    "moving_window_size": settings['MOVING_WINDOW_SIZE'],
                    "hz": settings['HZ'],
                    "step_size": settings['STEP_SIZE'],
                    "test_proportion": settings['TEST_PROPORTION'],
                    "aggregation": settings['AGGREGATION'],
                    "features": settings['FEATURES'],
                    'batch size': settings['BATCH_SIZE'],
                    "epochs": 20,
                    "layers": len(model.layers),
                    "first_layer_size": first_layer_size,
                    "added_dense_layers": added_dense_layers,
                    "recurrent_dropout": recurrent_dropout
                }

                run = wandb.init(
                    project="CDL1",
                    entity="cdl1",
                    name=config['architecture'],
                    config=config
                )

                # Always close the W&B run, even if compile/fit raises, so a
                # failed configuration does not leave a dangling open run.
                try:
                    # compile model
                    model.compile(
                        loss = 'categorical_crossentropy',
                        optimizer = 'rmsprop',
                        metrics = ['accuracy']
                    )

                    model.fit(
                        train_generator,
                        steps_per_epoch=train_steps,
                        epochs = config['epochs'],
                        validation_data = test_generator,
                        validation_steps=test_steps,
                        callbacks=[WandbCallback()]
                    )
                finally:
                    run.finish()

In [None]:
# Manual cleanup: closes the most recent W&B run, e.g. after the training cell
# was interrupted before reaching its own run.finish().
# NOTE(review): presumably harmless when the run is already finished — confirm.
run.finish()

In [None]:
# The model's GRU input is (batch, time_steps, features), so it cannot predict
# on the flat 2-D X_test table. Build one look-back window per test index
# (the same windows the generator yields) and predict on those;
# predictions[k] then corresponds to test_indexes[k].
# NOTE: all test windows are materialised in memory at once; for very large
# window/frequency settings predict in chunks instead.
prediction_lookback = settings['MOVING_WINDOW_SIZE'] * settings['HZ']
test_windows = np.stack([
    X_test.iloc[row - prediction_lookback + 1:row + 1].to_numpy()
    for row in test_indexes
])
predictions = model.predict(test_windows, batch_size=settings['BATCH_SIZE'])

In [None]:
# Binarise the first output column: 1 where its value is >= 0.5, else 0.
# NOTE(review): create_RNN_model ends in a 4-unit softmax, so thresholding only
# column 0 looks like a leftover from a binary setup — confirm whether an
# argmax over the class columns was intended.
pd.Series(predictions[:, 0]).map(lambda prob: int(prob >= 0.5))