In [1]:
# imports
import sys
sys.path.append('../')

from glob import glob
from src.load_data import master_load
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import time

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Activation, BatchNormalization, Conv1D, Dense,
                                     Dropout, Flatten, LSTM, MaxPooling1D, Reshape,
                                     TimeDistributed)
from tensorflow.math import confusion_matrix

In [2]:
# Global matplotlib styling: large fonts sized for presentation-style figures.
plt.rcParams.update({
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 32,
    'axes.labelsize': 32,
    'axes.titlesize': 32,
    'xtick.labelsize': 24,
    'ytick.labelsize': 24,
    'legend.fontsize': 32,
    'figure.titlesize': 48,
})

In [3]:
# Load the train/test features and targets through the project's loader.
# NOTE(review): targets are presumably one-hot encoded (later cells take an
# argmax over them and train with categorical cross-entropy) — confirm in
# src.load_data.
X_train, X_test, y_train, y_test = master_load()

In [4]:
# Per-sample shape (everything after the leading sample axis); the models
# below pass this same expression as `input_shape` to their first layer.
X_train.shape[1:]

(16, 60)

In [5]:
# Stacked-LSTM classifier: two sequence-returning LSTMs, two per-step dense
# layers, dropout, then a flattened 3-unit softmax head.
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=X_train.shape[1:]),
    LSTM(64, return_sequences=True),
    TimeDistributed(Dense(32, activation='relu')),
    TimeDistributed(Dense(8, activation='relu')),
    Dropout(0.5),
    Flatten(),
    Dense(3),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [6]:
# Train the current model, score it on the test split, and save it under a
# name that encodes accuracy, loss and a timestamp.
epochs = 5
batch_size = 32

fitted = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)
# evaluate() returns [loss, accuracy] for the metrics compiled above
score = fitted.model.evaluate(X_test, y_test, batch_size=batch_size)

model_name = f'../models/numpy/acc_{round(score[1], 3)}-loss_{round(score[0], 2)}-{int(time.time())}.model'

# Start the running history table; later training cells append to `df`.
df = pd.DataFrame.from_dict(fitted.history).assign(model_name=model_name)
fitted.model.save(model_name)

print('done')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




INFO:tensorflow:Assets written to: ../models/numpy/acc_0.369-loss_4.33-1628207162.model/assets


INFO:tensorflow:Assets written to: ../models/numpy/acc_0.369-loss_4.33-1628207162.model/assets


done


In [7]:
# Conv1D variant with SELU activations: two convolutions, one max-pool,
# then a 64 -> 16 -> 3 dense head with softmax output.
# (A Conv2D front end was considered here as an alternative to try.)
model = Sequential([
    Conv1D(64, 3, input_shape=X_train.shape[1:]),
    Activation('selu'),
    Conv1D(16, 2),
    Activation('selu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64),
    Activation('selu'),
    Dense(16),
    Activation('selu'),
    Dense(3),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Train / evaluate / save the current model and append its per-epoch history
# to the running `df` table.
epochs = 5
batch_size = 32

fitted = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)
# score == [loss, accuracy] on the test split
score = fitted.model.evaluate(X_test, y_test, batch_size=batch_size)

model_name = f'../models/numpy/acc_{round(score[1], 3)}-loss_{round(score[0], 2)}-{int(time.time())}.model'

temp_df = pd.DataFrame.from_dict(fitted.history).assign(model_name=model_name)
df = pd.concat([df, temp_df])
fitted.model.save(model_name)

print('done')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: ../models/numpy/acc_0.362-loss_4.72-1628207633.model/assets


INFO:tensorflow:Assets written to: ../models/numpy/acc_0.362-loss_4.72-1628207633.model/assets


done


In [9]:
# Deeper Conv1D variant with ReLU activations: three 64-filter convolutions
# (max-pooling after the 2nd and 3rd), a 64 -> 16 dense stack, dropout, and
# a 3-unit softmax output.
model = Sequential([
    Conv1D(64, 3, input_shape=X_train.shape[1:]),
    Activation('relu'),
    Conv1D(64, 2),
    Activation('relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(64, 2),
    Activation('relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dropout(0.5),
    Dense(3),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Same train / evaluate / save routine as the previous experiments; the
# per-epoch history is appended to `df` tagged with the saved model's path.
epochs = 5
batch_size = 32

fitted = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)
score = fitted.model.evaluate(X_test, y_test, batch_size=batch_size)  # [loss, accuracy]

model_name = f'../models/numpy/acc_{round(score[1], 3)}-loss_{round(score[0], 2)}-{int(time.time())}.model'

temp_df = pd.DataFrame.from_dict(fitted.history).assign(model_name=model_name)
df = pd.concat([df, temp_df])
fitted.model.save(model_name)

print('done')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: ../models/numpy/acc_0.384-loss_2.88-1628208126.model/assets


INFO:tensorflow:Assets written to: ../models/numpy/acc_0.384-loss_2.88-1628208126.model/assets


done


In [11]:
# Borrowed architecture #1: a wider first convolution (128 filters) followed
# by two more convolutions with max-pooling, then the same dense head as the
# previous experiment.
# NOTE(review): the second convolution uses 63 filters where the sibling
# model uses 64 — possibly a typo; kept as-is so results stay comparable.
model = Sequential([
    Conv1D(128, 3, input_shape=X_train.shape[1:]),
    Activation('relu'),
    Conv1D(63, 2),
    Activation('relu'),
    MaxPooling1D(pool_size=2),
    Conv1D(64, 2),
    Activation('relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dropout(0.5),
    Dense(3),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train / evaluate / save the current model, then append its history to `df`.
epochs = 5
batch_size = 32

fitted = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)
# [loss, accuracy] measured on the test split
score = fitted.model.evaluate(X_test, y_test, batch_size=batch_size)

model_name = f'../models/numpy/acc_{round(score[1], 3)}-loss_{round(score[0], 2)}-{int(time.time())}.model'

temp_df = pd.DataFrame.from_dict(fitted.history).assign(model_name=model_name)
df = pd.concat([df, temp_df])
fitted.model.save(model_name)

print('done')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
 564/8825 [>.............................] - ETA: 1:22 - loss: 0.5660 - accuracy: 0.6943

In [None]:
# Clip every value to [-10, 10] and divide by 10 so the inputs to the next
# model lie in [-1, 1]; the reshape pins the per-sample layout to (16, 60).
reshape = (-1, 16, 60)

train_X = np.array(X_train).reshape(reshape).clip(-10, 10) / 10
test_X = np.array(X_test).reshape(reshape).clip(-10, 10) / 10

In [None]:
# Borrowed architecture #2: fully convolutional. Four 'same'-padded
# convolutions keep the step axis intact; the final width-16 convolution then
# collapses that axis to a single step (train_X has 16 steps per sample),
# leaving 3 values that are reshaped to (3,) and passed through softmax.
model = Sequential([
    Conv1D(128, 5, padding='same', input_shape=train_X.shape[1:]),
    Activation('relu'),
    Conv1D(64, 5, padding='same'),
    Activation('relu'),
    Conv1D(16, 5, padding='same'),
    Activation('relu'),
    Conv1D(8, 5, padding='same'),
    Activation('relu'),
    Conv1D(3, 16),
    Reshape((3,)),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train / evaluate / save the model built on the clipped-and-rescaled inputs.
epochs = 5
batch_size = 32

fitted = model.fit(
    train_X,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_X, y_test),
)
# Evaluate on the same preprocessed features the model was trained and
# validated on (test_X). Scoring the raw X_test here would feed unclipped,
# unscaled inputs to the network and put a misleading accuracy/loss into the
# saved model's file name.
score = fitted.model.evaluate(test_X, y_test, batch_size=batch_size)  # [loss, accuracy]

model_name = f'../models/numpy/acc_{round(score[1], 3)}-loss_{round(score[0], 2)}-{int(time.time())}.model'

# Append this run's per-epoch history to the running results table.
temp_df = pd.DataFrame.from_dict(fitted.history).assign(model_name=model_name)
df = pd.concat([df, temp_df])
fitted.model.save(model_name)

print('done')

In [None]:
# Saved-model paths begin with "acc_<accuracy>", so the last path in sorted
# order is taken as the best model. Note this is a string sort, not a
# numeric comparison of the accuracies.
saved_model_paths = sorted(glob('../models/numpy/*.model'))
best_model = saved_model_paths[-1]

In [None]:
# Reload the selected model and build a row-normalised confusion matrix.
# NOTE(review): predictions are made on the raw X_test; if the selected model
# is the one trained on the clipped/rescaled inputs it should be given test_X
# instead — confirm which model `best_model` points at.
model = tf.keras.models.load_model(best_model)

y_pred = np.argmax(model.predict(X_test), axis=-1)
y_true = np.argmax(y_test, axis=-1)

# tf.math.confusion_matrix(labels, predictions): rows are true classes,
# columns are predicted classes.
conf_counts = np.array(confusion_matrix(y_true, y_pred))

# Normalise each row by the number of test samples of that true class rather
# than by a hard-coded sample count (11750), which silently goes wrong as
# soon as the test split changes. Guard against empty classes.
row_totals = conf_counts.sum(axis=1, keepdims=True)
conf_mat = np.round(conf_counts / np.maximum(row_totals, 1), 3)

In [None]:
def plot_conf_mat(conf_mat):
    '''
    draw a confusion matrix as an annotated heat map

    params
    ======
    conf_mat (numpy.ndarray): square matrix with one row/column per action,
        ordered as ['left', 'none', 'right']

    returns
    =======
    None
    '''
    actions = ['left', 'none', 'right']
    tick_positions = list(range(len(actions)))

    fig, ax = plt.subplots(figsize=(16, 9))

    ax.matshow(conf_mat, cmap=plt.cm.RdYlGn)

    # Pin the ticks to the cell centres before labelling them. Setting only
    # the labels (padded with a leading "") depends on wherever the automatic
    # locator happens to put its ticks and triggers a matplotlib warning.
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(actions)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(actions)

    # matshow draws conf_mat[row][col] at (x=col, y=row); annotate each cell
    # with its own value.
    for row_idx, row_values in enumerate(conf_mat):
        for col_idx, value in enumerate(row_values):
            ax.text(col_idx, row_idx, f"{value}", va='center', ha='center')

    # The title sits above the top x-axis and so acts as the column caption.
    # NOTE(review): rows are on the y-axis; for a matrix whose rows are the
    # true classes these two captions look swapped — confirm and swap if so.
    plt.title("Action Thought")
    plt.ylabel("Predicted Action")
    plt.show()

In [None]:
# Render the confusion matrix computed for the selected model.
plot_conf_mat(conf_mat)

In [None]:
# imports
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestRegressor
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
import xgboost as xgb

In [None]:
class Models:
    '''
    Build an object to process varied models and stow results
    '''

    def __init__(self, X_train, X_test, y_train, y_test):
        '''
        initialize Models object

        params
        ======
        X_train (array-like): features to train on (e.g. a pandas DataFrame
            or a 2-D numpy array)
        X_test (array-like): features to test on
        y_train (array-like): target to train on
        y_test (array-like): target to test on

        attrs
        =====
        results_df (pandas.core.frame.DataFrame): empty dataframe to stow
            testing results (columns: model, params, accuracy)
        best_models (list): stows the best estimator found for each model
            that has been fitted

        returns
        =======
        None
        '''
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

        self.results_df = pd.DataFrame(
            columns=['model', 'params', 'accuracy'])
        self.best_models = []

    def build_grid_search(self, model, params):
        '''
        build GridSearchCV object

        params
        ======
        model (estimator): the sklearn-style estimator instance to tune
        params (dict): contains the hyperparameter names as keys and
            lists of candidate values as values

        attrs
        =====
        None

        returns
        =======
        gs (sklearn.model_selection._search.GridSearchCV): unfitted search
            object that scores candidates by accuracy and uses all cores
        '''
        return GridSearchCV(model, params, n_jobs=-1,
                            scoring='accuracy', verbose=True)

    def fit_model(self, model, params):
        '''
        runs the grid search on the training data

        params
        ======
        model (estimator): the sklearn-style estimator instance to tune
        params (dict): contains the hyperparameter names as keys and
            lists of candidate values as values

        attrs
        =====
        best_models: the winning estimator is appended

        returns
        =======
        best_estimator (estimator): the refitted estimator with the highest
            cross-validated accuracy (GridSearchCV.best_estimator_)
        '''
        gs = self.build_grid_search(model, params)
        print(gs.estimator)
        gs.fit(self.X_train, self.y_train)
        best_estimator = gs.best_estimator_

        self.best_models.append(best_estimator)

        return best_estimator

    def eval_model(self, model, params):
        '''
        evaluates the tuned model on the testing data and records the result

        params
        ======
        model (estimator): the sklearn-style estimator instance to tune
        params (dict): contains the hyperparameter names as keys and
            lists of candidate values as values

        attrs
        =====
        results_df: one row [model name, best estimator params, accuracy]
            is appended

        returns
        =======
        None
        '''
        best_estimator = self.fit_model(model, params)

        y_pred = best_estimator.predict(self.X_test)
        # sklearn's documented argument order is (y_true, y_pred)
        accuracy = accuracy_score(self.y_test, y_pred)

        # class name of the estimator, e.g. 'KNeighborsClassifier'; read it
        # directly instead of parsing the repr of the class
        model_name = type(model).__name__

        # full parameter set of the winning estimator (not only tuned keys)
        best_params = best_estimator.get_params()

        # create and add result to results_df
        row = [model_name, best_params, accuracy]
        self.results_df.loc[len(self.results_df)] = row

        print('\n')


In [None]:
# Build a small flattened subset for the classical (sklearn) models: keep the
# first 1000 samples, average over the last feature axis, and collapse the
# targets to integer class ids with argmax.
flat_X_train = X_train[:1000].mean(axis=2)
flat_X_test = X_test[:1000].mean(axis=2)
flat_y_train = y_train[:1000].argmax(axis=1)
flat_y_test = y_test[:1000].argmax(axis=1)

# Container that runs the grid searches and stows their results.
models = Models(flat_X_train, flat_X_test, flat_y_train, flat_y_test)

In [None]:
# Sanity-check the shapes of the flattened subsets, one per line.
for subset in (flat_X_train, flat_X_test, flat_y_train, flat_y_test):
    print(subset.shape)

In [None]:
# establish params for each model
# elastic_net_params = {'alpha': [0.1, 0.5, 0.9],
#                       'l1_ratio': [0.2, 0.5, 0.8],
#                       'fit_intercept': [False, True],
#                       'normalize': [False, True]}

# Search grid for KNeighborsClassifier: fixed k, every neighbour-search
# algorithm, and a single n_jobs setting.
knn_params = dict(
    n_neighbors=[3],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    n_jobs=[-2],
)

# pls_params = {'n_components': [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
#              'tol': [0.000000001, 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001],
#              'scale': [False, True]}

# rfr_params = {'n_estimators': [10, 100, 1000, 10000],
#               'max_depth': [None, 3, 7, 9, 15],
#               'max_features': ['auto', 'sqrt', 'log2'],
#               'bootstrap': [False, True]}

# xgb_params = {'max_depth': [3, 7, 9, 15],
#               'subsample': [0.01, 0.1, 0.5, 0.9, 0.99],
#               'colsample_bytree': [0.01, 0.1, 0.5, 0.9, 0.99],
#               'n_estimators': [10, 100, 1000, 10000]}

# gbc_params = {'criterion': ['mse'],
#               'loss': ['deviance'],
#               'max_features': ['auto', 'sqrt'],
#               'subsample': [0.5, 0.7, 0.9],
#               'max_depth': [4, 6, 8],
#               'n_estimators': [1000]}

In [None]:
# Grid-search, fit and evaluate each enabled model; every call appends one
# row to models.results_df. Only the KNN classifier is currently enabled;
# the commented-out calls need their matching (also commented-out) parameter
# grids before they can be re-enabled.
# models.eval_model(model=ElasticNet(), params=elastic_net_params)
models.eval_model(model=KNeighborsClassifier(), params=knn_params)
# models.eval_model(model=PLSRegression(), params=pls_params)
# models.eval_model(model=RandomForestRegressor(), params=rfr_params)
# models.eval_model(model=xgb.XGBRegressor(objective='reg:squarederror'), params=xgb_params)
# models.eval_model(model=(), params=gbc_params)

In [None]:
# Show the results table: one row per evaluated model with its name, the
# winning estimator's parameters and its test accuracy.
models.results_df