In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Conv1D, MaxPooling1D, BatchNormalization, Activation, Dropout, Bidirectional, LSTM, Embedding
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, TensorBoard, CSVLogger

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

import joblib
import glob
import matplotlib.pyplot as plt
from datetime import datetime
import os

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler





In [2]:
def label2onehot(label: np.array) -> np.array:
    thresholds = [0.25, 0.75, 1.25, 1.75]

    categorical_labels = np.digitize(label, thresholds)

    one_hot_encoder = to_categorical(categorical_labels)
    
    return one_hot_encoder

In [5]:
def load_data() -> tuple:
    X = None
    y = None
    all_path = sorted(glob.glob('../data/input_data/*'))
    # count = 1
    for count, path in enumerate(all_path):
        print(f"{count}. {path.split('/')[-1].split('.')[0]}")
        data_loaded = joblib.load(path)
        if X is None:
            X = data_loaded['X']
            y = data_loaded['activate_score']
        else:
            X = np.concatenate((X, data_loaded['X']), axis=0)
            y = np.concatenate((y, data_loaded['activate_score']), axis=0)

    y = y.reshape(-1, 1)
    y = label2onehot(y)
    # scaler = StandardScaler()
    # scaled_data = scaler.fit_transform(X)
    # pca = PCA(n_components=.95)
    # X = pca.fit_transform(scaled_data)
    
    return X, y

In [6]:
# X_train, X_test, y_train, y_test = load_data()
X_train, y_train = load_data()

    
input_shape = X_train.shape[1:]

0. input_data\batch_118
1. input_data\batch_119
2. input_data\batch_12
3. input_data\batch_120
4. input_data\batch_121
5. input_data\batch_122
6. input_data\batch_123
7. input_data\batch_124
8. input_data\batch_125
9. input_data\batch_126


In [7]:
# print(f'X_train: {X_train.shape}; X_test: {X_test.shape}; y_train: {y_train.shape}; y_test: {y_test.shape}, input_shape{input_shape}')
print(f'X_train: {X_train.shape}; y_train: {y_train.shape}; input_shape{input_shape}')

X_train: (5000, 14868, 13); y_train: (5000, 5); input_shape(14868, 13)


In [8]:
y_train

array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.]], dtype=float32)

In [9]:
X_train[0].shape

(14868, 13)

In [10]:
X_train

array([[[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 1., 0.],
        [1., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 1., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 1., 0.],
        [1., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 1., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 1., 0.],
        [1., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 1., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 1., 0.],
        [1., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 1.

In [11]:
input_shape

(14868, 13)

In [16]:
def build_model(input_shape):
    inputs = Input(shape=input_shape, dtype=float, name="data")
    # labels = tf.keras.layers.Input(name="label", shape=(None,))
    # layer = Flatten(name='flatten')(layer)
    # layer = Embedding(input_dim=3260, output_dim=100, input_length=input_shape)(inputs)
    # layer = LSTM(128, return_sequences=True)(inputs)
    # layer = LSTM(128)(layer)
    layer = Flatten()(inputs)
    layer = Dense(units=128, activation='sigmoid', kernel_initializer='VarianceScaling', bias_initializer='Zeros', name='dense1')(layer)
    layer = Activation(activation='relu', name=f'activation_{4}')(layer)
    layer = Dropout(0.3, name='dropout_4')(layer)
    layer = Dense(units=64, activation='sigmoid', kernel_initializer='VarianceScaling', bias_initializer='Zeros', name='dense2')(layer)
    layer = Activation(activation='relu', name=f'activation_{5}')(layer)
    layer = Dropout(0.3, name='dropout_5')(layer)
    layer = Dense(units=32, activation='sigmoid', kernel_initializer='VarianceScaling', bias_initializer='Zeros', name='dense3')(layer)
    layer = Activation(activation='relu', name=f'activation_{6}')(layer)
    outputs = Dense(units=5, activation='softmax', kernel_initializer='VarianceScaling', bias_initializer='Zeros')(layer)
    model = Model(inputs=inputs, outputs=outputs, name='cnn_model')
    
    return model

In [17]:
input_shape

(14868, 13)

In [18]:
np.max(X_train)

1.0

In [19]:
model = build_model(input_shape=input_shape)
model.summary()

Model: "cnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data (InputLayer)           [(None, 14868, 13)]       0         
                                                                 
 flatten (Flatten)           (None, 193284)            0         
                                                                 
 dense1 (Dense)              (None, 128)               24740480  
                                                                 
 activation_4 (Activation)   (None, 128)               0         
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense2 (Dense)              (None, 64)                8256      
                                                                 
 activation_5 (Activation)   (None, 64)                0 

In [20]:
model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])




In [21]:
now = datetime.now().strftime('%d-%m-%Y_%H-%M')

checkpoint_callback = ModelCheckpoint(filepath=f"../model/ModelCheckPoint/{now}/" + "model.{epoch:03d}-{val_loss:.4f}-{val_accuracy:.4f}.h5",
                                      monitor='val_loss',
                                      save_best_only=True,
                                      save_weights_only=False,
                                      verbose=1)
tensorboard_callback = TensorBoard(log_dir=f"../model/TensorBoard/{now}/logs")

folder_logger_path = f"../model/CSVLogger/{now}"

if os.path.exists(folder_logger_path) and os.path.isdir(folder_logger_path):
    os.rmdir(folder_logger_path)
os.makedirs(folder_logger_path)

csv_logger_callback = CSVLogger(f"{folder_logger_path}/training.log")

In [22]:
model.fit(X_train, y_train, 
          epochs=100,
          validation_split=.2,  
          batch_size=32,
          callbacks=[checkpoint_callback,
                     tensorboard_callback,
                     csv_logger_callback]
          )
model.save(f"../model/FinalModel/{now}/model.h5")

In [27]:
y_pred = model.predict(X_test)



In [28]:
y_test

array([[0., 1., 0., 0., 0.]], dtype=float32)

In [29]:
y_pred

array([[0.20089799, 0.19670978, 0.1969538 , 0.20947286, 0.19596557]],
      dtype=float32)

In [19]:
model.evaluate(X_test, y_test)



[0.9785565733909607, 0.0]