In [134]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Conv1D, MaxPooling1D, BatchNormalization, Activation, Dropout, Bidirectional, LSTM, Embedding
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, TensorBoard, CSVLogger

from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

import joblib
import glob
import matplotlib.pyplot as plt
from datetime import datetime
import os

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


In [40]:
def label2onehot(label: np.array) -> np.array:
    thresholds = [0.25, 0.75, 1.25, 1.75]

    categorical_labels = np.digitize(label, thresholds)

    one_hot_encoder = to_categorical(categorical_labels)
    
    return one_hot_encoder

In [135]:
def load_data() -> tuple:
    X = None
    y = None
    all_path = sorted(glob.glob('../data/input_data_seq/*'))
    # count = 1
    for count, path in enumerate(all_path):
        print(f"{count}. {path.split('/')[-1].split('.')[0]}")
        data_loaded = joblib.load(path)
        if X is None:
            X = data_loaded['X']
            y = data_loaded['activate_score']
        else:
            X = np.concatenate((X, data_loaded['X']), axis=0)
            y = np.concatenate((y, data_loaded['activate_score']), axis=0)

    y = y.reshape(-1, 1)
    y = label2onehot(y)
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(X)
    pca = PCA(n_components=.95)
    X = pca.fit_transform(scaled_data)
    
    return X, y

In [136]:
# X_train, X_test, y_train, y_test = load_data()
X_train, y_train = load_data()

    
input_shape = X_train.shape[1:]

0. input_data_seq\batch_0
1. input_data_seq\batch_1
2. input_data_seq\batch_2
3. input_data_seq\batch_3
4. input_data_seq\batch_4
5. input_data_seq\batch_5
6. input_data_seq\batch_6
7. input_data_seq\batch_7
8. input_data_seq\batch_8
9. input_data_seq\batch_9


In [137]:
# print(f'X_train: {X_train.shape}; X_test: {X_test.shape}; y_train: {y_train.shape}; y_test: {y_test.shape}, input_shape{input_shape}')
print(f'X_train: {X_train.shape}; y_train: {y_train.shape}; input_shape{input_shape}')

X_train: (5000, 74); y_train: (5000, 5); input_shape(74,)


In [138]:
y_train

array([[1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1.]], dtype=float32)

In [139]:
X_train[0].shape

(74,)

In [140]:
X_train

array([[-5.84944124e-02,  3.78311605e-01,  8.65931549e-02, ...,
         5.95612506e-01, -3.56797533e-02,  8.69713809e-01],
       [-1.48961700e-01, -8.01058980e-02,  4.43691967e-02, ...,
        -4.66690708e-01,  6.39793330e-02,  1.15674008e-01],
       [-4.38704404e-02,  6.56639530e-02,  5.48590772e-02, ...,
         3.65325198e-01, -1.75973369e-02,  1.83094188e+00],
       ...,
       [-2.43503221e-01, -7.72830033e-01, -2.93344928e-02, ...,
         4.98854589e-01, -1.87956955e-01,  8.87872728e-02],
       [ 1.99076984e+00,  3.16845717e+01, -2.07302299e+01, ...,
         1.42381834e-01, -6.67129797e-01,  1.64342199e+00],
       [ 1.34528346e+00,  1.75104296e+01, -1.13174795e+01, ...,
        -2.69923154e-01,  5.09727830e+00, -3.03921804e+00]])

In [141]:
input_shape

(74,)

In [143]:
def build_model(input_shape):
    inputs = Input(shape=input_shape, dtype=float, name="data")
    # labels = tf.keras.layers.Input(name="label", shape=(None,))
    # layer = Flatten(name='flatten')(layer)
    # layer = Embedding(input_dim=3260, output_dim=100, input_length=input_shape)(inputs)
    # layer = LSTM(128, return_sequences=True)(inputs)
    # layer = LSTM(128)(layer)
    
    layer = Dense(units=128, activation='sigmoid', kernel_initializer='VarianceScaling', bias_initializer='Zeros', name='dense1')(inputs)
    layer = Activation(activation='relu', name=f'activation_{4}')(layer)
    layer = Dropout(0.3, name='dropout_4')(layer)
    layer = Dense(units=64, activation='sigmoid', kernel_initializer='VarianceScaling', bias_initializer='Zeros', name='dense2')(layer)
    layer = Activation(activation='relu', name=f'activation_{5}')(layer)
    layer = Dropout(0.3, name='dropout_5')(layer)
    layer = Dense(units=32, activation='sigmoid', kernel_initializer='VarianceScaling', bias_initializer='Zeros', name='dense3')(layer)
    layer = Activation(activation='relu', name=f'activation_{6}')(layer)
    outputs = Dense(units=5, activation='softmax', kernel_initializer='VarianceScaling', bias_initializer='Zeros')(layer)
    model = Model(inputs=inputs, outputs=outputs, name='cnn_model')
    
    return model

In [144]:
input_shape

(74,)

In [145]:
np.max(X_train)

136.63552985667448

In [146]:
model = build_model(input_shape=input_shape)
model.summary()

Model: "cnn_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 data (InputLayer)           [(None, 74)]              0         
                                                                 
 dense1 (Dense)              (None, 128)               9600      
                                                                 
 activation_4 (Activation)   (None, 128)               0         
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense2 (Dense)              (None, 64)                8256      
                                                                 
 activation_5 (Activation)   (None, 64)                0         
                                                                 
 dropout_5 (Dropout)         (None, 64)                0 

In [147]:
model.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

In [148]:
now = datetime.now().strftime('%d-%m-%Y_%H-%M')

checkpoint_callback = ModelCheckpoint(filepath=f"../model/ModelCheckPoint/{now}/" + "model.{epoch:03d}-{val_loss:.4f}-{val_accuracy:.4f}.h5",
                                      monitor='val_loss',
                                      save_best_only=True,
                                      save_weights_only=False,
                                      verbose=1)
tensorboard_callback = TensorBoard(log_dir=f"../model/TensorBoard/{now}/logs")

folder_logger_path = f"../model/CSVLogger/{now}"

if os.path.exists(folder_logger_path) and os.path.isdir(folder_logger_path):
    os.rmdir(folder_logger_path)
os.makedirs(folder_logger_path)

csv_logger_callback = CSVLogger(f"{folder_logger_path}/training.log")

In [150]:
model.fit(X_train, y_train, 
          epochs=100,
          validation_split=.2,  
          batch_size=32,
          callbacks=[checkpoint_callback,
                     tensorboard_callback,
                     csv_logger_callback]
          )
model.save(f"../model/FinalModel/{now}/model.h5")

Epoch 1/100
Epoch 1: val_loss improved from 1.38942 to 1.38786, saving model to ../model/ModelCheckPoint/18-03-2024_16-56\model.001-1.3879-0.3640.h5
Epoch 2/100
Epoch 2: val_loss improved from 1.38786 to 1.38467, saving model to ../model/ModelCheckPoint/18-03-2024_16-56\model.002-1.3847-0.3660.h5
Epoch 3/100
Epoch 3: val_loss did not improve from 1.38467
Epoch 4/100
Epoch 4: val_loss did not improve from 1.38467
Epoch 5/100
Epoch 5: val_loss improved from 1.38467 to 1.37883, saving model to ../model/ModelCheckPoint/18-03-2024_16-56\model.005-1.3788-0.3650.h5
Epoch 6/100
Epoch 6: val_loss did not improve from 1.37883
Epoch 7/100
Epoch 7: val_loss did not improve from 1.37883
Epoch 8/100
Epoch 8: val_loss improved from 1.37883 to 1.37686, saving model to ../model/ModelCheckPoint/18-03-2024_16-56\model.008-1.3769-0.3690.h5
Epoch 9/100
Epoch 9: val_loss improved from 1.37686 to 1.37539, saving model to ../model/ModelCheckPoint/18-03-2024_16-56\model.009-1.3754-0.3710.h5
Epoch 10/100
Epoch 

In [27]:
y_pred = model.predict(X_test)



In [28]:
y_test

array([[0., 1., 0., 0., 0.]], dtype=float32)

In [29]:
y_pred

array([[0.20089799, 0.19670978, 0.1969538 , 0.20947286, 0.19596557]],
      dtype=float32)

In [19]:
model.evaluate(X_test, y_test)



[0.9785565733909607, 0.0]