## WaveNet Model

### Import Dependencies and Settings

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import *
import pandas as pd
import numpy as np
import random
from tensorflow.keras.callbacks import Callback, LearningRateScheduler,ModelCheckpoint
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model
from tensorflow.keras import losses, models, optimizers

import tensorflow_addons as tfa
import gc
import os

# CUDA settings: enumerate GPUs in PCI bus order and expose only device "0"
# to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score

# Silence all Python warnings for the rest of the notebook.
import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')
# Widen the pandas display limits so large frames are not elided when shown.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 500)

# Print the full path of every file found under /kaggle/input.
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

### Hyperparameter

In [2]:
# Configuration and main hyperparameters
EPOCHS = 120  # epochs per fold (passed to run_cv_model_by_batch as nn_epochs)
NNBATCHSIZE = 16  # Keras fit batch size (passed as nn_batch_size)
GROUP_BATCH_SIZE = 4000  # number of consecutive rows per group built by batching()
SEED = 321  # value handed to seed_everything()
LR = 0.0015  # base learning rate used by the Adam optimizer and lr_schedule()
SPLITS = 5  # fold count passed to run_cv_model_by_batch; also divides the summed test predictions

# seed for code replicability
def seed_everything(seed):
    """Seed Python's ``random``, NumPy and TensorFlow with ``seed``.

    The same value is also exported as the ``PYTHONHASHSEED`` environment
    variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

### Main

In [3]:
def read_data():
    """Load the drift-free train/test CSVs, the sample submission and the
    random-forest class probabilities.

    The eleven probability columns of each ``.npy`` array are appended to the
    matching frame as ``proba_0`` ... ``proba_10``.

    Returns:
        Tuple ``(train, test, sub)`` of DataFrames.
    """
    signal_dtypes = {'time': np.float32, 'signal': np.float32}

    # read data without drift
    train = pd.read_csv('../input/data-without-drift/train_clean.csv', dtype={**signal_dtypes, 'open_channels': np.int32})
    test = pd.read_csv('../input/data-without-drift/test_clean.csv', dtype=dict(signal_dtypes))
    sub = pd.read_csv('../input/liverpool-ion-switching/sample_submission.csv', dtype={'time': np.float32})

    # read random forest probability
    Y_train_proba = np.load("../input/ion-shifted-rfc-proba/Y_train_proba.npy")
    Y_test_proba = np.load("../input/ion-shifted-rfc-proba/Y_test_proba.npy")

    # attach one column per class to each frame
    for frame, proba in ((train, Y_train_proba), (test, Y_test_proba)):
        for class_id in range(11):
            frame[f"proba_{class_id}"] = proba[:, class_id]

    return train, test, sub

# batching
def batching(df, batch_size):
    """Assign every row to a group of ``batch_size`` consecutive index values.

    Rows are keyed by ``df.index // batch_size`` and each distinct key is
    numbered in order of first appearance (``sort=False``). The number is
    stored in a new ``group`` column of dtype ``uint16``.

    Args:
        df: DataFrame with an integer index; modified in place.
        batch_size: number of index values per group.

    Returns:
        The same ``df`` object, now carrying the ``group`` column.
    """
    # GroupBy.ngroup() is the public API for "number each group"; it replaces
    # the undocumented agg(['ngroup']) call.
    group_ids = df.groupby(df.index // batch_size, sort=False).ngroup()
    # .values -> positional assignment, independent of index alignment.
    # NOTE: uint16 holds at most 65535 distinct group ids.
    df['group'] = group_ids.values.astype(np.uint16)
    return df

# normalization
def normalize(train, test):
    """Standardize the ``signal`` column of both frames in place.

    The mean and standard deviation are computed on ``train.signal`` only and
    applied to both ``train`` and ``test``.

    Returns:
        The same ``(train, test)`` objects after modification.
    """
    mu = train.signal.mean()
    sigma = train.signal.std()
    for frame in (train, test):
        frame['signal'] = (frame.signal - mu) / sigma
    return train, test

# get lead and lag data
def lag_with_pct_change(df, windows):
    """Add lagged and leading copies of ``signal`` for each window size.

    For every ``w`` in ``windows`` two columns are written to ``df``:
    ``signal_shift_pos_<w>`` (signal shifted by ``w`` within its group) and
    ``signal_shift_neg_<w>`` (signal shifted by ``-w`` within its group).
    Positions left empty by the shift are filled with 0.

    Returns:
        The same ``df`` object with the new columns.
    """
    for w in windows:
        for direction, periods in (('pos', w), ('neg', -1 * w)):
            shifted = df.groupby('group')['signal'].shift(periods)
            df[f'signal_shift_{direction}_{w}'] = shifted.fillna(0)
    return df

# main module to run feature engineering
def run_feat_engineering(df, batch_size):
    """Run the feature-engineering steps on one frame.

    Steps, in order: assign rows to groups of ``batch_size`` via
    ``batching``; add lead/lag copies of the signal for windows 1, 2 and 3
    via ``lag_with_pct_change``; add the squared signal as ``signal_2``.

    Args:
        df: DataFrame with a ``signal`` column.
        batch_size: number of rows per group.

    Returns:
        The frame returned by the helper steps, with the new columns added.
    """
    # create batch
    df = batching(df, batch_size = batch_size)
    # get lead and lag data
    df = lag_with_pct_change(df, [1, 2, 3])
    # square the data
    df['signal_2'] = df['signal'] ** 2
    return df

# selecting feature other than index, group, open_channels, and time
def feature_selection(train, test):
    """Pick the model features and impute non-finite values.

    Features are all columns of ``train`` except ``index``, ``group``,
    ``open_channels`` and ``time``. Infinite values in both frames are turned
    into NaN, then every feature's NaNs are filled with that feature's mean
    computed over train and test together. The number of features is printed.

    Returns:
        Tuple ``(train, test, features)`` where the frames are the cleaned
        copies and ``features`` is the list of selected column names.
    """
    excluded = {'index', 'group', 'open_channels', 'time'}
    features = [name for name in train.columns if name not in excluded]

    infinities = [np.inf, -np.inf]
    train = train.replace(infinities, np.nan)
    test = test.replace(infinities, np.nan)
    print(len(features))

    # fill missing values with the pooled train+test mean of each feature
    for name in features:
        pooled_mean = pd.concat([train[name], test[name]], axis = 0).mean()
        train[name] = train[name].fillna(pooled_mean)
        test[name] = test[name].fillna(pooled_mean)
    return train, test, features

## neural network
def Classifier(shape_):
    """Build and compile the WaveNet-style sequence classifier.

    Architecture, in construction order: two parallel Conv1D/BN/ReLU branches
    (kernel 3 and kernel 5, dilation 2) on the input, concatenated; four
    ``wave_block`` stages with 24, 32, 64 and 128 filters; a final
    Conv1D/BN/ReLU, dropout 0.2 and an 11-way softmax ``Dense`` layer named
    ``out``.

    Args:
        shape_: input shape handed to ``Input`` (the caller in this file
            passes ``(None, n_features)``).

    Returns:
        A compiled Keras model using categorical cross-entropy, an Adam
        optimizer (learning rate ``LR``) wrapped in ``tfa.optimizers.SWA``,
        and the ``accuracy`` metric.
    """
    # Conv1D + BatchNormalization + Relu
    def cbr(x, out_layer, kernel, stride, dilation):
        x = Conv1D(out_layer, kernel_size=kernel, dilation_rate=dilation, strides=stride, padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        return x
    
    ## wavenet
    ## https://www.jianshu.com/p/bb13ae73e427
    def wave_block(x, filters, kernel_size, n):
        # dilation rates 1, 2, 4, ..., 2**(n-1)
        dilation_rates = [2**i for i in range(n)]
        # 1x1 convolution to bring the input to `filters` channels
        x = Conv1D(filters = filters,
                   kernel_size = 1,
                   padding = 'same')(x)
        res_x = x
        for dilation_rate in dilation_rates:
            # gated activation: tanh branch multiplied by sigmoid branch
            tanh_out = Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same', 
                              activation = 'tanh', 
                              dilation_rate = dilation_rate)(x)
            sigm_out = Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same',
                              activation = 'sigmoid', 
                              dilation_rate = dilation_rate)(x)
            x = Multiply()([tanh_out, sigm_out])
            x = Conv1D(filters = filters,
                       kernel_size = 1,
                       padding = 'same')(x)
            # accumulate every stage's output; the next stage consumes x, not res_x
            res_x = Add()([res_x, x])
        return res_x
    
    # main network
    inp = Input(shape = (shape_))
    # first cbr
    x1 = cbr(inp, 64,3, 1, 2)
    x1 = BatchNormalization()(x1)
    # second cbr 
    x2 = cbr(inp, 64,5, 1, 2)
    x2 = BatchNormalization()(x2)
    # concatenate two cbr
    x = Concatenate()([x1,x2])
    # first wavenet block
    x = wave_block(x, 24, 3, 12)
    x = BatchNormalization()(x)
    # second wavenet block
    x = wave_block(x, 32, 3, 8)
    x = BatchNormalization()(x)
    # third wavenet block
    x = wave_block(x, 64, 3, 4)
    x = BatchNormalization()(x)
    # fourth wavenet block (no BatchNormalization after this one)
    x = wave_block(x, 128, 3, 1)
    # add another cbr
    x = cbr(x, 32, 3, 1, 1)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    # output: 11-way softmax over the last axis
    out = Dense(11, activation = 'softmax', name = 'out')(x)
    
    model = models.Model(inputs = inp, outputs = out)
    # adam as optimizer (`learning_rate` is the supported name; `lr` is a deprecated alias)
    opt = Adam(learning_rate = LR)
    # wrap Adam in stochastic weight averaging
    opt = tfa.optimizers.SWA(opt)
    # compile
    model.compile(loss = losses.CategoricalCrossentropy(), optimizer = opt, metrics = ['accuracy'])
#     model.compile(loss = categorical_focal_loss(11),optimizer = opt, metrics = ['accuracy'])
    return model

# learning rate schedule
def lr_schedule(epoch):
    """Return the learning rate for ``epoch`` as a step-wise fraction of ``LR``.

    Epochs below 30 use ``LR`` unchanged. Each following block of ten epochs
    divides ``LR`` by 3, 5, 7, 9, 11 and 13 in turn, and epochs from 90
    onwards use ``LR / 100``.
    """
    if epoch < 30:
        return LR
    # (exclusive upper epoch bound, divisor applied to LR)
    steps = ((40, 3), (50, 5), (60, 7), (70, 9), (80, 11), (90, 13))
    for upper_bound, divisor in steps:
        if epoch < upper_bound:
            return LR / divisor
    return LR / 100

# callback that prints the macro F1 score after every epoch
class MacroF1(Callback):
    """Keras callback that prints the macro-averaged F1 score after each epoch.

    Args:
        model: model whose ``predict`` is called on ``inputs``.
        inputs: array fed to ``model.predict``.
        targets: one-hot targets; the class axis is axis 2. They are
            converted once to a flat vector of class ids.
    """

    def __init__(self, model, inputs, targets):
        # initialise the base Callback state before setting our own attributes
        super().__init__()
        self.model = model
        self.inputs = inputs
        self.targets = np.argmax(targets, axis = 2).reshape(-1)

    def on_epoch_end(self, epoch, logs = None):
        """Predict on the stored inputs and print the macro F1 score."""
        pred = np.argmax(self.model.predict(self.inputs), axis = 2).reshape(-1)
        score = f1_score(self.targets, pred, average = 'macro')
        print(f'F1 Macro Score: {score:.5f}')

# main function to perform GroupKFold cross-validation
def run_cv_model_by_batch(train, test, splits, batch_col, feats, sample_submission, nn_epochs, nn_batch_size):
    """Train the classifier with GroupKFold cross-validation and write the submission.

    For every fold a fresh ``Classifier`` is trained, the checkpoint with the
    best ``val_accuracy`` is reloaded, out-of-fold predictions are stored and
    the test predictions are added to a running average. The per-fold and
    overall out-of-fold macro F1 scores are printed.

    Args:
        train: DataFrame holding the columns in ``feats``, the column
            ``batch_col`` and the target column ``open_channels``.
        test: DataFrame holding the columns in ``feats`` and ``batch_col``.
        splits: number of GroupKFold folds; also the divisor used to average
            the test predictions over folds.
        batch_col: name of the column holding each row's group id.
        feats: list of feature column names fed to the network.
        sample_submission: DataFrame whose ``open_channels`` column is
            overwritten with the predicted class before it is saved.
        nn_epochs: number of training epochs per fold.
        nn_batch_size: batch size given to ``model.fit``.

    Side effects:
        Writes ``wavenet.h5`` (overwritten by every fold) and
        ``submission_wavenet1.csv`` in the working directory.

    NOTE(review): the indexing below assumes group ids are 0..n_groups-1,
    every group has the same number of rows, and rows are ordered by group;
    confirm against the frames produced by the feature-engineering step.
    """
    # seed
    seed_everything(SEED)
    
    # reset Keras state and install a session limited to 2 intra-op / 2 inter-op threads
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=2,inter_op_parallelism_threads=2)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    
    # accumulators: out-of-fold and averaged test class probabilities (11 classes)
    oof_ = np.zeros((len(train), 11)) 
    preds_ = np.zeros((len(test), 11))
    target = ['open_channels']
    
    # kfold by group, honouring the requested number of folds
    group = train[batch_col]
    kf = GroupKFold(n_splits=splits)
    fold_rows = list(kf.split(train, train[target], group))
    
    # per fold: unique training group ids, unique validation group ids and
    # the validation row positions; the split indices are positional, so look
    # the ids up in the raw array rather than by index label
    group_ids = group.values
    new_splits = [
        [np.unique(group_ids[tr_rows]), np.unique(group_ids[val_rows]), val_rows]
        for tr_rows, val_rows in fold_rows
    ]
        
    # one-hot encode open channels 
    target_cols = ['target_'+str(i) for i in range(11)]
    tr = pd.concat([pd.get_dummies(train.open_channels), train[[batch_col]]], axis=1)
    tr.columns = target_cols + [batch_col]

    # stack the per-group blocks into (n_groups, rows_per_group, n_columns) arrays
    train_tr = np.array(list(tr.groupby(batch_col).apply(lambda x: x[target_cols].values))).astype(np.float32)
    train = np.array(list(train.groupby(batch_col).apply(lambda x: x[feats].values)))
    test = np.array(list(test.groupby(batch_col).apply(lambda x: x[feats].values)))

    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits):
        train_x, train_y = train[tr_idx], train_tr[tr_idx]
        valid_x, valid_y = train[val_idx], train_tr[val_idx]
        
        # garbage collection
        gc.collect()
        
        # input data's shape: any sequence length, n_features channels
        shape_ = (None, train_x.shape[2]) 
        model = Classifier(shape_)
        
        # use checkpoint to save best model output
        checkpoint = ModelCheckpoint(filepath='wavenet.h5',monitor='val_accuracy',mode='auto' ,save_best_only=True)
        
        # use schedule defined before 
        cb_lr_schedule = LearningRateScheduler(lr_schedule)
        
        # fit
        model.fit(train_x,train_y,
                  epochs = nn_epochs,
                  callbacks = [cb_lr_schedule,MacroF1(model, valid_x, valid_y),checkpoint], # adding custom evaluation metric for each epoch
                  batch_size = nn_batch_size,verbose = 2,
                  validation_data = (valid_x,valid_y))
        
        # load best model 
        model.load_weights("wavenet.h5")
        preds_f = model.predict(valid_x)
        
        # print f1 score 
        f1_score_ = f1_score(np.argmax(valid_y, axis=2).reshape(-1),  np.argmax(preds_f, axis=2).reshape(-1), average = 'macro') # need to get the class with the biggest probability
        print(f'Training fold {n_fold + 1} completed. macro f1 score : {f1_score_ :1.5f}')
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx,:] += preds_f
        te_preds = model.predict(test)
        te_preds = te_preds.reshape(-1, te_preds.shape[-1])           
        # average over the folds actually run
        preds_ += te_preds / splits
        
    # axis 2 for the 3-dimensional array and axis 1 for the 2-dimensional array (extracting the best class)
    f1_score_ = f1_score(np.argmax(train_tr, axis = 2).reshape(-1),  np.argmax(oof_, axis = 1), average = 'macro') 
    
    print(f'Training completed. oof macro f1 score : {f1_score_:1.5f}')
    
    # save to sample submission
    sample_submission['open_channels'] = np.argmax(preds_, axis = 1).astype(int)
    sample_submission.to_csv('submission_wavenet1.csv', index=False, float_format='%.4f')
    
def run_everything():
    """Run the whole pipeline: load, normalize, engineer features, train.

    Progress messages are printed before and after each stage. Training and
    submission writing are delegated to ``run_cv_model_by_batch``.
    """
    print('Reading Data Started...')
    train_df, test_df, submission_df = read_data()
    train_df, test_df = normalize(train_df, test_df)
    print('Reading and Normalizing Data Completed')

    print('Creating Features')
    print('Feature Engineering Started...')
    train_df, test_df = [
        run_feat_engineering(frame, batch_size = GROUP_BATCH_SIZE)
        for frame in (train_df, test_df)
    ]
    train_df, test_df, feature_names = feature_selection(train_df, test_df)
    print('Feature Engineering Completed...')

    print(f'Training Wavenet model with {SPLITS} folds of GroupKFold Started...')
    run_cv_model_by_batch(
        train_df, test_df, SPLITS, 'group', feature_names,
        submission_df, EPOCHS, NNBATCHSIZE,
    )
    print('Training completed...')
        
# Notebook entry point: run the full read -> feature engineering -> training pipeline.
run_everything()

Reading Data Started...
Reading and Normalizing Data Completed
Creating Features
Feature Engineering Started...
19
Feature Engineering Completed...
Training Wavenet model with 5 folds of GroupKFold Started...
Our training dataset shape is (1000, 4000, 19)
Our validation dataset shape is (250, 4000, 19)
Train on 1000 samples, validate on 250 samples
Epoch 1/120
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
F1 Macro Score: 0.77779
1000/1000 - 31s - loss: 0.5203 - accuracy: 0.8591 - val_loss: 0.7131 - val_accuracy: 0.8882
Epoch 2/120
F1 Macro Score: 0.90681
1000/1000 - 8s - loss: 0.1728 - accuracy: 0.9595 - val_loss: 0.3545 - val_accuracy: 0.9509
Epoch 3/120
F1 Macro Score: 0.92801
1000/1000 - 8s - loss: 0.1370 - accuracy: 0.9648 - val_loss: 0.1830 - val_accuracy: 0.9627
Epoch 4/120
F1 Macro Score: 0.92596
1000/1000 - 8s - loss: 0.1292 - accuracy: 0.9653 - val_loss: 0.1419 - val_accuracy: 0.9637
Epoch 5/120
F1 Macro Score: 0.93315
1000/1000 - 8s - loss: 

Epoch 60/120
F1 Macro Score: 0.94109
1000/1000 - 8s - loss: 0.0792 - accuracy: 0.9704 - val_loss: 0.0825 - val_accuracy: 0.9686
Epoch 61/120
F1 Macro Score: 0.94122
1000/1000 - 8s - loss: 0.0788 - accuracy: 0.9705 - val_loss: 0.0822 - val_accuracy: 0.9685
Epoch 62/120
F1 Macro Score: 0.94107
1000/1000 - 8s - loss: 0.0788 - accuracy: 0.9705 - val_loss: 0.0822 - val_accuracy: 0.9686
Epoch 63/120
F1 Macro Score: 0.93996
1000/1000 - 8s - loss: 0.0785 - accuracy: 0.9705 - val_loss: 0.0836 - val_accuracy: 0.9680
Epoch 64/120
F1 Macro Score: 0.94077
1000/1000 - 8s - loss: 0.0787 - accuracy: 0.9705 - val_loss: 0.0825 - val_accuracy: 0.9684
Epoch 65/120
F1 Macro Score: 0.94110
1000/1000 - 8s - loss: 0.0784 - accuracy: 0.9706 - val_loss: 0.0822 - val_accuracy: 0.9686
Epoch 66/120
F1 Macro Score: 0.94103
1000/1000 - 8s - loss: 0.0785 - accuracy: 0.9706 - val_loss: 0.0823 - val_accuracy: 0.9685
Epoch 67/120
F1 Macro Score: 0.94082
1000/1000 - 8s - loss: 0.0786 - accuracy: 0.9706 - val_loss: 0.0824

Epoch 3/120
F1 Macro Score: 0.92376
1000/1000 - 8s - loss: 0.1415 - accuracy: 0.9642 - val_loss: 0.1864 - val_accuracy: 0.9624
Epoch 4/120
F1 Macro Score: 0.93058
1000/1000 - 8s - loss: 0.1315 - accuracy: 0.9648 - val_loss: 0.1337 - val_accuracy: 0.9671
Epoch 5/120
F1 Macro Score: 0.93043
1000/1000 - 8s - loss: 0.1238 - accuracy: 0.9654 - val_loss: 0.1173 - val_accuracy: 0.9669
Epoch 6/120
F1 Macro Score: 0.93587
1000/1000 - 8s - loss: 0.1207 - accuracy: 0.9652 - val_loss: 0.1014 - val_accuracy: 0.9686
Epoch 7/120
F1 Macro Score: 0.93331
1000/1000 - 8s - loss: 0.1180 - accuracy: 0.9654 - val_loss: 0.1005 - val_accuracy: 0.9682
Epoch 8/120
F1 Macro Score: 0.93797
1000/1000 - 8s - loss: 0.1146 - accuracy: 0.9656 - val_loss: 0.0958 - val_accuracy: 0.9689
Epoch 9/120
F1 Macro Score: 0.93705
1000/1000 - 8s - loss: 0.1091 - accuracy: 0.9660 - val_loss: 0.0930 - val_accuracy: 0.9688
Epoch 10/120
F1 Macro Score: 0.93847
1000/1000 - 8s - loss: 0.1101 - accuracy: 0.9655 - val_loss: 0.0922 - val_

F1 Macro Score: 0.94231
1000/1000 - 8s - loss: 0.0810 - accuracy: 0.9696 - val_loss: 0.0758 - val_accuracy: 0.9712
Epoch 68/120
F1 Macro Score: 0.94193
1000/1000 - 8s - loss: 0.0808 - accuracy: 0.9696 - val_loss: 0.0762 - val_accuracy: 0.9711
Epoch 69/120
F1 Macro Score: 0.94234
1000/1000 - 8s - loss: 0.0810 - accuracy: 0.9696 - val_loss: 0.0756 - val_accuracy: 0.9712
Epoch 70/120
F1 Macro Score: 0.94223
1000/1000 - 8s - loss: 0.0806 - accuracy: 0.9697 - val_loss: 0.0758 - val_accuracy: 0.9712
Epoch 71/120
F1 Macro Score: 0.94186
1000/1000 - 8s - loss: 0.0808 - accuracy: 0.9696 - val_loss: 0.0763 - val_accuracy: 0.9710
Epoch 72/120
F1 Macro Score: 0.94202
1000/1000 - 8s - loss: 0.0803 - accuracy: 0.9698 - val_loss: 0.0760 - val_accuracy: 0.9711
Epoch 73/120
F1 Macro Score: 0.94232
1000/1000 - 8s - loss: 0.0805 - accuracy: 0.9697 - val_loss: 0.0758 - val_accuracy: 0.9712
Epoch 74/120
F1 Macro Score: 0.94230
1000/1000 - 8s - loss: 0.0806 - accuracy: 0.9696 - val_loss: 0.0761 - val_accura

Epoch 10/120
F1 Macro Score: 0.93557
1000/1000 - 8s - loss: 0.1225 - accuracy: 0.9643 - val_loss: 0.1060 - val_accuracy: 0.9675
Epoch 11/120
F1 Macro Score: 0.93558
1000/1000 - 8s - loss: 0.1121 - accuracy: 0.9661 - val_loss: 0.0978 - val_accuracy: 0.9677
Epoch 12/120
F1 Macro Score: 0.93655
1000/1000 - 8s - loss: 0.1052 - accuracy: 0.9663 - val_loss: 0.0965 - val_accuracy: 0.9681
Epoch 13/120
F1 Macro Score: 0.93564
1000/1000 - 8s - loss: 0.1044 - accuracy: 0.9663 - val_loss: 0.0933 - val_accuracy: 0.9679
Epoch 14/120
F1 Macro Score: 0.93677
1000/1000 - 8s - loss: 0.1013 - accuracy: 0.9665 - val_loss: 0.0914 - val_accuracy: 0.9681
Epoch 15/120
F1 Macro Score: 0.93600
1000/1000 - 8s - loss: 0.1011 - accuracy: 0.9664 - val_loss: 0.0919 - val_accuracy: 0.9679
Epoch 16/120
F1 Macro Score: 0.93617
1000/1000 - 8s - loss: 0.0995 - accuracy: 0.9665 - val_loss: 0.0911 - val_accuracy: 0.9679
Epoch 17/120
F1 Macro Score: 0.93679
1000/1000 - 8s - loss: 0.0982 - accuracy: 0.9666 - val_loss: 0.0882

F1 Macro Score: 0.94016
1000/1000 - 8s - loss: 0.0798 - accuracy: 0.9700 - val_loss: 0.0792 - val_accuracy: 0.9699
Epoch 75/120
F1 Macro Score: 0.94045
1000/1000 - 8s - loss: 0.0798 - accuracy: 0.9700 - val_loss: 0.0789 - val_accuracy: 0.9702
Epoch 76/120
F1 Macro Score: 0.94011
1000/1000 - 8s - loss: 0.0796 - accuracy: 0.9700 - val_loss: 0.0790 - val_accuracy: 0.9699
Epoch 77/120
F1 Macro Score: 0.94005
1000/1000 - 8s - loss: 0.0794 - accuracy: 0.9701 - val_loss: 0.0789 - val_accuracy: 0.9700
Epoch 78/120
F1 Macro Score: 0.93995
1000/1000 - 8s - loss: 0.0800 - accuracy: 0.9701 - val_loss: 0.0791 - val_accuracy: 0.9700
Epoch 79/120
F1 Macro Score: 0.94030
1000/1000 - 8s - loss: 0.0792 - accuracy: 0.9701 - val_loss: 0.0787 - val_accuracy: 0.9701
Epoch 80/120
F1 Macro Score: 0.94004
1000/1000 - 8s - loss: 0.0791 - accuracy: 0.9702 - val_loss: 0.0787 - val_accuracy: 0.9701
Epoch 81/120
F1 Macro Score: 0.94011
1000/1000 - 8s - loss: 0.0793 - accuracy: 0.9701 - val_loss: 0.0787 - val_accura

Epoch 17/120
F1 Macro Score: 0.93624
1000/1000 - 8s - loss: 0.1013 - accuracy: 0.9667 - val_loss: 0.0933 - val_accuracy: 0.9672
Epoch 18/120
F1 Macro Score: 0.93756
1000/1000 - 8s - loss: 0.0996 - accuracy: 0.9668 - val_loss: 0.0920 - val_accuracy: 0.9674
Epoch 19/120
F1 Macro Score: 0.93658
1000/1000 - 8s - loss: 0.0987 - accuracy: 0.9668 - val_loss: 0.0940 - val_accuracy: 0.9671
Epoch 20/120
F1 Macro Score: 0.93683
1000/1000 - 8s - loss: 0.0985 - accuracy: 0.9668 - val_loss: 0.0934 - val_accuracy: 0.9671
Epoch 21/120
F1 Macro Score: 0.93769
1000/1000 - 8s - loss: 0.0962 - accuracy: 0.9670 - val_loss: 0.0898 - val_accuracy: 0.9675
Epoch 22/120
F1 Macro Score: 0.93644
1000/1000 - 8s - loss: 0.0964 - accuracy: 0.9668 - val_loss: 0.0902 - val_accuracy: 0.9673
Epoch 23/120
F1 Macro Score: 0.93786
1000/1000 - 8s - loss: 0.0947 - accuracy: 0.9670 - val_loss: 0.0885 - val_accuracy: 0.9676
Epoch 24/120
F1 Macro Score: 0.93672
1000/1000 - 8s - loss: 0.0940 - accuracy: 0.9669 - val_loss: 0.0938

F1 Macro Score: 0.94069
1000/1000 - 8s - loss: 0.0803 - accuracy: 0.9700 - val_loss: 0.0802 - val_accuracy: 0.9694
Epoch 82/120
F1 Macro Score: 0.94036
1000/1000 - 8s - loss: 0.0802 - accuracy: 0.9701 - val_loss: 0.0805 - val_accuracy: 0.9693
Epoch 83/120
F1 Macro Score: 0.94075
1000/1000 - 8s - loss: 0.0799 - accuracy: 0.9701 - val_loss: 0.0800 - val_accuracy: 0.9695
Epoch 84/120
F1 Macro Score: 0.94041
1000/1000 - 8s - loss: 0.0797 - accuracy: 0.9701 - val_loss: 0.0803 - val_accuracy: 0.9693
Epoch 85/120
F1 Macro Score: 0.94058
1000/1000 - 8s - loss: 0.0798 - accuracy: 0.9701 - val_loss: 0.0801 - val_accuracy: 0.9693
Epoch 86/120
F1 Macro Score: 0.94077
1000/1000 - 8s - loss: 0.0798 - accuracy: 0.9700 - val_loss: 0.0801 - val_accuracy: 0.9694
Epoch 87/120
F1 Macro Score: 0.93985
1000/1000 - 8s - loss: 0.0798 - accuracy: 0.9701 - val_loss: 0.0804 - val_accuracy: 0.9692
Epoch 88/120
F1 Macro Score: 0.94064
1000/1000 - 8s - loss: 0.0798 - accuracy: 0.9701 - val_loss: 0.0801 - val_accura

Epoch 24/120
F1 Macro Score: 0.93627
1000/1000 - 8s - loss: 0.0911 - accuracy: 0.9680 - val_loss: 0.0913 - val_accuracy: 0.9671
Epoch 25/120
F1 Macro Score: 0.93777
1000/1000 - 8s - loss: 0.0933 - accuracy: 0.9677 - val_loss: 0.0855 - val_accuracy: 0.9686
Epoch 26/120
F1 Macro Score: 0.93864
1000/1000 - 8s - loss: 0.0915 - accuracy: 0.9679 - val_loss: 0.0882 - val_accuracy: 0.9682
Epoch 27/120
F1 Macro Score: 0.94024
1000/1000 - 8s - loss: 0.0887 - accuracy: 0.9685 - val_loss: 0.0820 - val_accuracy: 0.9692
Epoch 28/120
F1 Macro Score: 0.93956
1000/1000 - 8s - loss: 0.0877 - accuracy: 0.9686 - val_loss: 0.0832 - val_accuracy: 0.9689
Epoch 29/120
F1 Macro Score: 0.93998
1000/1000 - 8s - loss: 0.0866 - accuracy: 0.9687 - val_loss: 0.0822 - val_accuracy: 0.9691
Epoch 30/120
F1 Macro Score: 0.93925
1000/1000 - 8s - loss: 0.0879 - accuracy: 0.9686 - val_loss: 0.0864 - val_accuracy: 0.9682
Epoch 31/120
F1 Macro Score: 0.94110
1000/1000 - 8s - loss: 0.0844 - accuracy: 0.9693 - val_loss: 0.0804

F1 Macro Score: 0.94073
1000/1000 - 8s - loss: 0.0772 - accuracy: 0.9708 - val_loss: 0.0801 - val_accuracy: 0.9693
Epoch 89/120
F1 Macro Score: 0.94092
1000/1000 - 8s - loss: 0.0768 - accuracy: 0.9709 - val_loss: 0.0800 - val_accuracy: 0.9694
Epoch 90/120
F1 Macro Score: 0.94083
1000/1000 - 8s - loss: 0.0769 - accuracy: 0.9709 - val_loss: 0.0800 - val_accuracy: 0.9694
Epoch 91/120
F1 Macro Score: 0.94087
1000/1000 - 8s - loss: 0.0765 - accuracy: 0.9710 - val_loss: 0.0798 - val_accuracy: 0.9694
Epoch 92/120
F1 Macro Score: 0.94078
1000/1000 - 8s - loss: 0.0765 - accuracy: 0.9711 - val_loss: 0.0798 - val_accuracy: 0.9693
Epoch 93/120
F1 Macro Score: 0.94089
1000/1000 - 8s - loss: 0.0765 - accuracy: 0.9711 - val_loss: 0.0797 - val_accuracy: 0.9694
Epoch 94/120
F1 Macro Score: 0.94091
1000/1000 - 8s - loss: 0.0765 - accuracy: 0.9711 - val_loss: 0.0798 - val_accuracy: 0.9694
Epoch 95/120
F1 Macro Score: 0.94092
1000/1000 - 8s - loss: 0.0766 - accuracy: 0.9710 - val_loss: 0.0798 - val_accura