### 1 - Load data

In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

In [3]:
# Load the iris dataset and build one DataFrame holding the feature columns
# (named after iris.feature_names) plus the class label in a 'target' column.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

### 2 - Creating the Layers for the Model.

In [4]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import layers as L

@tf.keras.utils.register_keras_serializable()
def smish(x):
    """Smish activation: ``x * tanh(log(1 + sigmoid(x)))``.

    Registered as Keras-serializable so it can be referenced by layers that
    are saved and reloaded.
    """
    inner = K.log(1 + K.sigmoid(x))
    return x * K.tanh(inner)


@tf.keras.utils.register_keras_serializable()
class GatedLinearUnit(L.Layer):
    """Gated linear unit.

    Multiplies a linear projection of the input element-wise by a sigmoid
    projection of the same input, both of width ``units``.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        # Sub-layers are created in the same order as before (linear, then gate).
        self.linear = L.Dense(units)
        self.sigmoid = L.Dense(units, activation="sigmoid")
        self.units = units

    def get_config(self):
        """Return the base layer config extended with ``units``."""
        return {**super().get_config(), 'units': self.units}

    def call(self, inputs):
        """Return ``linear(inputs) * sigmoid_gate(inputs)``."""
        projected = self.linear(inputs)
        gate = self.sigmoid(inputs)
        return projected * gate
    

@tf.keras.utils.register_keras_serializable()
class GatedResidualNetwork(L.Layer):
    """Gated residual network block.

    Pipeline: Dense(smish) -> Dense -> Dropout -> GatedLinearUnit, added to the
    input as a residual connection and then layer-normalised. When the last
    dimension of the input differs from ``units`` the input is first projected
    to ``units`` with a Dense layer so the residual sum is well-formed.

    Args:
        units: Width of every Dense/GLU sub-layer and of the output.
        dropout_rate: Rate passed to the Dropout sub-layer.
    """

    def __init__(self, units, dropout_rate, **kwargs):
        super().__init__(**kwargs)
        self.units, self.dropout_rate = units, dropout_rate
        # Sub-layers are created in the same order as the original definition.
        # Note: despite its name, `relu_dense` uses the smish activation.
        self.relu_dense = L.Dense(units, activation=smish)
        self.linear_dense = L.Dense(units)
        self.dropout = L.Dropout(dropout_rate)
        self.gated_linear_unit = GatedLinearUnit(units)
        self.layer_norm = L.LayerNormalization()
        self.project = L.Dense(units)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            'units': self.units,
            'dropout_rate': self.dropout_rate,
        }

    def call(self, inputs):
        """Apply the gated residual transformation to ``inputs``."""
        hidden = self.dropout(self.linear_dense(self.relu_dense(inputs)))
        # Project the skip connection only when its width does not match.
        skip = inputs if inputs.shape[-1] == self.units else self.project(inputs)
        return self.layer_norm(skip + self.gated_linear_unit(hidden))
    

@tf.keras.utils.register_keras_serializable()
class VariableSelection(L.Layer):
    """Variable selection over a list of per-feature tensors.

    Each feature is transformed by its own GatedResidualNetwork. A further GRN
    applied to the concatenation of all features, followed by a softmax Dense
    layer of width ``num_features``, produces one weight per feature. The
    output is the weighted sum of the per-feature GRN outputs.

    Args:
        num_features: Number of per-feature tensors expected in ``inputs``.
        units: Width of every GRN (and therefore of the output).
        dropout_rate: Dropout rate used by every GRN.
    """

    def __init__(self, num_features, units, dropout_rate, **kwargs):
        super().__init__(**kwargs)
        # One independent GRN per feature.
        self.grns = [
            GatedResidualNetwork(units, dropout_rate) for _ in range(num_features)
        ]
        # GRN + softmax over the concatenated features yield the selection weights.
        self.grn_concat = GatedResidualNetwork(units, dropout_rate)
        self.softmax = L.Dense(units=num_features, activation="softmax")
        self.num_features = num_features
        self.units = units
        self.dropout_rate = dropout_rate

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            'num_features': self.num_features,
            'units': self.units,
            'dropout_rate': self.dropout_rate,
        }

    def call(self, inputs):
        """Return the softmax-weighted combination of the per-feature GRN outputs.

        ``inputs`` is an iterable of tensors, one per feature, which are
        concatenated along the last axis to compute the selection weights.
        """
        # Selection weights, with a trailing axis added for the matmul below.
        weights = self.softmax(self.grn_concat(L.concatenate(inputs)))
        weights = tf.expand_dims(weights, axis=-1)

        # Per-feature GRN outputs stacked along a new axis 1.
        transformed = tf.stack(
            [self.grns[idx](feature) for idx, feature in enumerate(inputs)],
            axis=1,
        )

        # weights^T @ transformed sums over the feature axis; drop the size-1 axis.
        return tf.squeeze(tf.matmul(weights, transformed, transpose_a=True), axis=1)
    

@tf.keras.utils.register_keras_serializable()
class VariableSelectionFlow(L.Layer):
    """Split a tensor into per-feature slices and run variable selection on them.

    The input is split into ``num_features`` equal parts along the last axis.
    When ``dense_units`` is truthy each slice is first passed through its own
    linear Dense layer of that width; the resulting list is then fed to a
    ``VariableSelection`` layer.

    Args:
        num_features: Number of slices the last axis of the input is split into.
        units: Width of the ``VariableSelection`` output.
        dropout_rate: Dropout rate forwarded to ``VariableSelection``.
        dense_units: Optional width of the per-feature linear embedding; when
            ``None`` (or 0) the raw slices are used directly.
    """

    def __init__(self, num_features, units, dropout_rate, dense_units=None, **kwargs):
        super().__init__(**kwargs)
        self.variableselection = VariableSelection(num_features, units, dropout_rate)
        self.split = L.Lambda(lambda t: tf.split(t, num_features, axis=-1))
        # Kept for backward compatibility; mirrors `dense_units`.
        self.dense = dense_units
        if dense_units:
            # Plain Python `range`: the loop only counts layers, so there is no
            # need to build (and iterate over) a TensorFlow tensor with tf.range.
            self.dense_list = [
                L.Dense(dense_units, activation='linear')
                for _ in range(num_features)
            ]
        self.num_features = num_features
        self.units = units
        self.dropout_rate = dropout_rate
        self.dense_units = dense_units

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config['num_features'] = self.num_features
        config['units'] = self.units
        config['dropout_rate'] = self.dropout_rate
        config['dense_units'] = self.dense_units
        return config

    def call(self, inputs):
        """Split ``inputs``, optionally embed each slice, then select variables."""
        split_input = self.split(inputs)
        if self.dense:
            features = [
                self.dense_list[i](split_input[i])
                for i in range(len(self.dense_list))
            ]
        else:
            features = split_input
        return self.variableselection(features)

2023-08-21 16:24:59.930962: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### 3 - Split data

In [5]:
from sklearn.model_selection import train_test_split

# Hold out a test set (default split size) with a fixed seed. The labels are
# kept as a one-column DataFrame, the features are every column except 'target'.
feature_frame = data.drop(columns=['target'])
label_frame = data[['target']]
trainX, testX, trainy, testy = train_test_split(feature_frame, label_frame, random_state=42)

### 4 - Train the models and save their weights

In [13]:
%%time
from sklearn.model_selection import KFold
from tensorflow.keras.losses import CategoricalCrossentropy
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras import Model
from tensorflow.keras import optimizers as O
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
import os

# Train 5-fold x 3-repeat models on the training split, record each model's
# validation cross-entropy, and save its weights for the ensemble used later.

# Validation losses, one float per (fold, repeat) run.
blls = []

batch_size = 32
n_repeats = 3  # independently initialised models trained per fold

# Width / dropout of the three stacked VariableSelectionFlow blocks.
units_1 = 32
drop_1 = 0.75
dense_units = 8

units_2 = 16
drop_2 = 0.5

units_3 = 8
drop_3 = 0.25

# save_weights does not create missing directories, so make sure it exists.
weights_dir = 'models_weights'
os.makedirs(weights_dir, exist_ok=True)

cv = KFold(n_splits=5, shuffle=True, random_state=42)
X = trainX.reset_index(drop=True).values
y = trainy.reset_index(drop=True).values
# LabelEncoder expects a 1-D array; passing the (n, 1) column triggers a
# DataConversionWarning, hence the ravel().
y_enc = LabelEncoder().fit_transform(y.ravel())
y_label = tf.keras.utils.to_categorical(y_enc)
n_features = X.shape[1]

for n, (train_idx, val_idx) in enumerate(cv.split(X, y)):
    for k in range(n_repeats):
        print(f'______fold {n+1}______, ________repeat {k+1}__________')
        inputs_1 = tf.keras.Input(shape=(n_features,))

        features_1 = VariableSelectionFlow(n_features, units_1, drop_1, dense_units=dense_units)(inputs_1)
        features_2 = VariableSelectionFlow(units_1, units_2, drop_2)(features_1)
        features_3 = VariableSelectionFlow(units_2, units_3, drop_3)(features_2)

        outputs = L.Dense(3, activation="softmax")(features_3)

        model = Model(inputs=inputs_1, outputs=outputs)

        opt = O.Adam(1e-3, epsilon=1e-7)
        loss = CategoricalCrossentropy()

        lr = ReduceLROnPlateau(monitor="val_loss", mode='min', factor=0.95, patience=1, verbose=1)
        # NOTE(review): patience (25) exceeds epochs (20), so early stopping can
        # never trigger here; whether the best weights are still restored at the
        # end of training depends on the Keras version -- confirm intent.
        es = EarlyStopping(monitor='val_loss', mode='min', patience=25, verbose=1, restore_best_weights=True)

        model.compile(optimizer=opt, loss=loss)
        history = model.fit(x=X[train_idx], y=y_label[train_idx],
                            batch_size=batch_size,
                            epochs=20,
                            validation_data=(X[val_idx], y_label[val_idx]),
                            callbacks=[lr, es])

        # Validation cross-entropy of the final model, stored as a plain float
        # so that `blls` is a list of numbers rather than of tensors.
        bll = float(loss(y_label[val_idx], model.predict(X[val_idx])))
        blls.append(bll)

        # Best validation loss seen during training and the training loss of
        # that same epoch.
        val_loss = np.asarray(history.history['val_loss'])
        train_loss = np.asarray(history.history['loss'])
        min_val_loss = val_loss.min()
        min_train_loss = train_loss[val_loss.argmin()]
        print(f'min_train_loss: {min_train_loss:.4f}, min_val_loss: {min_val_loss:.4f}, bll: {bll:.4f}')  

        model.save_weights(os.path.join(weights_dir, f'mod_f{n}_r{k}.h5'))

print(np.mean(blls))

  y = column_or_1d(y, warn=True)


______fold 1______, ________repeat 1__________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 16: ReduceLROnPlateau reducing learning rate to 0.0009500000451225787.
Epoch 17/20
Epoch 17: ReduceLROnPlateau reducing learning rate to 0.0009025000152178108.
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 20: ReduceLROnPlateau reducing learning rate to 0.0008573750033974647.
min_train_loss: 0.4301, min_val_loss: 0.3684, bll: 0.3906
______fold 1______, ________repeat 2__________
Epoch 1/20
Epoch 2/20
Epoch 2: ReduceLROnPlateau reducing learning rate to 0.0009500000451225787.
Epoch 3/20
Epoch 3: ReduceLROnPlateau reducing learning rate to 0.0009025000152178108.
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
min

In [14]:
import os

# Folder the training cell saved the per-fold / per-repeat weight files into.
mfolder = 'models_weights/'
# os.listdir returns every entry in arbitrary order; keep only the .h5 weight
# files and sort them so the ensemble is built from a well-defined model list.
models_weights = sorted(
    fname for fname in os.listdir(mfolder) if fname.endswith('.h5')
)

In [24]:
# Rebuild the network once per saved weight file, load the weights, and collect
# both the hard (argmax) and soft (probability) predictions on the test set.

# NOTE: y_pred and batch_size are not read by this cell; they are left defined
# in case other cells rely on them.
y_pred = np.zeros_like(testy.iloc[:,0].values, dtype=np.float32)
batch_size = 32

# Architecture hyper-parameters; these must match the values used for training
# or the saved weights will not fit the rebuilt model.
units_1 = 32
drop_1 = 0.75
dense_units = 8

units_2 = 16
drop_2 = 0.5

units_3 = 8
drop_3 = 0.25

# Derive the input width from the data instead of hard-coding it, mirroring the
# training cell which uses the training frame's column count.
n_features = testX.shape[1]

predys = []   # per model: predicted class index for each test sample
predyss = []  # per model: class-probability matrix for the test set
for model_weights in models_weights:
    inputs_1 = tf.keras.Input(shape=(n_features,))

    features_1 = VariableSelectionFlow(n_features, units_1, drop_1, dense_units=dense_units)(inputs_1)
    features_2 = VariableSelectionFlow(units_1, units_2, drop_2)(features_1)
    features_3 = VariableSelectionFlow(units_2, units_3, drop_3)(features_2)

    outputs = L.Dense(3, activation="softmax")(features_3)

    model = Model(inputs=inputs_1, outputs=outputs)
    model.load_weights(os.path.join(mfolder, model_weights))
    predy_ = model.predict(testX)
    predys.append(np.argmax(predy_, axis=1))
    predyss.append(predy_)



In [25]:
# Hard voting: for every test sample take the class predicted by the largest
# number of models.
predy = []
for sample_idx in range(len(predys[0])):
    votes = [model_preds[sample_idx] for model_preds in predys]
    predy.append(max(set(votes), key=votes.count))

In [26]:
# Percentage of test samples whose ensemble prediction matches the true label.
# (The printed label is Chinese for "test-set accuracy".)
hits = predy == testy.values.ravel()
accuracy_pct = sum(hits) * 100 / len(testy)
print('測試集準確率:', f'{accuracy_pct:.2f}', '%')

測試集準確率: 97.37 %


In [92]:
# Soft voting: average the class-probability matrices of all loaded models and
# pick the most probable class per test sample. The mean is taken over however
# many models were loaded (no hard-coded model count), and no renormalisation
# is needed because scaling a row by a positive constant does not change its
# argmax.
predy = np.mean(predyss, axis=0).argmax(axis=1)

In [93]:
# Percentage of test samples whose soft-voting prediction matches the true
# label. (The printed label is Chinese for "test-set accuracy".)
hits = predy == testy.values.ravel()
accuracy_pct = sum(hits) * 100 / len(testy)
print('測試集準確率:', f'{accuracy_pct:.2f}', '%')

測試集準確率: 97.37 %
