In [1]:
from string import ascii_letters
import time

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import tensorflow as tf
import tensorflow_addons as tfa

# Import necessary modules
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt

# Keras specific

#### CHANGED from import keras:
import tensorflow.keras
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
#####
from keras.layers import Dense, LSTM, Embedding, Flatten, CuDNNLSTM, Bidirectional, Dropout


# from keras.utils import to_categorical

# Gensim
import gensim.models
from gensim import utils

from numpy import array
from numpy import asarray
from numpy import zeros


from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, balanced_accuracy_score

# from tensorflow.keras.datasets import imdb
# from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.losses import BinaryCrossentropy
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Print the version of the Python interpreter running this kernel.
from platform import python_version

print(python_version())

3.7.3


In [3]:
# Restrict TensorFlow to the second physical GPU (index 1) when one exists.
# The visible-device list is printed before and after so the effect is auditable.
print("Before:\n" ,tf.config.get_visible_devices('GPU'))
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
    tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
else:
    # Previously an IndexError was swallowed silently here; say so explicitly
    # instead, and leave the default device visibility untouched.
    print(f"Found {len(gpus)} GPU(s); GPU 1 is not available, keeping default visibility.")
print("After:\n" ,tf.config.get_visible_devices('GPU'))

Before:
 [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
After:
 [PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [4]:
class MyCorpus:
    """Re-iterable wrapper around an in-memory list of sentences.

    Each sentence is expected to be a list of string tokens. Every call to
    ``iter()`` starts a fresh pass over the stored list, so the corpus can be
    consumed more than once.
    """

    def __init__(self, text_list: list):
        # Keep a reference to the caller's list; no copy is made.
        self.text_list = text_list

    def __iter__(self):
        # Hand out the stored sentences one by one, in their original order.
        yield from self.text_list

In [5]:
# https://neptune.ai/blog/implementing-the-macro-f1-score-in-keras

### BAD

### Define F1 measures: F1 = 2 * (precision * recall) / (precision + recall)

def custom_f1(y_true, y_pred):
    """Keras-backend F1 score computed over the tensors it is given.

    F1 = 2 * (precision * recall) / (precision + recall), where every
    denominator is padded with ``K.epsilon()`` to avoid division by zero.

    Args:
        y_true: ground-truth tensor; values are clipped to [0, 1] and rounded.
        y_pred: prediction tensor; values are clipped to [0, 1] and rounded.

    Returns:
        A scalar backend tensor holding the F1 value.
    """
    eps = K.epsilon()

    # True positives: the product is clipped and rounded before being summed.
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    # Count of actual positives and count of predicted positives.
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))

    recall = true_pos / (actual_pos + eps)
    precision = true_pos / (predicted_pos + eps)

    return 2 * ((precision * recall) / (precision + recall + eps))

In [6]:
# Read the data
min_val = 50
repo_name = f"174repos_min{min_val}_max1000000_zhenhao"
# repo_name = f"300repos_min{min_val}_max1000000_zhenhao"
# repo_name = f"combination_zhenhao"
df = pd.read_csv('../features/'+ repo_name +'.csv')

# Remove errors: rows whose `type` column is 'b' are dropped.
df = df[df.type != 'b']

# Count each class by its explicit label. Unpacking value_counts() positionally
# depends on the frequency ordering (most frequent first), which would swap the
# two counts if positives ever outnumbered negatives and would fail outright if
# only one class were present.
no_log_cnt = int((df['contains_logging'] == 0).sum())
log_cnt = int((df['contains_logging'] == 1).sum())
par_vec_cnt = no_log_cnt + log_cnt
log_ratio = log_cnt / par_vec_cnt
print(f"Number of parameter vecs:\t\t{par_vec_cnt}")
print(f"without logging (negatives):\t{no_log_cnt}")
print(f"with logging (positives):\t\t{log_cnt}")
print(f"Log ratio:\t\t\t\t\t\t{log_ratio * 100:.2f}%")
print(df.shape)
df.head()

Number of parameter vecs:		2328579
without logging (negatives):	2272090
with logging (positives):		56489
Log ratio:						2.43%
(2328579, 4)


Unnamed: 0,type,location,context,contains_logging
0,d,11;4-15;47,dqrqrqrrr,0
1,d,19;4-50;57,dqrqrqrrrqrrqrrqrrrqrrqrrqrrrqrrrqrrr,0
2,d,54;4-110;57,dqrqrqqrrqrqrrrrqrrrqrrrqrrrqrrrqrqrrrrqrrqrrq...,0
3,d,114;4-123;37,dqrqrqqrrrr,0
4,d,144;4-153;5,dqrqrqrrrr,0


In [7]:
# Encode the contexts and split data into train and test sets.
# Every context is a string of ASCII letters; each letter is replaced by the
# string form of its position in `ascii_letters` ('a' -> '0', 'b' -> '1', ...).
# The lookup table is built once so that encoding is a dict lookup per
# character instead of a linear `ascii_letters.index` scan per character.
# A character outside `ascii_letters` still aborts the cell (KeyError).
# NOTE(review): 'a' maps to id 0, which is also the padding value used by the
# pad_sequences calls elsewhere in this notebook -- confirm 'a' never occurs.
letter_to_token = {letter: str(position) for position, letter in enumerate(ascii_letters)}
X = [[letter_to_token[char] for char in context] for context in df.context]
y = df.contains_logging
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, stratify=y, random_state=0)

# One-hot encoded outputs are no longer used; the placeholder is kept because
# the training loop references it on its softmax path.
# y_train_categorical = to_categorical(y_train)
y_train_categorical = None
# y_test_categorical = to_categorical(y_test)
# count_classes = y_test_categorical.shape[1]
# assert count_classes == 2

# Word2Vec Model
# NOTE(review): the embeddings are fit on all of X, i.e. train and test contexts.
sentences = MyCorpus(X)
gensim_model = gensim.models.Word2Vec(sentences=sentences, min_count=1)
actual_vocab_size = len(gensim_model.wv.key_to_index)

# Pad the context prep: object arrays of token lists, consumed by pad_sequences.
X_train_party = np.array([list(x) for x in X_train], dtype=object)
X_test_party = np.array([list(x) for x in X_test], dtype=object)

In [8]:
# Display the number of distinct tokens known to the Word2Vec model.
actual_vocab_size

32

In [9]:
# Default hyperparameters. They are used to build the initial embedding matrix
# and padded inputs; every name except `output_dims` is reassigned per run when
# the training loop unpacks an entry of `iterations`.
vocab_size = actual_vocab_size + 1  # rows of the embedding matrix
output_dims = 100  # columns of the embedding matrix (embedding width)
max_length = 80  # length that pad_sequences pads/truncates every context to
num_epochs = 60
# Earlier variant with boolean labels: class_weight = {False: 1, True: 4}
class_weight = {0: 1.0, 1: 4.0}  # per-class weights passed to model.fit
batch_size = 24
dropout = 0.2  # dropout argument of the LSTM layers
trainable = False  # whether the Embedding layer weights are trainable
callback = None
callback_monitor = 'loss' # alternative: 'accuracy'
cmpltn_metrics = ['accuracy', custom_f1]

num_nodes = 128  # units per recurrent layer

In [15]:
# Column names of one experiment configuration, in the same order as the fields
# of each tuple in `iterations`. The string is also written as the header of
# results.txt.
iteration_features =  "Name, max_length, vocab_size, batch_size, trainable, dropout, sigmoid_out, val_split, callback, callback_monitor, num_nodes, num_epochs, class_weight, cmpltn_metrics"
# Experiment log: each tuple is one training run. Commented-out entries are
# earlier runs kept for reference. Entries with only 13 fields predate the
# cmpltn_metrics column and no longer match the 14-name unpacking in the
# training loop.
iterations = [
# 1st run
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], "loss", 128, 60, {0: 1.0, 1: 1.0}),
#     # Special case: load the checkpoint manually
#     (f'Z_{repo_name}_cploaded', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], "loss", 128, 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}_cploaded', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], "loss", 128, 60, {0: 1.0, 1: 4.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 1.0}),
    
#     (f'Y_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', 128, 60, {0: 1.0, 1: 1.0}),
# 2nd run: loss vs. accuracy as the monitored quantity
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], "loss", 128, 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}_cploaded', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], "loss", 128, 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}_cploaded', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 1.0}),
# 3rd run: other layer sets, num_nodes, vocab_size
#     (f'A_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', 128, 60, {0: 1.0, 1: 1.0}),
#     (f'B_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', 128, 60, {0: 1.0, 1: 1.0}),
    
    # (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], tfa.metrics.F1Score, 128, 60, {0: 1.0, 1: 1.0}),
    # # val_loss is for validation data: https://keras.io/api/callbacks/model_checkpoint/
    # (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], "val_loss", 128, 60, {0: 1.0, 1: 1.0}),
    
    
#     (f'D_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', actual_vocab_size, 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', actual_vocab_size, 60, {0: 1.0, 1: 1.0}),
#     (f'D_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', int(actual_vocab_size / 2), 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', int(actual_vocab_size / 2), 60, {0: 1.0, 1: 1.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, False, 0.2, True, 0.0, ["cp"], 'loss', int(actual_vocab_size * 2), 60, {0: 1.0, 1: 1.0}),
    
#     (f'Z_{repo_name}', 80, actual_vocab_size, 24, True, 0.2, True, 0.0, ["cp"], 'loss', 128, 60, {0: 1.0, 1: 1.0}),
    # (f'D_{repo_name}', 80, actual_vocab_size, 24, True, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 1.0}),
    
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, False, 0.2, True, 0.0, ["cp"], 'loss', 128, 60, {0: 1.0, 1: 1.0}),

# 4th run: combined promising changes, checkpoint, later class_weight
# Using layer set Z instead of B for more epochs in the same execution time
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'custom_f1', 128, 2, {0: 1.0, 1: 4.0}, ['accuracy', custom_f1]),
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 2, {0: 1.0, 1: 4.0}, ['accuracy']),
    
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'f1_score', 128, 60, {0: 1.0, 1: 3.0}, ['unused']),
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'f1_score', 128, 60, {0: 1.0, 1: 4.0}, ['unused']),
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'f1_score', 128, 60, {0: 1.0, 1: 5.0}, ['unused']),
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'f1_score', 128, 60, {0: 1.0, 1: 6.0}, ['unused']),

# Vary batch size
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 64, True, 0.2, True, 0.0, ["cp"], 'f1_score', 128, 60, {0: 1.0, 1: 5.0}, ['unused']),
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'f1_score', 128, 60, {0: 1.0, 1: 5.0}, ['unused']),


# Test model.fit(validation_data=(padded_inputs_test, y_test)) and callback_monitor='val_f1_score'
        (f'Z_{repo_name}', 80, actual_vocab_size + 1, 32, True, 0.2, True, 0.0, ["cp"], 'val_f1_score', 128, 20, {0: 1.0, 1: 5.0}, ['unused']),
# TODO: Do stratified, shuffled k-fold cross-validation, with a high batch size to compensate

    
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 40, {0: 1.0, 1: 4.0}),

    
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 3.0}),
#     (f'Z_{repo_name}', 80, actual_vocab_size + 1, 24, True, 0.2, True, 0.0, ["cp"], 'accuracy', 128, 60, {0: 1.0, 1: 5.0}),
]

# TODO: Batch size, output dims, load_best_weights?
# TODO: Add callback_patience
# TODO: Transform into dict

# TODO: Vary monitors

# Accumulator for per-run score rows; the training loop does not append to it.
all_scores = []
# Show how many runs are currently enabled.
len(iterations)

1

In [11]:
# Append the column header for the rows written by the training loop:
# the configuration columns followed by the bookkeeping and score columns.
# A context manager guarantees the file is closed even if a write fails.
# Note that the header is appended every time this cell is executed.
with open("results.txt", "a") as out:
    out.write(iteration_features + ", settings_hash, execution_time, Final_Bal_Acc, Final_Prec, Final_Recall, Final_F1, Best_Bal_Acc, Best_Prec, Best_Recall, Best_F1")
    out.write("\n")

In [12]:
# Build embedding matrix: row k receives the Word2Vec vector of token str(k).
# Token ids without a trained vector keep their all-zero row.
embedding_matrix = zeros((vocab_size, output_dims))
for token_id in range(vocab_size):
    try:
        embedding_matrix[token_id] = gensim_model.wv[str(token_id)]
    except KeyError:
        # No vector was trained for this id; leave the zero row in place.
        continue

# Pad the context: both splits are brought to `max_length` with the same settings.
# 0.0 is used as the fill value because it is meant to correspond with <PAD>.
# NOTE(review): id 0 is also what the encoding cell assigns to the letter 'a'
# (its index in ascii_letters) -- confirm 'a' never occurs in the contexts.
pad_settings = dict(maxlen=max_length, value=0.0)
padded_inputs = pad_sequences(X_train_party, **pad_settings)
padded_inputs_test = pad_sequences(X_test_party, **pad_settings)

In [16]:
def _build_embedding_matrix(keyed_vectors, vocab_size, output_dims):
    """Return a (vocab_size, output_dims) matrix whose row k is the vector of token str(k).

    Rows of token ids that have no entry in ``keyed_vectors`` stay all-zero.
    """
    matrix = zeros((vocab_size, output_dims))
    for token_id in range(vocab_size):
        try:
            matrix[token_id] = keyed_vectors[str(token_id)]
        except KeyError:
            # No vector was trained for this id; keep the zero row.
            continue
    return matrix


def _add_recurrent_stack(model, name, num_nodes, dropout):
    """Append the recurrent (and optional dense) layers selected by the first letter of ``name``.

    Raises:
        RuntimeError: if ``name`` does not start with a known layer-set letter.
    """
    if name.startswith("A"):
        model.add(tf.keras.layers.LSTM(num_nodes, dropout=dropout, return_sequences=True))
        model.add(tf.keras.layers.LSTM(num_nodes, dropout=dropout))
    elif name.startswith("B"):
        model.add(tf.keras.layers.LSTM(num_nodes, dropout=dropout, return_sequences=True))
        model.add(tf.keras.layers.LSTM(num_nodes, dropout=dropout))
        model.add(Dense(int(num_nodes / 4),activation='relu'))
    elif name.startswith("C"):
        model.add(CuDNNLSTM(128, return_sequences=True))
        model.add(CuDNNLSTM(128))
    elif name.startswith("D"):
        model.add(CuDNNLSTM(num_nodes, return_sequences=True))
        model.add(CuDNNLSTM(num_nodes))
        model.add(Dense(32,activation='relu'))
    elif name.startswith("E"):
        model.add(Bidirectional(CuDNNLSTM(128, return_sequences=True)))
        model.add(Bidirectional(CuDNNLSTM(128)))
        model.add(Dense(32,activation='relu'))
    elif name.startswith("Y"):
        model.add(tf.keras.layers.LSTM(num_nodes, dropout=dropout))
        model.add(Dense(int(num_nodes / 4),activation='relu'))
    elif name.startswith("Z"):
        model.add(tf.keras.layers.LSTM(num_nodes, dropout=dropout))
    else:
        raise RuntimeError(f"Unknown layer-set prefix in iteration name {name!r}")


def _to_class_labels(probabilities, sigmoid_out):
    """Convert raw model outputs into 0/1 class labels.

    With a sigmoid output the probabilities are rounded; with a two-column
    softmax output the label is 0 when the first column exceeds 0.5, else 1.
    """
    if sigmoid_out:
        return np.round(probabilities)
    return [0 if zero > 0.5 else 1 for zero, _ in probabilities]


def _score_digits(value, digits):
    """Format a score as its decimal digits only, e.g. 0.87 -> '87'.

    A value that formats with a non-zero integer part keeps it (1.0 -> '100'),
    so a perfect score can no longer be mistaken for 0.00.
    """
    text = f"{value:.{digits}f}"
    return text[2:] if text.startswith("0.") else text.replace(".", "")


def _evaluation_columns(y_true, y_hat):
    """Balanced accuracy, precision, recall (2 digits) and F1 (3 digits) as strings."""
    return [
        _score_digits(balanced_accuracy_score(y_true, y_hat), 2),
        _score_digits(precision_score(y_true, y_hat), 2),
        _score_digits(recall_score(y_true, y_hat), 2),
        _score_digits(f1_score(y_true, y_hat), 3),
    ]


# Train and evaluate one model per entry of `iterations`, appending one row of
# settings and scores per run to results.txt.
for iteration in iterations:
    (name, max_length, vocab_size, batch_size, trainable, dropout, sigmoid,
     val_split, callback, callback_monitor, num_nodes, num_epochs,
     class_weight, cmpltn_metrics) = iteration
    print(name)

    # The softmax path needs one-hot targets; fail early with a clear message
    # instead of handing None to model.fit.
    if not sigmoid and y_train_categorical is None:
        raise ValueError("sigmoid_out=False requires one-hot targets, but y_train_categorical is None")

    # Non-negative id for this configuration, used in the checkpoint path and
    # the results row. abs() avoids the float round-trip of (h ** 2) ** 0.5,
    # which loses precision for large hashes. Python salts str hashes per
    # process unless PYTHONHASHSEED is set, so the id differs between kernels.
    settings_hash = abs(hash(str(iteration)))
    start = time.time()

    # Rebuild the embedding matrix when this run uses a different vocab_size
    # (a different output_dims is not handled).
    if vocab_size != embedding_matrix.shape[0]:
        print("Recomputing embedding matrix...")
        embedding_matrix = _build_embedding_matrix(gensim_model.wv, vocab_size, output_dims)

    # Re-pad whenever the current padding width differs from this run's
    # max_length. Comparing against the literal 80 left stale inputs in place
    # when a run returned to 80 after a run with another length.
    if max_length != padded_inputs.shape[1]:
        padded_inputs = pad_sequences(X_train_party, maxlen=max_length, value=0.0)  # 0.0 because it corresponds with <PAD>
        padded_inputs_test = pad_sequences(X_test_party, maxlen=max_length, value=0.0)  # 0.0 because it corresponds with <PAD>

    # Build the model
    model = Sequential()
    model.add(Embedding(vocab_size, output_dims, weights=[embedding_matrix], input_length=max_length, trainable=trainable))
    _add_recurrent_stack(model, name, num_nodes, dropout)
    # Author's note from earlier experiments: a separate Dropout layer after
    # the LSTM performed terribly.

    if sigmoid:
        model.add(Dense(1, activation='sigmoid'))
    else:
        model.add(Dense(2, activation='softmax'))

    ######################
    #### Compilation  ####
    ######################
    # cmpltn_metrics from the iteration tuple is currently not passed to
    # compile; the F1 metric is fixed here.
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=[tfa.metrics.F1Score(num_classes=1, threshold=0.5)])

    # Callbacks. Loss-like monitors must be minimised and every other monitored
    # quantity (accuracy, F1) maximised; a hard-coded 'max' would keep the
    # worst weights for a loss monitor.
    enabled_callbacks = callback or []
    monitor_mode = "min" if "loss" in str(callback_monitor) else "max"
    callbacks = []
    filepath = None  # set only when a checkpoint callback is configured
    if "es" in enabled_callbacks:
        es = EarlyStopping(monitor=callback_monitor,
                           mode=monitor_mode,
                           verbose=1,
                           patience=20,
                           restore_best_weights=True)
        callbacks.append(es)
    if "cp" in enabled_callbacks:
        # No epoch number in the path, so the best weights can be reloaded after fit.
        filepath = f"zhenhao_models/{repo_name}/{settings_hash}/checkpoint"
        cp = ModelCheckpoint(filepath=filepath,
                             monitor=callback_monitor,
                             mode=monitor_mode,
                             save_best_only=True,
                             save_weights_only=True,
                             save_freq="epoch")
        callbacks.append(cp)

    if not callbacks:
        callbacks = None

    #############
    #### FIT ####
    #############
    # NOTE(review): the test split doubles as validation data, so any 'val_*'
    # monitor selects the checkpoint on the same data the "Best_*" scores are
    # computed on; those scores are therefore optimistic.
    # Per the Keras docs, validation_data takes precedence over validation_split.
    history = model.fit(padded_inputs,
                        y_train if sigmoid else y_train_categorical,
                        epochs=num_epochs,
                        batch_size=batch_size,
                        validation_data=(padded_inputs_test, y_test),
                        validation_split=val_split,
                        callbacks=callbacks,
                        class_weight=class_weight)

    # Predict with the weights as they are after the last epoch
    pred_test = model.predict(padded_inputs_test, batch_size=batch_size)
    y_pred = _to_class_labels(pred_test, sigmoid)

    # Now load the best checkpointed weights and test again. Without a
    # checkpoint callback there is nothing to load, so the final predictions
    # are reused instead of failing on an undefined or stale path.
    if filepath is not None:
        model.load_weights(filepath)
        best_pred_test = model.predict(padded_inputs_test, batch_size=batch_size)
        best_y_pred = _to_class_labels(best_pred_test, sigmoid)
    else:
        best_pred_test, best_y_pred = pred_test, y_pred

    end = time.time()
    execution_time = int(end - start)

    # Scores: configuration, bookkeeping, then final and best-checkpoint metrics
    scores = [
        name,
        max_length,
        vocab_size,
        batch_size,
        trainable,
        dropout,
        sigmoid,
        val_split,
        callback,
        callback_monitor,
        num_nodes,
        num_epochs,
        class_weight,
        [metric.__name__ if callable(metric) else metric for metric in cmpltn_metrics],
        settings_hash,
        execution_time,
    ]
    scores += _evaluation_columns(y_test, y_pred)
    scores += _evaluation_columns(y_test, best_y_pred)

    # One comma-separated row per run; the context manager closes the file
    # even if the write fails.
    with open("results.txt", "a") as out:
        out.write(str(scores).replace("'", "")[1:-1])
        out.write("\n")

Z_174repos_min50_max1000000_zhenhao
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# Show all attributes of the History object returned by the last model.fit call.
vars(history)

{'validation_data': None,
 'model': <keras.engine.sequential.Sequential at 0x7fa30527bbe0>,
 '_chief_worker_only': None,
 '_supports_tf_logs': False,
 'history': {'loss': [0.3096500039100647,
   0.3010702431201935,
   0.2996620237827301,
   0.2972530722618103,
   0.2955453395843506,
   0.29379507899284363,
   0.2926989793777466,
   0.29235410690307617,
   0.2913242280483246,
   0.290630966424942,
   0.28983640670776367,
   0.2896806299686432,
   0.2892162501811981,
   0.28887367248535156,
   0.2887428402900696,
   0.2896438539028168,
   0.2883419990539551,
   0.2894187271595001,
   0.2880391776561737,
   0.28797587752342224],
  'f1_score': [array([0.23132716], dtype=float32),
   array([0.2498759], dtype=float32),
   array([0.25386375], dtype=float32),
   array([0.25819632], dtype=float32),
   array([0.26188293], dtype=float32),
   array([0.26594236], dtype=float32),
   array([0.26852974], dtype=float32),
   array([0.26866344], dtype=float32),
   array([0.27133986], dtype=float32),
   a