In [34]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from notebook_helper import MyCorpus, build_model, build_callbacks, build_embedding_matrix, iteration_features

from string import ascii_letters
import time

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa

# Import necessary modules
from sklearn.model_selection import train_test_split, StratifiedKFold, StratifiedShuffleSplit

# Keras specific

#### CHANGED from import keras:
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
#####
from keras.layers import Dense, LSTM, Embedding, Flatten, CuDNNLSTM, Bidirectional, Dropout


# from keras.utils import to_categorical

# Gensim
import gensim.models
from gensim import utils

from numpy import array
from numpy import asarray
from numpy import zeros


from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, balanced_accuracy_score

# from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.losses import BinaryCrossentropy
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [35]:
# Toggle for the two Random Forest baseline cells further down: both are
# wrapped in `if run_random_forests:`, so setting this to False skips them.
run_random_forests = True

In [37]:
# Pin TensorFlow to the second physical GPU (index 1) when the machine has
# one; otherwise leave the device configuration untouched.
print("Before:\n" ,tf.config.get_visible_devices('GPU'))
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
    tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
    print("After:\n" ,tf.config.get_visible_devices('GPU'))
else:
    # Fewer than two GPUs: there is no device at index 1 to select. Report it
    # explicitly instead of failing silently, and keep TensorFlow's defaults.
    print(f"Found {len(gpus)} GPU(s); GPU index 1 is unavailable, "
          "keeping TensorFlow's default device visibility.")

Before:
 []


In [39]:
# Read the data
min_val = 50
repo_name = f"174repos_min{min_val}_max1000000"
# repo_name = f"300repos_min{min_val}_max1000000"
# repo_name = f"combination"
df = pd.read_csv('../features/'+ repo_name +'_expanded.csv')

# Remove errors: drop every row whose `parent` or `type` column holds 'b'.
df = df[(df["parent"] != 'b') & (df["type"] != 'b')]

# Count the classes by label value rather than by unpacking value_counts():
# value_counts() orders by frequency, so unpacking it would swap the two
# numbers if positives ever outnumbered negatives, and would raise if only
# one class were present.
log_cnt = int((df['contains_logging'] == 1).sum())
no_log_cnt = int((df['contains_logging'] == 0).sum())
par_vec_cnt = no_log_cnt + log_cnt
# Guard the ratio so an empty (fully filtered) frame does not divide by zero.
log_ratio = log_cnt / par_vec_cnt if par_vec_cnt else 0.0
print(f"Number of parameter vecs:\t\t{par_vec_cnt}")
print(f"without logging (negatives):\t{no_log_cnt}")
print(f"with logging (positives):\t\t{log_cnt}")
print(f"Log ratio:\t\t\t\t\t\t{log_ratio * 100:.2f}%")
print(df.shape)
df.head()

Number of parameter vecs:		715488
without logging (negatives):	700691
with logging (positives):		14797
Log ratio:						2.07%
(715488, 31)


Unnamed: 0,type,location,parent,context,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_match_statement,contains_while_statement,...,contains_exec_statement,contains_future_import_statement,contains_global_statement,contains_nonlocal_statement,contains_print_statement,contains_assignment,contains_call,contains_await,contains_yield,contains_logging
0,c,10;0-12;56,a,cdr,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,d,11;4-12;56,c,dr,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,d,15;0-16;67,a,dur,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,d,19;0-21;60,a,dru,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,d,24;0-32;77,a,dqfrqrr,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


In [42]:
# Convert the compacted context from letters into strings of integers
# Each letter becomes the string form of its position in string.ascii_letters
# ('a' -> "0", 'b' -> "1", ..., 'Z' -> "51").
_LETTER_TO_TOKEN = {letter: str(position) for position, letter in enumerate(ascii_letters)}


def _encode_context(raw_context):
    """Turn one compact context string into a list of integer-string tokens.

    A value that is already a list is returned unchanged, so re-running this
    cell does not try to re-encode its own output.

    Raises:
        ValueError: if the context contains a character that is not an ASCII
            letter; the message names the character and the context.
    """
    if isinstance(raw_context, list):
        return raw_context
    # NOTE(review): str() is applied to every value, so a missing context
    # (NaN) would be encoded as the letters "n", "a", "n" -- confirm that the
    # CSV has no empty contexts.
    try:
        return [_LETTER_TO_TOKEN[letter] for letter in str(raw_context)]
    except KeyError as err:
        raise ValueError(
            f"Unexpected character {err.args[0]!r} in context {raw_context!r}"
        ) from err


df["context"] = [_encode_context(raw_context) for raw_context in df["context"]]
df.head()

Unnamed: 0,type,location,parent,context,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_match_statement,contains_while_statement,...,contains_exec_statement,contains_future_import_statement,contains_global_statement,contains_nonlocal_statement,contains_print_statement,contains_assignment,contains_call,contains_await,contains_yield,contains_logging
0,c,10;0-12;56,a,"[2, 3, 17]",0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,d,11;4-12;56,c,"[3, 17]",0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,d,15;0-16;67,a,"[3, 20, 17]",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,d,19;0-21;60,a,"[3, 17, 20]",0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,d,24;0-32;77,a,"[3, 16, 5, 17, 16, 17, 17]",0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


In [43]:
# Word2Vec Model
# Train Word2Vec on the token lists of every context. min_count=1 keeps even
# tokens that occur only once in the vocabulary.

context_token_lists = df.context.tolist()
sentences = MyCorpus(context_token_lists)
gensim_model = gensim.models.Word2Vec(min_count=1, sentences=sentences)

# Number of distinct tokens the model learned a vector for.
actual_vocab_size = len(gensim_model.wv.key_to_index)
actual_vocab_size

34

In [44]:
# Build an embedding for each context as the mean of its words' Word2Vec vectors
# corpus_embeddings = []
# for doc in sentences:
#     doc_embedding = np.zeros((len(doc), 100), dtype=np.float32)
#     for idx, word in enumerate(doc):
#                     doc_embedding[idx] = gensim_model.wv[word]
#     doc_embedding = np.mean(doc_embedding, axis=0)
#     corpus_embeddings.append(doc_embedding)
# corpus_embeddings = np.array(corpus_embeddings)
# df_corpus_embeddings = pd.DataFrame(corpus_embeddings, columns=["c"+str(i) for i in range(100)])
# df_corpus_embeddings.head()

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c90,c91,c92,c93,c94,c95,c96,c97,c98,c99
0,-0.230097,1.35769,-0.054724,0.787381,-0.583491,0.568553,-0.607659,-0.341705,-0.293219,-0.318684,...,0.457015,-0.26772,0.402871,-0.184077,-0.22512,0.004844,-0.113994,-1.016737,-0.200488,0.549677
1,-0.302136,0.546907,-0.077963,0.603035,0.232146,-0.097231,-0.318668,-0.380425,0.481469,0.076474,...,-0.254732,-0.224073,0.276448,-0.332256,0.176632,-0.287678,-0.120254,0.141745,0.063738,0.327092
2,0.524284,0.088531,-0.312373,-0.014159,0.199369,0.09794,-0.890064,0.571978,0.951072,-0.493446,...,-0.041747,-0.152901,0.009333,-0.089158,0.203666,-0.173905,-0.392186,-0.040864,-0.198627,0.650111
3,0.524284,0.088531,-0.312373,-0.014159,0.199369,0.09794,-0.890064,0.571978,0.951072,-0.493446,...,-0.041747,-0.152901,0.009333,-0.089158,0.203666,-0.173905,-0.392186,-0.040864,-0.198627,0.650111
4,-0.317444,-0.103982,-0.40728,0.383974,0.707843,-0.525569,0.244733,-0.568876,0.461856,0.524103,...,-0.766642,-0.111786,0.252804,-0.325927,0.377132,-0.406103,0.008015,0.982359,0.724123,-0.025433


In [45]:
# TF-IDF
# vectorizer = TfidfVectorizer(token_pattern="\w+")
# concat_context = [" ".join(x) for x in df.context]
# vectorize_res = vectorizer.fit_transform(concat_context)
# df_tfidf = pd.DataFrame(vectorize_res.toarray(), columns=vectorizer.get_feature_names_out())
# df_tfidf.head()

Unnamed: 0,0,1,10,11,12,13,14,15,16,17,...,30,31,32,33,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.288665,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.731951,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.452504,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.452504,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.514081,0.684461,...,0.0,0.0,0.0,0.0,0.0,0.471304,0.0,0.0,0.0,0.0


In [46]:
# Display names of the evaluation metrics, in the same order in which the
# `scores` lists of the Random Forest cells are built; the list is used as the
# column labels of the score DataFrame in those cells.
score_names = [
    "Balanced accuracy score",
    "Precision score",
    "Recall score",
    "F1 Binary"
]

In [47]:
# Simple Random Forest Run
# Baseline on the hand-crafted features: one stratified 75/25 split and one
# small forest for every entry of `dataframes`.

if run_random_forests:
    df_regular = df.drop(columns=["context"])
    dataframes = [
        ("regular", df_regular),
        # ("corpus", df_corpus_embeddings.join(pd.DataFrame(df.contains_logging))),
        # ("tf-idf", df_tfidf.join(pd.DataFrame(df.contains_logging))),
        # ("regular+corpus", df_regular.join(df_corpus_embeddings)),
        # ("regular+tfidf", df_regular.join(df_tfidf)),
        # ("corpus+tfidf", df_corpus_embeddings.join(df_tfidf).join(pd.DataFrame(df.contains_logging))),
        # ("regular+corpus+tfidf", df_regular.join(df_corpus_embeddings).join(df_tfidf)),
    ]

    for df_name, df_used in dataframes:
        print(df_name)

        # Target is the logging flag; features are everything else except the
        # source location. Frames whose name contains "regular" still carry the
        # categorical `type` / `parent` columns, which are one-hot encoded.
        y = df_used.contains_logging
        X = df_used.drop(columns=["contains_logging", "location"])
        if "regular" in df_name:
            X = pd.get_dummies(X, columns=["type", "parent"])

        # Split data into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=0.25, random_state=0
        )

        classifier = RandomForestClassifier(n_jobs=-1, n_estimators=9, random_state=0)
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        # Metric functions listed in the same order as `score_names`.
        metric_functions = (
            balanced_accuracy_score,
            precision_score,
            recall_score,
            lambda truth, predicted: f1_score(truth, predicted, average='binary', pos_label=True),
        )
        scores = [metric(y_test, y_pred) for metric in metric_functions]
        score_df = pd.DataFrame([scores], columns=score_names).mean().round(3)
        print(score_df)

        cm = confusion_matrix(y_test, y_pred, labels=classifier.classes_)
        disp = ConfusionMatrixDisplay(
            confusion_matrix=cm,
            display_labels=classifier.classes_,
        )
        disp.plot(cmap=plt.cm.Blues)

        # Alternative evaluation with a custom decision threshold (disabled):
        # y_proba = classifier.predict_proba(X_test)
        # threshold = 0.5
        # y_pred_alt = np.where(y_proba[:, 1] > threshold, 1, 0)
        # scores_alt = [
        #         balanced_accuracy_score(y_test, y_pred_alt),
        #         precision_score(y_test, y_pred_alt),
        #         recall_score(y_test, y_pred_alt),
        #         f1_score(y_test, y_pred_alt, average='binary', pos_label=True)
        # ]
        # cm_alt = confusion_matrix(y_test, y_pred_alt, labels=classifier.classes_)
        # score_df_alt = pd.DataFrame([scores_alt], columns=score_names).mean().round(3)
        # print(score_df_alt)
        # disp_alt = ConfusionMatrixDisplay(confusion_matrix=cm_alt,
        #                               display_labels=classifier.classes_)
        # disp_alt.plot(cmap=plt.cm.Blues)
        plt.show()


In [49]:
# Folded Random Forest Run
# Repeats the Random Forest baseline over five stratified shuffle splits
# (75 % train / 25 % test each) and reports the mean of the scores and the
# mean confusion matrix over those splits.
if run_random_forests:
    # Hyper Params
    n_estimators = 9
    min_samples_split = 5
    min_samples_leaf = 1
    max_depth = None
    # Weight the positive (contains_logging) class four times as heavily as
    # the negative class.
    class_weight = {False: 1, True: 4}

    df_regular = df.drop(["context"], axis=1)
    dataframes = [
        ("regular", df_regular),
        # ("corpus", df_corpus_embeddings.join(pd.DataFrame(df.contains_logging))),
        # ("tf-idf", df_tfidf.join(pd.DataFrame(df.contains_logging))),
        # ("regular+corpus", df_regular.join(df_corpus_embeddings)),
        # ("regular+tfidf", df_regular.join(df_tfidf)),
        # ("corpus+tfidf", df_corpus_embeddings.join(df_tfidf).join(pd.DataFrame(df.contains_logging))),
        # ("regular+corpus+tfidf", df_regular.join(df_corpus_embeddings).join(df_tfidf)),
    ]

    for df_name, df_used in dataframes:
        print(df_name)
        all_scores = []
        conf_matrices = []
        # Features: everything except the label and the source location;
        # "regular" frames additionally get `type` / `parent` one-hot encoded.
        X = df_used.drop(["contains_logging", "location"], axis=1)
        if "regular" in df_name:
            X = pd.get_dummies(X, columns=["type", "parent"])
        y = df_used.contains_logging
        # Despite the variable name this is a StratifiedShuffleSplit: each of
        # the five "folds" is an independent random 75/25 split, so test sets
        # may overlap between folds.
        skf = StratifiedShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
        for k_fold, (train_index, test_index) in enumerate(skf.split(X, y)):
            print(f"Starting fold {k_fold + 1}.")
            # random_state is fixed (as in the simple run) so that repeated
            # executions of this cell produce the same forests and scores.
            classifier = RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1,
                                           min_samples_split=min_samples_split,
                                           min_samples_leaf=min_samples_leaf,
                                           max_depth=max_depth,
                                           class_weight=class_weight,
                                           random_state=0)
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)
            # Same order as `score_names`.
            scores = [
                    balanced_accuracy_score(y_test, y_pred),
                    precision_score(y_test, y_pred),
                    recall_score(y_test, y_pred),
                    f1_score(y_test, y_pred, average='binary', pos_label=True)
            ]
            all_scores.append(scores)
            cm = confusion_matrix(y_test, y_pred, labels=classifier.classes_)
            conf_matrices.append(cm)
        score_df = pd.DataFrame(all_scores, columns=score_names).mean().round(3)
        print(score_df)
        # Round the averaged counts to the nearest integer; a plain
        # astype(int) would truncate and systematically under-report cells.
        avg_cm = np.rint(np.mean(conf_matrices, axis=0)).astype(int)
        disp = ConfusionMatrixDisplay(confusion_matrix=avg_cm,
                                      display_labels=classifier.classes_)
        disp.plot(cmap=plt.cm.Blues)
        plt.show()

In [50]:
# Data split for Tensorflow
# Stratified 75/25 split of the full frame. `context` and `location` are
# still part of X here; the categorical `type` / `parent` columns are one-hot
# encoded before splitting.

y = df["contains_logging"]
X = pd.get_dummies(
    df.drop(columns=["contains_logging"]),
    columns=["type", "parent"],
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.25, random_state=0
)
X_train

Unnamed: 0,location,context,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_match_statement,contains_while_statement,contains_try_statement,contains_with_statement,...,parent_f,parent_g,parent_h,parent_i,parent_j,parent_k,parent_l,parent_m,parent_o,parent_p
585910,39;4-44;62,"[3, 16, 17, 17, 21, 21, 21]",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
577170,24;0-37;18,"[3, 16, 17, 16, 17, 4, 4, 17, 17, 11, 16, 17, 20]",0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
125864,124;4-125;22,"[3, 8, 16, 17, 16, 17, 17, 16, 16, 17, 17, 16,...",0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
169722,147;8-153;33,"[3, 5, 16, 9, 17, 17, 17, 17]",0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
343689,19;0-30;46,"[3, 16, 17, 4, 17, 20, 16, 4, 16, 17, 20, 17]",0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95090,289;4-299;69,"[3, 17, 16, 17, 17, 17]",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
449486,333;12-337;17,"[3, 16, 5, 16, 17, 16, 9, 17, 16, 9, 17, 21, 1...",0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
184605,332;4-333;63,"[3, 16, 16, 4, 17, 16, 17, 16, 11, 16]",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
594435,239;4-251;34,"[3, 16, 17, 16, 16, 9, 17, 17, 9, 17, 17, 9, 1...",0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [51]:
import hashlib  # only needed in this cell, for the reproducible settings fingerprint

# --- Hyper-parameters of the hybrid (context LSTM + dense) model -------------
# One more than the Word2Vec vocabulary.
# NOTE(review): presumably the extra slot is reserved for the padding index 0
# -- confirm against notebook_helper.build_embedding_matrix.
vocab_size = actual_vocab_size + 1
output_dims = 100            # width of each embedding vector
max_length = 80              # contexts are padded / truncated to this many tokens
num_epochs = 20
batch_size = 64
trainable=True               # whether the embedding weights are updated during training
dropout = 0.2                # dropout passed to the LSTM layer
val_split = 0.0
num_nodes = 128              # number of LSTM units
callback = ["cp"]            # callback selectors handed to build_callbacks
callback_monitor = 'val_f1_score'
# Placeholder string that is only written to the results log; the model's fit
# call does not use class weights.
class_weight = "class_weight_not_supported"
cmpltn_metrics = [tfa.metrics.F1Score(num_classes=1, threshold=0.5)]

# Fingerprint of the settings above, passed to build_callbacks and written to
# the results log. Built from a SHA-256 digest of stable values so that the
# same settings give the same number in every kernel session: the built-in
# hash() of a string is salted per process, and the repr of a metric object
# contains its memory address, so metrics are represented by their name.
_settings_fingerprint = repr([
    vocab_size, output_dims, max_length, num_epochs, batch_size, trainable,
    dropout, val_split, num_nodes, callback, callback_monitor, class_weight,
    [getattr(metric, "name", metric) for metric in cmpltn_metrics],
])
# 15 hex digits = 60 bits: a non-negative integer that fits in a signed 64-bit value.
settings_hash = int(hashlib.sha256(_settings_fingerprint.encode("utf-8")).hexdigest()[:15], 16)
k_fold = 0

In [52]:
# Build embedding matrix
# build_embedding_matrix is imported from notebook_helper; it receives the
# vocabulary size, the embedding width and the trained Word2Vec model. The
# result is passed to the Embedding layer as its initial weights.
# NOTE(review): presumably returns a (vocab_size, output_dims) array whose row
# order matches the integer token ids fed to the model -- confirm in
# notebook_helper.
embedding_matrix = build_embedding_matrix(vocab_size, output_dims, gensim_model)

array([[ 0.90691203,  0.90985811,  1.3578819 , ...,  3.39418697,
        -1.23286963, -1.47876728],
       [-0.25118244,  1.57624137, -0.87413281, ..., -1.49761927,
        -1.23354733, -0.18435796],
       [-0.08601976,  2.97925544, -0.00824505, ..., -3.33370137,
        -0.72893882,  0.99484622],
       ...,
       [ 1.03554654,  0.2696079 ,  0.77830166, ...,  0.35817093,
        -0.09002549, -1.72760308],
       [-3.22901392, -0.4214204 ,  1.83062434, ..., -1.34349501,
         0.11103494, -1.81138647],
       [-0.63224596,  0.52436721,  0.77367264, ...,  0.35751042,
         0.42844871,  0.25424877]])

In [53]:
# Pad the context to create the context input
def _pad_contexts(context_column):
    """Pad/truncate every token list in `context_column` to `max_length` entries.

    Padding uses the value 0.0 because it corresponds with <PAD>.
    """
    token_lists = np.array(list(context_column), dtype=object)
    return pad_sequences(token_lists, maxlen=max_length, value=0.0)


# NOTE(review): the tokens are the raw positions in ascii_letters, so the
# letter 'a' is encoded as 0 as well -- if 'a' can occur inside a context it
# is indistinguishable from padding here. Confirm, and shift the ids if needed.
padded_inputs = _pad_contexts(X_train.context)
padded_inputs_test = _pad_contexts(X_test.context)
padded_inputs

array([[ 0,  0,  0, ..., 21, 21, 21],
       [ 0,  0,  0, ..., 16, 17, 20],
       [ 0,  0,  0, ..., 28, 14, 17],
       ...,
       [ 0,  0,  0, ..., 16, 11, 16],
       [ 0,  0,  0, ...,  9, 17, 17],
       [ 0,  0,  0, ...,  4, 17, 22]], dtype=int32)

In [54]:
# Prepare the "other" input
# Everything except the token sequence and the source location goes into the
# dense branch of the model.
sequence_and_location = ["context", "location"]
regular_inputs = X_train.drop(columns=sequence_and_location)
regular_inputs_test = X_test.drop(columns=sequence_and_location)
regular_inputs

Unnamed: 0,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_match_statement,contains_while_statement,contains_try_statement,contains_with_statement,contains_return_statement,contains_assert_statement,...,parent_f,parent_g,parent_h,parent_i,parent_j,parent_k,parent_l,parent_m,parent_o,parent_p
585910,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
577170,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
125864,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
169722,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
343689,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95090,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
449486,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
184605,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
594435,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [55]:
# DEBUG
# num_epochs = 1
# /DEBUG


def _score_digits(score, decimals):
    """Format a score in [0, 1] as its decimal digits without the leading "0.".

    0.87 with two decimals becomes "87". A perfect score keeps its integer
    part (1.0 -> "100"), so it cannot be confused with 0.0 ("00") in the log.
    """
    whole, fraction = f"{score:.{decimals}f}".split(".")
    return fraction if whole == "0" else whole + fraction


def _test_scores(predictions):
    """Balanced accuracy, precision, recall (2 digits) and F1 (3 digits) on the test split."""
    return [
        _score_digits(balanced_accuracy_score(y_test, predictions), 2),
        _score_digits(precision_score(y_test, predictions), 2),
        _score_digits(recall_score(y_test, predictions), 2),
        _score_digits(f1_score(y_test, predictions), 3),
    ]


start = time.time()
# Build and run the tensorflow model using both the context and the other features as inputs

context_input = keras.Input(shape=(max_length,), name="context")
other_input = keras.Input(shape=(regular_inputs.shape[1],), name="other")

# Context branch: embedding initialised with embedding_matrix, followed by one LSTM.
context_features = keras.layers.Embedding(vocab_size, output_dims, weights=[embedding_matrix], trainable=trainable)(context_input) # input_length?
context_features = keras.layers.LSTM(num_nodes, dropout=dropout)(context_features)

# context_features = keras.layers.LSTM(128, return_sequences=True)(context_features) # dropout?
# context_features = keras.layers.LSTM(128)(context_features)

# context_features = keras.layers.CuDNNLSTM(128, return_sequences=True)(context_features) # dropout?
# context_features = keras.layers.CuDNNLSTM(128)(context_features)
# context_features = keras.layers.Dense(32)(context_features)

# "Other" branch: a single dense layer over the tabular features.
other_features = keras.layers.Dense(300, activation='relu')(other_input)

# other_features = keras.layers.Dense(300, activation='relu')(other_input)
# other_features = keras.layers.Dense(100, activation='relu')(other_features)
# other_features = keras.layers.Dense(32, activation='relu')(other_features)

x = keras.layers.concatenate([context_features, other_features])

# Single sigmoid unit: probability that the block contains logging.
logging_pred = keras.layers.Dense(1, name="logging", activation='sigmoid')(x) # Sigmoid?

model = keras.Model(
    inputs=[context_input, other_input],
    outputs=[logging_pred],
)
# model.summary()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=cmpltn_metrics)

# Build the callbacks
callbacks, model_cp_filepath = build_callbacks(callback, callback_monitor, repo_name, settings_hash, k_fold)

# Fit the model
# NOTE(review): the test split doubles as validation data, so any checkpoint
# chosen via `callback_monitor` is selected on the test set and the "Best_*"
# scores below are optimistic -- consider a separate validation split.
# Keras ignores validation_split whenever validation_data is given.
history = model.fit(
    {"context": padded_inputs, "other": regular_inputs},
    {"logging": y_train},
    epochs=num_epochs,
    batch_size=batch_size,
    validation_data=({"context": padded_inputs_test, "other": regular_inputs_test}, y_test),
    validation_split=val_split,
    callbacks=callbacks,
#     class_weight=class_weight
)

# Predict with final weights
pred_test = model.predict({"context": padded_inputs_test, "other": regular_inputs_test}, batch_size=batch_size)
y_pred = np.round(pred_test)

# Without a checkpoint callback there are no separate "best" weights; fall back
# to the final-weight predictions so the "Best_*" columns are always defined
# (and never taken from a stale variable of an earlier run).
best_y_pred = y_pred
if "cp" in callback:
    # Now load the best weights and predict on test data again
    model.load_weights(model_cp_filepath)
    best_pred_test = model.predict({"context": padded_inputs_test, "other": regular_inputs_test}, batch_size=batch_size)
    best_y_pred = np.round(best_pred_test)

end = time.time()
execution_time = int(end - start)

# One log record: timestamp, run settings, then final-weight and best-weight scores.
scores = [
        time.ctime(),
        f"{repo_name[:3]}_min{min_val}_hybrid",
        max_length,
        vocab_size,
        batch_size,
        trainable,
        dropout,
        val_split,
        callback,
        callback_monitor,
        num_nodes,
        num_epochs,
        class_weight,
        [metric.name if callable(metric) else metric for metric in cmpltn_metrics],
        settings_hash,
        execution_time,
        *_test_scores(y_pred),
        *_test_scores(best_y_pred),
    ]

# Append header and record; the context manager closes the file even if a
# write fails.
with open("results.txt", "a") as out:
    out.write("Timestamp, " + iteration_features + ", settings_hash, execution_time, Final_Bal_Acc, Final_Prec, Final_Recall, Final_F1, Best_Bal_Acc, Best_Prec, Best_Recall, Best_F1")
    out.write("\n")
    out.write(str(scores).replace("'", "")[1:-1])
    out.write("\n")

2022-11-02 01:22:42.674005: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/2


InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNNV2' used by {{node model/cu_dnnlstm/CudnnRNNV2}} with these attrs: [dropout=0, seed=0, T=DT_FLOAT, input_mode="linear_input", direction="unidirectional", rnn_mode="lstm", is_training=true, seed2=0]
Registered devices: [CPU]
Registered kernels:
  <no registered kernels>

	 [[model/cu_dnnlstm/CudnnRNNV2]] [Op:__inference_train_function_2744]