In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from notebook_helper import MyCorpus, build_hybrid_model, build_callbacks, build_embedding_matrix, iteration_features
from imblearn.combine import SMOTETomek, SMOTEENN
from imblearn.over_sampling import SMOTE, RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline

import hashlib
import time
from string import ascii_letters

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa

# Import necessary modules
from sklearn.model_selection import train_test_split, StratifiedKFold, StratifiedShuffleSplit

# Keras specific

#### CHANGED from import keras:
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import Sequential
#####
from keras.layers import Dense, LSTM, Embedding, Flatten, CuDNNLSTM, Bidirectional, Dropout


# from keras.utils import to_categorical

# Gensim
import gensim.models
from gensim import utils

from numpy import array
from numpy import asarray
from numpy import zeros


from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, balanced_accuracy_score

# from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.losses import BinaryCrossentropy
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Master switch for the "Folded Random Forest Run" cell: that cell's whole body
# is guarded by this flag, so leaving it False skips the random forest baseline.
run_random_forests = False

In [3]:
# Restrict TensorFlow to the second physical GPU (index 1) when the machine has
# one. On hosts with fewer than two GPUs the device configuration is left as
# is, and that fact is reported instead of being swallowed silently.
print("Before:\n" ,tf.config.get_visible_devices('GPU'))
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
    tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
    print("After:\n" ,tf.config.get_visible_devices('GPU'))
else:
    print(f"Found {len(gpus)} GPU(s); keeping the default device visibility.")

Before:
 []


In [4]:
# Read the data
min_val = 50
repo_name = f"174repos_min{min_val}_max1000000"
# repo_name = f"300repos_min{min_val}_max1000000"
# repo_name = f"combination"
df = pd.read_csv('../features/'+ repo_name +'_expanded.csv')
# df = pd.read_csv('../features/combination.csv')

# Remove errors: rows whose `parent` or `type` code is 'b' are discarded.
df = df[(df["parent"] != 'b') & (df["type"] != 'b')]

# Count each class explicitly. Unpacking `value_counts()` would rely on the
# negatives being the more frequent class (it sorts by frequency) and would
# fail outright if only one class were present.
is_logging = df['contains_logging'].astype(bool)
log_cnt = is_logging.sum()
no_log_cnt = (~is_logging).sum()
par_vec_cnt = no_log_cnt + log_cnt
log_ratio = log_cnt / par_vec_cnt
print(f"Number of parameter vecs:\t\t{par_vec_cnt}")
print(f"without logging (negatives):\t{no_log_cnt}")
print(f"with logging (positives):\t\t{log_cnt}")
print(f"Log ratio:\t\t\t\t\t\t{log_ratio * 100:.2f}%")
print(df.shape)
df.head()

Number of parameter vecs:		117736
without logging (negatives):	112767
with logging (positives):		4969
Log ratio:						4.22%
(117736, 29)


Unnamed: 0,type,parent,context,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_while_statement,contains_try_statement,contains_with_statement,...,contains_exec_statement,contains_future_import_statement,contains_global_statement,contains_nonlocal_statement,contains_print_statement,contains_assignment,contains_call,contains_await,contains_yield,contains_logging
0,g,a,avgv,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,k,g,avgvkx,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,d,a,avvwdnn,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,c,a,cmmenhnmnmnndximjtmndmnnmmq,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,c,a,cmdmimjmq,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [5]:
# Convert the compacted context from letters into strings of integers:
# each character is replaced by its position in `ascii_letters`, kept as a str.
# NOTE: this cell is not idempotent. Once `context` holds lists, `str(...)` of a
# list contains characters such as "[" that are not in `ascii_letters`, so a
# second run raises ValueError. Re-run the data loading cell first.
df.context = [
    [str(ascii_letters.index(letter)) for letter in str(compact_context)]
    for compact_context in df.context
]
df.head()

Unnamed: 0,type,parent,context,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_while_statement,contains_try_statement,contains_with_statement,...,contains_exec_statement,contains_future_import_statement,contains_global_statement,contains_nonlocal_statement,contains_print_statement,contains_assignment,contains_call,contains_await,contains_yield,contains_logging
0,g,a,"[0, 21, 6, 21]",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,k,g,"[0, 21, 6, 21, 10, 23]",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,d,a,"[0, 21, 21, 22, 3, 13, 13]",0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
3,c,a,"[2, 12, 12, 4, 13, 7, 13, 12, 13, 12, 13, 13, ...",0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,c,a,"[2, 12, 3, 12, 8, 12, 9, 12, 16]",0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [39]:
# Define X and y
# Features: everything except the source location and the label, with the
# categorical `type` and `parent` codes expanded into one-hot columns.
feature_frame = df.drop(columns=["location", "contains_logging"])
# feature_frame = df.drop(columns=["contains_logging"])
X = pd.get_dummies(feature_frame, columns=["type", "parent"])
# Label: whether the block contains a logging statement.
y = df["contains_logging"]

In [7]:
# Word2Vec Model
# Train embeddings on the integer-string token sequences of every context.
# `min_count=1` keeps every token, however rare, in the vocabulary.
sentences = MyCorpus(df.context.tolist())
gensim_model = gensim.models.Word2Vec(sentences=sentences, min_count=1)

# Number of distinct tokens the model actually learned a vector for.
actual_vocab_size = len(gensim_model.wv.key_to_index)
actual_vocab_size

29

In [44]:
# Build an embedding for each context as the mean of its words' Word2Vec vectors
# corpus_embeddings = []
# for doc in sentences:
#     doc_embedding = np.zeros((len(doc), 100), dtype=np.float32)
#     for idx, word in enumerate(doc):
#                     doc_embedding[idx] = gensim_model.wv[word]
#     doc_embedding = np.mean(doc_embedding, axis=0)
#     corpus_embeddings.append(doc_embedding)
# corpus_embeddings = np.array(corpus_embeddings)
# df_corpus_embeddings = pd.DataFrame(corpus_embeddings, columns=["c"+str(i) for i in range(100)])
# df_corpus_embeddings.head()

Unnamed: 0,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,...,c90,c91,c92,c93,c94,c95,c96,c97,c98,c99
0,-0.230097,1.35769,-0.054724,0.787381,-0.583491,0.568553,-0.607659,-0.341705,-0.293219,-0.318684,...,0.457015,-0.26772,0.402871,-0.184077,-0.22512,0.004844,-0.113994,-1.016737,-0.200488,0.549677
1,-0.302136,0.546907,-0.077963,0.603035,0.232146,-0.097231,-0.318668,-0.380425,0.481469,0.076474,...,-0.254732,-0.224073,0.276448,-0.332256,0.176632,-0.287678,-0.120254,0.141745,0.063738,0.327092
2,0.524284,0.088531,-0.312373,-0.014159,0.199369,0.09794,-0.890064,0.571978,0.951072,-0.493446,...,-0.041747,-0.152901,0.009333,-0.089158,0.203666,-0.173905,-0.392186,-0.040864,-0.198627,0.650111
3,0.524284,0.088531,-0.312373,-0.014159,0.199369,0.09794,-0.890064,0.571978,0.951072,-0.493446,...,-0.041747,-0.152901,0.009333,-0.089158,0.203666,-0.173905,-0.392186,-0.040864,-0.198627,0.650111
4,-0.317444,-0.103982,-0.40728,0.383974,0.707843,-0.525569,0.244733,-0.568876,0.461856,0.524103,...,-0.766642,-0.111786,0.252804,-0.325927,0.377132,-0.406103,0.008015,0.982359,0.724123,-0.025433


In [45]:
# TF-IDF
# vectorizer = TfidfVectorizer(token_pattern="\w+")
# concat_context = [" ".join(x) for x in df.context]
# vectorize_res = vectorizer.fit_transform(concat_context)
# df_tfidf = pd.DataFrame(vectorize_res.toarray(), columns=vectorizer.get_feature_names_out())
# df_tfidf.head()

Unnamed: 0,0,1,10,11,12,13,14,15,16,17,...,30,31,32,33,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.288665,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.731951,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.452504,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.452504,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.514081,0.684461,...,0.0,0.0,0.0,0.0,0.0,0.471304,0.0,0.0,0.0,0.0


In [9]:
# Column labels for the per-fold score table of the random forest cell; the
# order matches the order in which that cell collects its metrics.
score_names = ["Balanced accuracy score", "Precision score", "Recall score", "F1 Binary"]

In [49]:
# Folded Random Forest Run
# Baseline on the non-sequence features only: for each candidate weight of the
# positive class, train a random forest on 5 stratified shuffle splits, print
# the mean scores, plot the averaged confusion matrix and append one line to
# the results file.
if run_random_forests:
    X_rnd = X.drop(["context"], axis=1)
    rnd_forest_sampling_strategy = 0.05
    use_sampling = False
    # Seed for the forest and the optional sampler, so that a rerun reproduces
    # the reported numbers (the splitter below is seeded as well).
    rnd_forest_seed = 0
    # Hyper Params
    n_estimators = 9
    min_samples_split = 5
    min_samples_leaf = 1
    max_depth = None

    for positive_weight in [2, 3, 3.5, 4, 4.5, 5]:
        print(f"Weight: {positive_weight}")
        class_weight = {False: 1, True: positive_weight}
        all_scores = []
        conf_matrices = []

        skf = StratifiedShuffleSplit(n_splits=5, test_size=0.25, random_state=0)
        for k_fold, (train_index, test_index) in enumerate(skf.split(X_rnd, y)):
            print(f"Starting fold {k_fold + 1}.")
            classifier = RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1,
                                                min_samples_split=min_samples_split,
                                                min_samples_leaf=min_samples_leaf,
                                                max_depth=max_depth,
                                                class_weight=class_weight,
                                                random_state=rnd_forest_seed)
            X_train, X_test = X_rnd.iloc[train_index], X_rnd.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            # Optional resampling of the training part only; the test part
            # keeps the original class distribution.
            if use_sampling:
                # sampler = RandomUnderSampler(sampling_strategy=rnd_forest_sampling_strategy)
                sampler = RandomOverSampler(sampling_strategy=rnd_forest_sampling_strategy,
                                            random_state=rnd_forest_seed)
                X_train, y_train = sampler.fit_resample(X_train, y_train)

            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)
            # Same order as `score_names`.
            all_scores.append([
                balanced_accuracy_score(y_test, y_pred),
                precision_score(y_test, y_pred),
                recall_score(y_test, y_pred),
                f1_score(y_test, y_pred, average='binary', pos_label=True),
            ])
            conf_matrices.append(confusion_matrix(y_test, y_pred, labels=classifier.classes_))

        # Mean of every score over the folds.
        score_df = pd.DataFrame(all_scores, columns=score_names).mean().round(3)
        print(score_df)
        # Element-wise mean of the fold confusion matrices (truncated to int).
        avg_cm = np.mean(conf_matrices, axis=0).astype(int)
        disp = ConfusionMatrixDisplay(confusion_matrix=avg_cm,
                                      display_labels=classifier.classes_)
        disp.plot(cmap=plt.cm.Blues)
        plt.show()
        # Columns: Name, Timestamp, use_sampling, sampling_strategy, n_estimators,
        # min_samples_split, class_weight, followed by the entries of score_names.
        # The context manager closes the file even if writing fails.
        with open("my_approach_rnd_forest_results", "a") as out:
            out.write(f"{repo_name}_over, {time.ctime()}, {use_sampling}, {rnd_forest_sampling_strategy}, {n_estimators}, {min_samples_split}, {class_weight}, " +
                      ", ".join([str(x) for x in score_df.values]) + "\n")

In [12]:
# Data split for Tensorflow
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, stratify=y, random_state=0)
# X_train

Unnamed: 0,context,contains_class_definition,contains_function_definition,contains_if_statement,contains_for_statement,contains_while_statement,contains_try_statement,contains_with_statement,contains_return_statement,contains_assert_statement,...,parent_c,parent_d,parent_e,parent_f,parent_g,parent_h,parent_i,parent_j,parent_k,parent_l
13978,"[2, 13, 13, 12, 12, 3, 13, 20, 13, 12, 12, 12,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
142857,"[2, 12, 13, 2, 16, 13, 2, 16, 13, 12, 12, 13, ...",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
135386,"[2, 12, 12, 13, 3, 12, 13, 3, 3, 16]",0,0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
40611,"[2, 12, 13, 12, 13, 5, 12, 13, 13, 13]",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
212407,"[2, 6, 15]",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24097,"[2, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13...",0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
129510,"[2, 3, 20, 13]",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
67815,"[2, 3, 13, 12, 13, 12, 13, 12, 12, 7, 13, 13, ...",0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
148600,"[2, 6, 12, 13, 10, 7, 13, 6, 13, 13]",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [13]:
# print("Negatives: " + str(y_train[y_train == 0].shape[0]))
# print("Positives: " + str(y_train[y_train == 1].shape[0]))
# print("Ratio:\t   " + f"{y_train[y_train == 1].shape[0] / y_train[y_train == 0].shape[0] * 100:.2f}" + "%")

Negatives: 84575
Positives: 3727
Ratio:	   4.41%


In [14]:
# Smote doesn't seem to work with the context column
# over = SMOTE(random_state=0, sampling_strategy=0.1)
# X_train_resampled, y_train_resampled = over.fit_resample(X_train, y_train)

# over = RandomOverSampler(random_state=0, sampling_strategy=0.1)
# X_train_resampled, y_train_resampled = over.fit_resample(X_train, y_train)

# sampling_strategy = 0.05
# Choose one:
# sampler = RandomUnderSampler(random_state=0, sampling_strategy=sampling_strategy)
# sampler = RandomOverSampler(random_state=0, sampling_strategy=sampling_strategy)

# X_train_resampled, y_train_resampled = sampler.fit_resample(X_train, y_train)

In [15]:
# print("Negatives: " + str(y_train_resampled[y_train_resampled == 0].shape[0]))
# print("Positives: " + str(y_train_resampled[y_train_resampled == 1].shape[0]))
# print("Ratio:\t   " + f"{y_train_resampled[y_train_resampled == 1].shape[0] / y_train_resampled[y_train_resampled == 0].shape[0] * 100:.2f}" + "%")

Negatives: 84575
Positives: 4228
Ratio:	   5.00%


In [16]:
# X_train, y_train = X_train_resampled, y_train_resampled

In [17]:
# Hyper-parameters and run settings for the hybrid (sequence + tabular) model.
sampling_strategy = 0.05
# One more than the Word2Vec vocabulary size.
# NOTE(review): presumably the extra slot is for the padding token - confirm in build_embedding_matrix.
vocab_size = actual_vocab_size + 1
output_dims = 100
max_length = 80
num_epochs = 20
batch_size = 64
trainable=True
dropout = 0.2
val_split = 0.0
num_nodes = 128
callback = ["cp"]
callback_monitor = 'val_f1_score'
class_weight = "class_weight_not_supported"
cmpltn_metrics = [tfa.metrics.F1Score(num_classes=1, threshold=0.5)]

# Fingerprint of the settings above, passed to build_callbacks and logged with
# the results. Built from a SHA-256 digest instead of the built-in hash():
# hash() of a str is salted per interpreter process, and the repr of a metric
# object is not a stable description of it, so the previous value differed
# between kernel restarts even for identical settings. Metrics are therefore
# represented by their names. 15 hex digits keep the value a non-negative
# integer below 2**60.
_settings_repr = str([
    sampling_strategy, vocab_size, output_dims, max_length, num_epochs, batch_size,
    trainable, dropout, val_split, num_nodes, callback, callback_monitor, class_weight,
    [getattr(metric, "name", metric) for metric in cmpltn_metrics],
])
settings_hash = int(hashlib.sha256(_settings_repr.encode("utf-8")).hexdigest()[:15], 16) # TODO: DEPRECATE

# Cross-validation settings
n_splits = 2

In [18]:
# Build embedding matrix
# Delegates to the project helper, handing it the vocabulary size, the
# embedding dimensionality and the trained Word2Vec model.
# NOTE(review): presumably returns a (vocab_size, output_dims) array of the
# Word2Vec vectors - confirm against notebook_helper.build_embedding_matrix.
embedding_matrix = build_embedding_matrix(vocab_size, output_dims, gensim_model)

In [41]:
# DEPRECATED: Not usable with k-fold cross-validation
# Pad the context
# padded_context = pad_sequences(np.array(X.context), maxlen=max_length, value=0.0)
# Prepare the "other" input
# other_input = X.drop(["context"], axis=1)

In [19]:
# DEPRECATED
# Pad the context to create the context input
# padded_inputs = pad_sequences(np.array(list(X_train.context), dtype=object), maxlen=max_length, value=0.0)  # 0.0 because it corresponds with <PAD>
# padded_inputs_test = pad_sequences(np.array(list(X_test.context), dtype=object), maxlen=max_length, value=0.0)  # 0.0 because it corresponds with <PAD>
# padded_inputs

array([[ 0,  0,  0, ..., 10, 20, 13],
       [ 0,  0,  0, ..., 12, 12, 13],
       [ 0,  0,  0, ...,  3,  3, 16],
       ...,
       [ 0,  0,  0, ...,  2,  3, 16],
       [ 0,  0,  0, ..., 13, 13, 13],
       [ 0,  0,  0, ..., 16,  3, 13]], dtype=int32)

In [45]:
# DEPRECATED
# Prepare the "other" input
# regular_inputs = X_train.drop(["context", "location"], axis=1)
# regular_inputs_test = X_test.drop(["context", "location"], axis=1)
# regular_inputs = X_train.drop(["context"], axis=1)
# regular_inputs_test = X_test.drop(["context"], axis=1)
# regular_inputs.shape

(88803, 46)

In [21]:
# DEBUG
# num_epochs = 1
# batch_size = 128
# /DEBUG

# Cross-validated training of the hybrid model.
# For every stratified shuffle split: oversample the training part, build the
# two model inputs, train a freshly built model, and score the test part twice
# (with the final weights and, when a checkpoint callback is configured, with
# the checkpointed weights). One summary row is appended to results.txt.


def _binary_scores(y_true, y_hat):
    """Return (balanced accuracy, precision, recall, F1) for one set of predictions."""
    return (
        balanced_accuracy_score(y_true, y_hat),
        precision_score(y_true, y_hat),
        recall_score(y_true, y_hat),
        f1_score(y_true, y_hat),
    )


def _mean_digits(values, decimals):
    """Format the mean of `values` and drop the first two characters ("0.").

    This is the compact notation already used in results.txt (0.87 -> "87").
    Returns "nan" for an empty list so the row keeps its number of columns.
    """
    if len(values) == 0:
        return "nan"
    return f"{np.mean(values, axis=0):.{decimals}f}"[2:]


start = time.time()

# List of (X_test_dict, y_test) of all folds
test_sets = []

final_bal_acc_all, final_precision_all, final_recall_all, final_f1_all = [], [], [], []
best_bal_acc_all, best_precision_all, best_recall_all, best_f1_all = [], [], [], []
final_buckets = (final_bal_acc_all, final_precision_all, final_recall_all, final_f1_all)
best_buckets = (best_bal_acc_all, best_precision_all, best_recall_all, best_f1_all)

# K-fold cross-validation
skf = StratifiedShuffleSplit(n_splits=n_splits, test_size=0.25, random_state=0)
for k_fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"Starting fold {k_fold + 1} of {n_splits}.")

    # Build and compile a new model for every fold. Reusing one model would
    # carry weights trained on earlier folds into later ones, whose test rows
    # can overlap the earlier training rows (shuffle splits are independent).
    model = build_hybrid_model(vocab_size, output_dims, embedding_matrix, max_length,
                               trainable, num_nodes, dropout, X.shape[1] - 1)
    # model.summary()
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=cmpltn_metrics)

    # Split the data into train and test sets
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    # Oversample the minority class (training part only). Features and labels
    # are resampled together so both keep the same number of rows; the seed
    # makes the resampling repeatable.
    sampler = RandomOverSampler(sampling_strategy=sampling_strategy, random_state=0)
    X_train, y_train = sampler.fit_resample(X_train, y_train)
    # Pad the context to create the context input
    # NOTE(review): the pad value 0 equals the token id produced for the letter
    # "a" by the context conversion cell - confirm this is intended.
    padded_inputs = pad_sequences(np.array(list(X_train.context), dtype=object), maxlen=max_length, value=0.0)
    padded_inputs_test = pad_sequences(np.array(list(X_test.context), dtype=object), maxlen=max_length, value=0.0)
    # Prepare the "other" input
    regular_inputs = X_train.drop(["context"], axis=1)
    regular_inputs_test = X_test.drop(["context"], axis=1)
    # Put both inputs into a dict
    X_train_dict = {"context": padded_inputs, "other": regular_inputs}
    X_test_dict = {"context": padded_inputs_test, "other": regular_inputs_test}
    # Append to the list of test sets
    test_sets.append((X_test_dict, y_test))
    # Build the callbacks
    callbacks, model_cp_filepath = build_callbacks(callback, callback_monitor, repo_name, settings_hash, k_fold)
    # Fit the model
    # NOTE(review): the test part doubles as validation data, so any selection
    # driven by `callback_monitor` sees the data the "best" scores are reported on.
    history = model.fit(
        X_train_dict,
        {"logging": y_train},
        epochs=num_epochs,
        batch_size=batch_size,
        validation_data=(X_test_dict, y_test),
        validation_split=val_split,
        callbacks=callbacks,
    )

    # Predict with final weights
    pred_test = model.predict(X_test_dict, batch_size=batch_size)
    y_pred = np.round(pred_test)
    for bucket, value in zip(final_buckets, _binary_scores(y_test, y_pred)):
        bucket.append(value)

    # Now load the best weights and predict on test data again
    if "cp" in callback:
        model.load_weights(model_cp_filepath)
        best_pred_test = model.predict(X_test_dict, batch_size=batch_size)
        best_y_pred = np.round(best_pred_test)
        for bucket, value in zip(best_buckets, _binary_scores(y_test, best_y_pred)):
            bucket.append(value)

# Without a checkpoint callback there are no "best" scores; report placeholders
# instead of failing on argmax of an empty list.
if best_f1_all:
    best_fold = int(np.argmax(best_f1_all))
    best_fold_f1 = f"{best_f1_all[best_fold]:.3f}"[2:]
else:
    best_fold = None
    best_fold_f1 = "nan"

end = time.time()
execution_time = int(end - start)

scores = [
    f"{repo_name[:3]}_min{min_val}_hybrid_cv{n_splits}",
    time.ctime(),
    sampling_strategy,
    max_length,
    vocab_size,
    batch_size,
    trainable,
    dropout,
    val_split,
    callback,
    callback_monitor,
    num_nodes,
    num_epochs,
    class_weight,
    list(map(lambda x: x.name if callable(x) else x, cmpltn_metrics)),
    settings_hash,
    execution_time,
    _mean_digits(final_bal_acc_all, 2),
    _mean_digits(final_precision_all, 2),
    _mean_digits(final_recall_all, 2),
    _mean_digits(final_f1_all, 3),
    _mean_digits(best_bal_acc_all, 2),
    _mean_digits(best_precision_all, 2),
    _mean_digits(best_recall_all, 2),
    _mean_digits(best_f1_all, 3),
    best_fold,
    best_fold_f1,
]
# Append the header and the row; the context manager closes the file even if
# a write fails.
with open("results.txt", "a") as out:
    out.write(iteration_features + ", Final_Bal_Acc, Final_Prec, Final_Recall, Final_F1, Best_Bal_Acc, Best_Prec, Best_Recall, Best_F1, Best_Fold, Best_Fold_F1\n")
    out.write(str(scores).replace("'", "")[1:-1] + "\n")

Hi


ValueError: Data cardinality is ambiguous:
  x sizes: 88803, 88803
  y sizes: 88302
Make sure all arrays contain the same number of samples.

In [None]:
# Manually restore weights from a previously written checkpoint.
# NOTE(review): the path is hard-coded; its components look like
# <repo_name>/<settings_hash>/fold<k> - confirm against build_callbacks and
# make sure it matches the architecture of the current `model`.
checkpoint_filepath = 'zhenhao_models/174repos_min50_max1000000/4609183334028858880/fold2'
model.load_weights(checkpoint_filepath)

In [None]:
# Predict on `X_test_dict` as left behind by the training cell (the test part
# of the last fold) using the weights currently loaded into `model`.
pred_test= model.predict(X_test_dict, batch_size=batch_size)
# Round the model outputs to the nearest integer to obtain class labels.
y_pred = np.round(pred_test)

In [None]:
# Print balanced accuracy, precision, recall and F1 (in that order) for the
# current predictions, one value per line.
for metric_fn in (balanced_accuracy_score, precision_score, recall_score, f1_score):
    print(metric_fn(y_test, y_pred))