In [3]:
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score, auc
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.utils import class_weight
import tensorflow as tf
from math import sqrt
import pandas as pd
import numpy as np
import sys
sys.path.append('../')
from utils.costum_keys import CustomKeys as ck

In [81]:
df = pd.read_csv('../data/bot_detection_dataset.csv')
df = df.drop(columns=[
    ck.ADDRESS, 
    ck.ERC20_MOST_REC_TOKEN_TYPE, 
    ck.ERC20_MOST_SENT_TOKEN_TYPE,
    ck.DATA_SOURCE,
    ck.LABEL_SOURCE,
    ]).astype(np.float64)
df = df.dropna()
negatives = df[df[ck.FLAG] == ck.NEGATIVE_FLAG]
kaggle_labeled_bots = df[df[ck.FLAG] == ck.KAGGLE_LABELED_BOT_FLAG]
mev_bots = df[df[ck.FLAG] == ck.MEV_BOT_FLAG]
spams = df[df[ck.FLAG] == ck.SPAM_FLAG]
positives = kaggle_labeled_bots.copy()

In [82]:
n = 1000

In [83]:
negatives = negatives.sample(frac=1)
negatives, calibration = negatives.iloc[:-n], negatives.iloc[-n:]
positives = positives.sample(frac=1)

In [84]:
positive_folds = np.array_split(positives.values, 4)
negative_folds = np.array_split(negatives.values, 4)
folds = []
for i in range(4):
    fold = np.concatenate([positive_folds[i], negative_folds[i]])
    np.random.shuffle(fold)
    folds.append(fold)

In [85]:
test_data = np.copy(folds[0])
train_data = np.concatenate(folds[1:])
train_x, train_y = train_data[:, 1:], train_data[:, 0]
cal_x, cal_y = calibration.values[:, 1:], calibration.values[:, 0]
test_x, test_y = test_data[:, 1:], test_data[:, 0]
scaler = StandardScaler()
train_x_scaled = scaler.fit_transform(train_x)
cal_x_scaled = scaler.transform(cal_x)
test_x_scaled = scaler.transform(test_x)
n_features = train_x.shape[1]


In [86]:
train_x_scaled.shape, cal_x_scaled.shape, test_x_scaled.shape

((6008, 45), (1000, 45), (2004, 45))

In [87]:
model = tf.keras.Sequential()
Dense = tf.keras.layers.Dense
model.add(Dense(64, input_shape=(n_features,), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(
    loss='binary_crossentropy',
    optimizer='adam', 
    metrics=['accuracy'],
    )

In [88]:
class_weights = class_weight.compute_class_weight(
    class_weight = 'balanced', 
    classes = np.unique(train_y),
    y = train_y.flatten(),
)
class_weights = dict(enumerate(class_weights))

In [89]:
model.fit(
    train_x_scaled,
    train_y, 
    epochs=30, 
    batch_size=32, 
    class_weight=class_weights,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1e2fb6dd160>

In [90]:
# Make predictions on the test data
predictions = model.predict(test_x_scaled)
cal_predictions = model.predict(cal_x_scaled)



In [91]:
preds_negative = predictions[np.where(test_y == 0)[0]]
preds_positive = predictions[np.where(test_y == 1)[0]]

In [92]:
# Problem setup
alpha = 0.03 # 1-alpha is the desired type-1 error
alpha, n

(0.03, 1000)

In [93]:
# Use the outlier detection method to get a threshold on the toxicities
qhat = np.quantile(cal_predictions, np.ceil((n+1)*(1-alpha))/n)
# Perform outlier detection on the ind and ood data
outlier_ind = preds_negative > qhat # We want this to be no more than alpha on average
outlier_ood = preds_positive > qhat # We want this to be as large as possible, but it doesn't have a guarantee

In [94]:
# Calculate type-1 and type-2 errors
type1 = outlier_ind.mean()
type2 = 1-outlier_ood.mean()
print(f"The type-1 error is {type1:.4f}, the type-2 error is {type2:.4f}, and the threshold is {qhat:.4f}.")

The type-1 error is 0.0240, the type-2 error is 0.1065, and the threshold is 0.6615.


In [95]:
def __calculate_mcc(tp, tn, fp, fn):
    numerator = tp * tn - fp * fn
    denominator = sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    mcc = numerator / denominator if denominator else 0
    return mcc

def __calculate_cohen_kappa(tn, fp, fn, tp):
    total = tp + fp + fn + tn
    p0 = (tp + tn) / total
    pe = ((tp + fp) * (tp + fn) + (tn + fp) * (tn + fn)) / (total * total)
    kappa = (p0 - pe) / (1 - pe)
    return kappa

def __calculate_scores(metrics):
    _, tn, fp, fn, tp, _, _ = metrics
    balanced_accuracy = (tp / (tp + fn) + tn / (tn + fp)) / 2
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1_score = 2 * (precision * recall) / (precision + recall)
    mcc = __calculate_mcc(tp, tn, fp, fn)
    cohen_kappa = __calculate_cohen_kappa(tn, fp, fn, tp)
    g_mean = sqrt((tp / (tp + fn)) * (tn / (tn + fp)))
    return balanced_accuracy, precision, recall, f1_score, mcc, \
        cohen_kappa, g_mean

def print_metrics(metrics):
    accuracy, tn, fp, fn, tp, auc_pr, auc_roc = metrics
    balanced_accuracy, precision, recall, f1_score, mcc, \
        cohen_kappa, g_mean =  __calculate_scores(metrics)
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Balanced Accuracy: {balanced_accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1_score:.2f}")
    print(f"AUC-ROC: {auc_roc:.2f}")
    print(f"AUC-PR: {auc_pr:.2f}")
    print(f"MCC: {mcc:.2f}")
    print(f"Cohen's Kappa: {cohen_kappa:.2f}")
    print(f"G-Mean: {g_mean:.2f}")
    print(f"True negatives: {tn}")
    print(f"False positives: {fp}")
    print(f"False negatives: {fn}")
    print(f"True positives: {tp}")
    return

In [97]:
predictions_binary = [1 if p > qhat else 0 for p in predictions]

# Print the accuracy of the model
accuracy = accuracy_score(test_y, predictions_binary)
tn, fp, fn, tp = confusion_matrix(test_y, predictions_binary).ravel()
fpr, tpr, _ = roc_curve(test_y, predictions_binary)
auc_pr = auc(fpr, tpr)
auc_roc = roc_auc_score(test_y, predictions_binary)
m = accuracy, tn, fp, fn, tp, auc_pr, auc_roc
print('Conformal Prediction')
print_metrics(m)

Conformal Prediction
Accuracy: 0.96
Balanced Accuracy: 0.93
Precision: 0.88
Recall: 0.89
F1 Score: 0.89
AUC-ROC: 0.93
AUC-PR: 0.93
MCC: 0.87
Cohen's Kappa: 0.87
G-Mean: 0.93
True negatives: 1626
False positives: 40
False negatives: 36
True positives: 302


In [98]:
predictions_binary = [1 if p > 0.5 else 0 for p in predictions]

# Print the accuracy of the model
accuracy = accuracy_score(test_y, predictions_binary)
tn, fp, fn, tp = confusion_matrix(test_y, predictions_binary).ravel()
fpr, tpr, _ = roc_curve(test_y, predictions_binary)
auc_pr = auc(fpr, tpr)
auc_roc = roc_auc_score(test_y, predictions_binary)
m = accuracy, tn, fp, fn, tp, auc_pr, auc_roc
print('Binary Prediction')
print_metrics(m)

Binary Prediction
Accuracy: 0.96
Balanced Accuracy: 0.95
Precision: 0.85
Recall: 0.93
F1 Score: 0.89
AUC-ROC: 0.95
AUC-PR: 0.95
MCC: 0.87
Cohen's Kappa: 0.86
G-Mean: 0.95
True negatives: 1612
False positives: 54
False negatives: 25
True positives: 313
