# imports

In [1]:
import math
import os
import pandas as pd
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras

# constants

In [2]:
BATCH_SIZE=100
EPOCHS= 100
LEARNING_RATE=0.01
THRESHOLDS=[0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# fetch

In [3]:
working_df = pd.read_csv("../data/spam-words.csv")
working_df.head()

Unnamed: 0,words,spam
0,4,0
1,4,1
2,5,0
3,2,0
4,9,1


# prepare

In [4]:
length_all = len(working_df)
length_train = math.trunc(length_all * 0.8)
end_train = length_train - 1
start_test = length_train
train_df = working_df.iloc[:end_train]
test_df = working_df.iloc[start_test:]

# train

In [5]:
inputs = keras.Input(shape=(1,))
outputs = keras.layers.Dense(1, activation=keras.activations.sigmoid)(inputs)
model = keras.Model(inputs, outputs)

model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.FalseNegatives(THRESHOLDS),
        keras.metrics.FalsePositives(THRESHOLDS),
        keras.metrics.TrueNegatives(THRESHOLDS),
        keras.metrics.TruePositives(THRESHOLDS),
    ],
)

history = model.fit(
    x=train_df["words"].values,
    y=train_df["spam"].values,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

Epoch 1/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 196.2963 - false_positives: 105.3704 - loss: 1.0045 - true_negatives: 145.2963 - true_positives: 41.4815
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 140.5926 - false_positives: 135.7222 - loss: 0.7920 - true_negatives: 107.0555 - true_positives: 105.0741
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 93.8889 - false_positives: 160.0000 - loss: 0.7135 - true_negatives: 83.1111 - true_positives: 151.4444
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 71.0000 - false_positives: 164.6296 - loss: 0.6747 - true_negatives: 81.5926 - true_positives: 171.2222
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 45.1852 - false_positives: 171.4815 - loss: 0.6449 - true_negatives: 74

# Evaluate

In [6]:
weight = model.get_weights()[0][0][0]
bias = model.get_weights()[1][0]
print(f"weight: {weight}")
print(f"bias: {bias}")


evaluation = model.evaluate(
    x=test_df["words"].values,
    y=test_df["spam"].values,
    batch_size=BATCH_SIZE,
    return_dict=True,
    verbose=0,
)
metrics_df = pd.DataFrame(columns=["metric"] + THRESHOLDS)
tp = evaluation["true_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TP"] + tp
fp = evaluation["false_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FP"] + fp
fn = evaluation["false_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FN"] + fn
tn = evaluation["true_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TN"] + tn
accuracy = []
for i in range(len(THRESHOLDS)):
    accuracy.append((tp[i] + tn[i]) / (tp[i] + tn[i] + fp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["Accuracy"] + accuracy
fpr = []
for i in range(len(THRESHOLDS)):
    fpr.append(fp[i] / (fp[i] + tn[i]))
metrics_df.loc[len(metrics_df)] = ["FPR"] + fpr
tpr = []
for i in range(len(THRESHOLDS)):
    tpr.append(tp[i] / (tp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["TPR"] + tpr
print(metrics_df)

weight: 0.7892546653747559
bias: -3.595226526260376
     metric        0.2        0.3        0.4        0.5        0.6        0.7
0        TP  97.000000  96.000000  86.000000  86.000000  71.000000  71.000000
1        FP  45.000000  21.000000  14.000000  14.000000   6.000000   6.000000
2        FN   5.000000   6.000000  16.000000  16.000000  31.000000  31.000000
3        TN  53.000000  77.000000  84.000000  84.000000  92.000000  92.000000
4  Accuracy   0.750000   0.865000   0.850000   0.850000   0.815000   0.815000
5       FPR   0.459184   0.214286   0.142857   0.142857   0.061224   0.061224
6       TPR   0.950980   0.941176   0.843137   0.843137   0.696078   0.696078
