# import

In [1]:
import math
import numpy as np
import os
import pandas as pd
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras

# constants

In [2]:
BATCH_SIZE = 100
EPOCHS = 100
LEARNING_RATE = 0.005
THRESHOLDS = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# fetch

In [3]:
working_df = pd.read_csv("../data/is_in.csv")
working_df.head()

Unnamed: 0,x,y,is_in
0,-0.342671,-0.377219,1
1,-2.189994,-3.914946,0
2,-0.087206,-3.781911,0
3,2.124552,-2.604753,0
4,3.807756,-3.584953,0


# prepare

In [4]:
length_all = len(working_df)
length_train = math.trunc(length_all * 0.8)
end_train = length_train - 1
start_test = length_train
train_df = working_df.iloc[:end_train]
test_df = working_df.iloc[start_test:]

# train

In [5]:
inputs = keras.Input(shape=(2,))
hidden = keras.layers.Dense(3, activation=keras.activations.relu)(inputs)
outputs = keras.layers.Dense(1, activation=keras.activations.sigmoid)(hidden)
model = keras.Model(inputs, outputs)

model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.FalseNegatives(THRESHOLDS),
        keras.metrics.FalsePositives(THRESHOLDS),
        keras.metrics.TrueNegatives(THRESHOLDS),
        keras.metrics.TruePositives(THRESHOLDS),
    ],
)

train_pairs=[]
for index, row in train_df.iterrows():
    train_pairs.append([float(row['x']), float(row['y'])])
history = model.fit(
    x=np.array(train_pairs),
    y=train_df["is_in"].values,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

Epoch 1/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 82.4444 - false_positives: 139.8148 - loss: 0.8213 - true_negatives: 165.4074 - true_positives: 100.7778
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 88.2593 - false_positives: 140.6852 - loss: 0.7920 - true_negatives: 166.8704 - true_positives: 92.6296
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 91.0556 - false_positives: 131.4259 - loss: 0.7201 - true_negatives: 181.1296 - true_positives: 84.8333
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 96.5741 - false_positives: 125.4445 - loss: 0.6952 - true_negatives: 183.5555 - true_positives: 82.8704
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - false_negatives: 98.1296 - false_positives: 116.4445 - loss: 0.6636 - true_negatives: 195.

# evaluate

In [6]:
test_pairs=[]
for index, row in test_df.iterrows():
    test_pairs.append([float(row['x']), float(row['y'])])
evaluation = model.evaluate(
    x=np.array(test_pairs),
    y=test_df["is_in"].values,
    batch_size=BATCH_SIZE,
    return_dict=True,
    verbose=0,
)

metrics_df = pd.DataFrame(columns=["metric"] + THRESHOLDS)
tp = evaluation["true_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TP"] + tp
fp = evaluation["false_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FP"] + fp
fn = evaluation["false_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FN"] + fn
tn = evaluation["true_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TN"] + tn
accuracy = []
for i in range(len(THRESHOLDS)):
    accuracy.append((tp[i] + tn[i]) / (tp[i] + tn[i] + fp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["Accuracy"] + accuracy
fpr = []
for i in range(len(THRESHOLDS)):
    fpr.append(fp[i] / (fp[i] + tn[i]))
metrics_df.loc[len(metrics_df)] = ["FPR"] + fpr
tpr = []
for i in range(len(THRESHOLDS)):
    tpr.append(tp[i] / (tp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["TPR"] + tpr
print(metrics_df)

     metric        0.2        0.3      0.4         0.5         0.6         0.7
0        TP  73.000000  71.000000   66.000   62.000000   53.000000   49.000000
1        FP  61.000000  33.000000   21.000   11.000000    4.000000    3.000000
2        FN   2.000000   4.000000    9.000   13.000000   22.000000   26.000000
3        TN  64.000000  92.000000  104.000  114.000000  121.000000  122.000000
4  Accuracy   0.685000   0.815000    0.850    0.880000    0.870000    0.855000
5       FPR   0.488000   0.264000    0.168    0.088000    0.032000    0.024000
6       TPR   0.973333   0.946667    0.880    0.826667    0.706667    0.653333


# predict

In [7]:
working_pairs=[]
for index, row in working_df.iterrows():
    working_pairs.append([float(row['x']), float(row['y'])])
predictions = model.predict(
    x=np.array(working_pairs),
    batch_size=BATCH_SIZE,
    verbose=0,
)
predictions_df = pd.DataFrame(predictions)
predictions_df.head()

Unnamed: 0,0
0,0.983525
1,0.007584
2,0.126639
3,0.130679
4,0.001002


In [8]:
predictions_df.to_csv("../data/is_in_predictions.csv", index=False)