# imports

In [1]:
import math
import numpy as np
import os
import pandas as pd
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras

# constants

In [2]:
BATCH_SIZE = 100
EPOCHS = 60
LEARNING_RATE = 0.001
THRESHOLDS = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# fetch

In [3]:
working_df = pd.read_csv("../data/is_cammeo.csv")
working_df.head()

Unnamed: 0,major_axis_length,area,is_cammeo
0,0.221789,0.414042,1
1,-1.337768,-0.919393,0
2,-1.271277,-1.425637,0
3,-0.55648,-0.19726,0
4,-0.921744,-0.713317,0


# prepare

In [4]:
length_all = len(working_df)
length_train = math.trunc(length_all * 0.8)
end_train = length_train - 1
start_test = length_train
train_df = working_df.iloc[:end_train]
test_df = working_df.iloc[start_test:]

# train

In [5]:
# DIRECT APPROACH
inputs = keras.Input(shape=(2,))
outputs = keras.layers.Dense(1, activation=keras.activations.sigmoid)(inputs)
model = keras.Model(inputs, outputs)

# # CONCATENATE APPROACH
# feature_names = ["major_axis_length", "area"]
# inputs = [
#     keras.Input(name=feature_name, shape=(1,))
#     for feature_name in feature_names
# ]
# concatenated_inputs = keras.layers.Concatenate()(inputs)
# outputs = keras.layers.Dense(1, activation=keras.activations.sigmoid)(concatenated_inputs)
# model = keras.Model(inputs, outputs)

model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.FalseNegatives(THRESHOLDS),
        keras.metrics.FalsePositives(THRESHOLDS),
        keras.metrics.TrueNegatives(THRESHOLDS),
        keras.metrics.TruePositives(THRESHOLDS),
    ],
)

# DIRECT APPROACH
train_pairs=[]
for index, row in train_df.iterrows():
    train_pairs.append([float(row['major_axis_length']), float(row['area'])])
history = model.fit(
    x=np.array(train_pairs),
    y=train_df["is_cammeo"].values,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

# # CONCATENATE APPROACH
# train_features = {
#   feature_name: np.array(train_df[feature_name])
#   for feature_name in feature_names
# }
# history = model.fit(
#     x=train_features,
#     y=train_df["is_cammeo"].values,
#     batch_size=BATCH_SIZE,
#     epochs=EPOCHS,
# )

Epoch 1/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - false_negatives: 113.1094 - false_positives: 340.7240 - loss: 0.4540 - true_negatives: 589.8073 - true_positives: 599.9219
Epoch 2/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - false_negatives: 107.9375 - false_positives: 314.5729 - loss: 0.4327 - true_negatives: 621.6458 - true_positives: 599.4062
Epoch 3/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - false_negatives: 111.7031 - false_positives: 290.6667 - loss: 0.4216 - true_negatives: 631.3333 - true_positives: 609.8594
Epoch 4/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 952us/step - false_negatives: 104.3177 - false_positives: 280.4427 - loss: 0.4100 - true_negatives: 666.2448 - true_positives: 592.5573
Epoch 5/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 972us/step - false_negatives: 106.7552 - false_positives: 252.5990 - loss: 0.3938 - tru

# evaluate

In [6]:
major_axis_length_weight = model.get_weights()[0][0][0]
area_weight = model.get_weights()[0][1][0]
bias = model.get_weights()[1][0]
print(f"major_axis_length_weight: {major_axis_length_weight}")
print(f"area_weight: {area_weight}")
print(f"bias: {bias}")

# DIRECT APPROACH
test_pairs=[]
for index, row in test_df.iterrows():
    test_pairs.append([float(row['major_axis_length']), float(row['area'])])
evaluation = model.evaluate(
    x=np.array(test_pairs),
    y=test_df["is_cammeo"].values,
    batch_size=BATCH_SIZE,
    return_dict=True,
    verbose=0,
)

# # CONCATENATE APPROACH
# test_features = {
#   feature_name: np.array(train_df[feature_name])
#   for feature_name in feature_names
# }
# evaluation = model.evaluate(
#     x=test_features,
#     y=train_df["is_cammeo"].values,
#     batch_size=BATCH_SIZE,
#     return_dict=True,
#     verbose=0,
# )

metrics_df = pd.DataFrame(columns=["metric"] + THRESHOLDS)
tp = evaluation["true_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TP"] + tp
fp = evaluation["false_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FP"] + fp
fn = evaluation["false_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FN"] + fn
tn = evaluation["true_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TN"] + tn
accuracy = []
for i in range(len(THRESHOLDS)):
    accuracy.append((tp[i] + tn[i]) / (tp[i] + tn[i] + fp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["Accuracy"] + accuracy
fpr = []
for i in range(len(THRESHOLDS)):
    fpr.append(fp[i] / (fp[i] + tn[i]))
metrics_df.loc[len(metrics_df)] = ["FPR"] + fpr
tpr = []
for i in range(len(THRESHOLDS)):
    tpr.append(tp[i] / (tp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["TPR"] + tpr
print(metrics_df)

major_axis_length_weight: 2.3108391761779785
area_weight: 1.4722005128860474
bias: -0.48252856731414795
     metric         0.2         0.3         0.4         0.5         0.6  \
0        TP  302.000000  295.000000  290.000000  285.000000  274.000000   
1        FP   82.000000   57.000000   40.000000   24.000000   16.000000   
2        FN   10.000000   17.000000   22.000000   27.000000   38.000000   
3        TN  368.000000  393.000000  410.000000  426.000000  434.000000   
4  Accuracy    0.879265    0.902887    0.918635    0.933071    0.929134   
5       FPR    0.182222    0.126667    0.088889    0.053333    0.035556   
6       TPR    0.967949    0.945513    0.929487    0.913462    0.878205   

          0.7  
0  260.000000  
1   12.000000  
2   52.000000  
3  438.000000  
4    0.916010  
5    0.026667  
6    0.833333  
