# imports

In [1]:
import math
import numpy as np
import os
import pandas as pd
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras

# constants

In [2]:
BATCH_SIZE = 100
EPOCHS = 60
LEARNING_RATE = 0.001
THRESHOLDS = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# fetch

In [3]:
working_df = pd.read_csv("../data/is_cammeo.csv")
working_df.head()

Unnamed: 0,major_axis_length,area,is_cammeo
0,0.221789,0.414042,1
1,-1.337768,-0.919393,0
2,-1.271277,-1.425637,0
3,-0.55648,-0.19726,0
4,-0.921744,-0.713317,0


# prepare

In [4]:
length_all = len(working_df)
length_train = math.trunc(length_all * 0.8)
end_train = length_train - 1
start_test = length_train
train_df = working_df.iloc[:end_train]
test_df = working_df.iloc[start_test:]
test_df.head()

Unnamed: 0,major_axis_length,area,is_cammeo
3048,-1.393334,-1.067168,0
3049,0.843653,0.972237,1
3050,1.917158,1.674167,1
3051,-0.917297,-0.44028,0
3052,0.815348,1.318584,1


# train

In [5]:
# DIRECT APPROACH
inputs = keras.Input(shape=(2,))
outputs = keras.layers.Dense(1, activation=keras.activations.sigmoid)(inputs)
model = keras.Model(inputs, outputs)

# # CONCATENATE APPROACH
# feature_names = ["major_axis_length", "area"]
# inputs = [
#     keras.Input(name=feature_name, shape=(1,))
#     for feature_name in feature_names
# ]
# concatenated_inputs = keras.layers.Concatenate()(inputs)
# outputs = keras.layers.Dense(1, activation=keras.activations.sigmoid)(concatenated_inputs)
# model = keras.Model(inputs, outputs)

model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=LEARNING_RATE),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.FalseNegatives(THRESHOLDS),
        keras.metrics.FalsePositives(THRESHOLDS),
        keras.metrics.TrueNegatives(THRESHOLDS),
        keras.metrics.TruePositives(THRESHOLDS),
    ],
)

# DIRECT APPROACH
train_pairs=[]
for index, row in train_df.iterrows():
    train_pairs.append([float(row['major_axis_length']), float(row['area'])])
history = model.fit(
    x=np.array(train_pairs),
    y=train_df["is_cammeo"].values,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

# # CONCATENATE APPROACH
# train_features = {
#   feature_name: np.array(train_df[feature_name])
#   for feature_name in feature_names
# }
# history = model.fit(
#     x=train_features,
#     y=train_df["is_cammeo"].values,
#     batch_size=BATCH_SIZE,
#     epochs=EPOCHS,
# )

Epoch 1/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - false_negatives: 175.7396 - false_positives: 397.8542 - loss: 0.5408 - true_negatives: 543.9583 - true_positives: 526.0104
Epoch 2/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - false_negatives: 170.6875 - false_positives: 374.3906 - loss: 0.5202 - true_negatives: 563.6094 - true_positives: 534.8750
Epoch 3/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - false_negatives: 174.8542 - false_positives: 354.5208 - loss: 0.5168 - true_negatives: 566.3854 - true_positives: 547.8021
Epoch 4/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 954us/step - false_negatives: 170.2812 - false_positives: 332.7917 - loss: 0.4991 - true_negatives: 606.0833 - true_positives: 534.4062
Epoch 5/60
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 990us/step - false_negatives: 164.9740 - false_positives: 314.7396 - loss: 0.4819 - tru

# evaluate

In [12]:
major_axis_length_weight = model.get_weights()[0][0][0]
area_weight = model.get_weights()[0][1][0]
bias = model.get_weights()[1][0]
print(f"major_axis_length_weight: {major_axis_length_weight}")
print(f"area_weight: {area_weight}")
print(f"bias: {bias}")

# DIRECT APPROACH
test_pairs=[]
for index, row in test_df.iterrows():
    test_pairs.append([float(row['major_axis_length']), float(row['area'])])
evaluation = model.evaluate(
    x=np.array(test_pairs),
    y=test_df["is_cammeo"].values,
    batch_size=BATCH_SIZE,
    return_dict=True,
    verbose=0,
)

# # CONCATENATE APPROACH
# test_features = {
#   feature_name: np.array(train_df[feature_name])
#   for feature_name in feature_names
# }
# evaluation = model.evaluate(
#     x=test_features,
#     y=train_df["is_cammeo"].values,
#     batch_size=BATCH_SIZE,
#     return_dict=True,
#     verbose=0,
# )

metrics_df = pd.DataFrame(columns=["metric"] + THRESHOLDS)
tp = evaluation["true_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TP"] + tp
fp = evaluation["false_positives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FP"] + fp
fn = evaluation["false_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["FN"] + fn
tn = evaluation["true_negatives"].numpy().tolist()
metrics_df.loc[len(metrics_df)] = ["TN"] + tn
accuracy = []
for i in range(len(THRESHOLDS)):
    accuracy.append((tp[i] + tn[i]) / (tp[i] + tn[i] + fp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["Accuracy"] + accuracy
fpr = []
for i in range(len(THRESHOLDS)):
    fpr.append(fp[i] / (fp[i] + tn[i]))
metrics_df.loc[len(metrics_df)] = ["FPR"] + fpr
tpr = []
for i in range(len(THRESHOLDS)):
    tpr.append(tp[i] / (tp[i] + fn[i]))
metrics_df.loc[len(metrics_df)] = ["TPR"] + tpr
print(metrics_df)

major_axis_length_weight: 1.1050945520401
area_weight: 2.4956374168395996
bias: -0.4403350055217743
     metric         0.2         0.3         0.4         0.5         0.6  \
0        TP  299.000000  293.000000  284.000000  276.000000  266.000000   
1        FP   95.000000   67.000000   39.000000   27.000000   18.000000   
2        FN   13.000000   19.000000   28.000000   36.000000   46.000000   
3        TN  355.000000  383.000000  411.000000  423.000000  432.000000   
4  Accuracy    0.858268    0.887139    0.912073    0.917323    0.916010   
5       FPR    0.211111    0.148889    0.086667    0.060000    0.040000   
6       TPR    0.958333    0.939103    0.910256    0.884615    0.852564   

          0.7  
0  253.000000  
1   10.000000  
2   59.000000  
3  440.000000  
4    0.909449  
5    0.022222  
6    0.810897  
