In [None]:
# this file should be used from the root of the repository
import pyeddl.eddl as eddl
import pyecvl.ecvl as ecvl
from pyeddl.tensor import Tensor

import pandas as pd
import numpy as np
from posixpath import join
import yaml
import os

from utils.data_partitioning import load_data_split
from eddl_lib.uc5_dataset import Uc5Dataset

# --- locations: experiment folder (model, splits, dataset table) and image folder ---
exp_fld = "/mnt/datasets/uc5/UC5_pipeline_forked/experiments_eddl/wp6"
cnn_fn = "cnn_84val_neptune179.onnx"
ds_fn = "img_reports_phi2_enc.tsv"
img_fld = "/mnt/datasets/uc5/std-dataset/image"

# --- data split: the three id lists are read from exp_fld ---
train_ids, valid_ids, test_ids = load_data_split(exp_fld)

# --- CNN: import the ONNX model and build it on the GPU ---
cnn_path = join(exp_fld, cnn_fn)
cnn = eddl.import_net_from_onnx_file(cnn_path)

build_optimizer = eddl.rmsprop(0.01)
build_losses = ["soft_cross_entropy"]
build_metrics = ["categorical_accuracy"]
# GPU computing service; a CPU service (eddl.CS_CPU) would be the alternative
comp_service = eddl.CS_GPU(mem="full_mem")
# last argument False: do not initialize weights to random values,
# so the weights imported from the ONNX file are kept
eddl.build(cnn, build_optimizer, build_losses, build_metrics, comp_service, False)

# NOTE(review): resize(1) presumably sets the batch size to 1 (one image per
# forward pass) - confirm against the pyeddl docs
cnn.resize(1)
# eddl.summary(cnn)
# NOTE(review): mode 0 is presumably the test/inference mode - confirm
eddl.set_mode(cnn, 0)

# --- dataset table: tab-separated, indexed by the "filename" column ---
ds_path = join(exp_fld, ds_fn)
ds = pd.read_csv(ds_path, sep="\t")
ds = ds.set_index("filename")
print(ds.shape)
print(ds.head())

# --- size of the "cnn_out" layer output along axis 1 ---
cnn_out_layer = eddl.getLayer(cnn, "cnn_out")
semantic_dim = cnn_out_layer.output.shape[1]
print("semantic dimension:", semantic_dim)


In [None]:
# aux functions
def load_image(filename):
    """Read an image file and apply the fixed preprocessing pipeline.

    The pipeline converts the pixels to float32 (dividing by 255), normalizes
    them with a fixed mean/std (the same value on all three channels), resizes
    the image to 300x300 and takes a 224x224 center crop.

    Args:
        filename: path of the image file, passed to ``ecvl.ImRead``.

    Returns:
        The ecvl image after preprocessing, with channels arranged as "cxy".
    """
    channel_mean = [0.48197903] * 3
    channel_std = [0.26261734] * 3
    resize_dims = [300, 300]
    crop_dims = [224, 224]

    pipeline = ecvl.SequentialAugmentationContainer([
        ecvl.AugToFloat32(divisor=255.0),
        ecvl.AugNormalize(channel_mean, channel_std),
        ecvl.AugResizeDim(resize_dims),
        ecvl.AugCenterCrop(crop_dims),  # to do: test random crop also in prediction
    ])

    image = ecvl.ImRead(filename, flags=None)  # , flags=ecvl.ImReadMode.GRAYSCALE)
    # the augmentations are applied on the "xyc" layout; "cxy" is restored afterwards
    ecvl.RearrangeChannels(image, image, "xyc")
    pipeline.Apply(image)
    ecvl.RearrangeChannels(image, image, "cxy")
    return image

def label_list(lab_str):
    """Parse a ';'-separated string of integer label indexes.

    Empty tokens are skipped, so an empty string (no labels) gives an empty
    list and a trailing or doubled separator does not raise.

    Args:
        lab_str: string such as "3;17;42".

    Returns:
        List of ints, in the order in which they appear in ``lab_str``.

    Raises:
        ValueError: if a non-empty token is not a valid integer.
    """
    return [int(token) for token in lab_str.split(";") if token.strip()]

In [None]:
# Run the CNN on every image of the selected split and collect, per image,
# the "cnn_out" activations (predictions) and the multi-hot labels (targets).
split_ids = test_ids

# one row per label, one column per image; NaN marks columns not filled yet
predictions = np.full((semantic_dim, len(split_ids)), np.nan)
targets = np.full_like(predictions, np.nan)

# the layer handle does not depend on the image: look it up once
out_layer = eddl.getLayer(cnn, "cnn_out")

for pos, img_id in enumerate(split_ids):  # img_id: do not shadow the builtin id()
    if (pos % 100) == 0:
        print(".", end="")  # progress mark every 100 images

    img = load_image(join(img_fld, img_id))
    batch = np.expand_dims(np.array(img, copy=False), axis=0)  # add batch dimension
    eddl.forward(cnn, [Tensor.fromarray(batch)])

    # the output holds one row (batch of one image): flatten it explicitly
    # before storing it as column `pos`
    scores = np.array(eddl.getOutput(out_layer), copy=False)
    predictions[:, pos] = scores.reshape(-1)

    # multi-hot target: 1 at the indexes listed in the "labels" column, 0 elsewhere
    label_idx = np.asarray(label_list(ds.loc[img_id].labels), dtype=np.intp)
    targets[:, pos] = 0.0
    targets[label_idx, pos] = 1.0
# TODO: repeat for the other splits (train_ids, valid_ids) if needed


In [None]:
from sklearn.metrics import roc_curve, accuracy_score
import matplotlib.pyplot as plt
import math

# For every label, select the decision threshold that maximises the geometric
# mean of tpr and (1 - fpr) on the ROC curve, and print the accuracy obtained
# with that threshold and with the fixed cut-offs 0.5 and 0.
sel_thresholds = np.zeros((predictions.shape[0],))
for i in range(predictions.shape[0]):
    y_est = predictions[i, :]
    y = targets[i, :]
    fpr, tpr, thresholds = roc_curve(y, y_est)

    gmeans = np.sqrt(tpr * (1 - fpr))
    best = np.argmax(gmeans)
    # The selected value must be a score cut-off, i.e. the entry of
    # `thresholds` at the best index - not the g-mean value itself.
    m2 = thresholds[best]
    sel_thresholds[i] = m2

    # roc_curve counts a sample as positive when score >= threshold,
    # so the same comparison is used here
    y_est1 = np.where(y_est >= m2, 1, 0)
    print("***")
    print(f"label {i}, acc at g-mean threshold:", accuracy_score(y, y_est1) * 100)
    y_est2 = np.where(y_est > 0.5, 1, 0)
    print(f"label {i}, acc:", accuracy_score(y, y_est2) * 100)
    y_est3 = np.where(y_est > 0, 1, 0)
    print(f"label {i}, with 0:", accuracy_score(y, y_est3) * 100)
    # Alternative selection criterion: Youden's J statistic, argmax(tpr - fpr).
    # print(f"label {i}, threshold: {m2:.2f}, gmeans: {gmeans[best]:.2f}")