In [None]:
import json
from numpy import count_nonzero as nnz
from collections import defaultdict
import json
import numpy as np
import pandas as pd
import pickle
from posixpath import join
import pyeddl.eddl as eddl
import pyecvl.ecvl as ecvl
from pyeddl.tensor import Tensor
from tqdm import tqdm
from sklearn.metrics import accuracy_score, jaccard_score


In [None]:


#> paths
# Experiment folder: holds the ONNX checkpoint, the encoded dataset and the split id files.
fld = "/mnt/datasets/uc5/UC5_pipeline_forked/experiments_eddl/eddl_ext_CNN_20tags"
img_fld = "../data/image"
mdl_fn = join(fld, "cnn_checkpoint.onnx")
ds_fn = join(fld, "img_reports_ext_enc.tsv")

# split name -> file (inside `fld`) listing the ids of that split, one per line
partitions = {"training": "train_ids.txt", "validation": "valid_ids.txt", "test": "test_ids.txt"}

# split name -> list of ids, with surrounding whitespace stripped from every line
exs = {}
for key, fn in partitions.items():
    with open(join(fld, fn), "r") as fin:
        exs[key] = [line.strip() for line in fin]
#<



#>
# ! gpu
bs = 32

# Per-channel normalisation constants (the same value is repeated for the three channels).
mean = [0.48197903, 0.48197903, 0.48197903]
std = [0.26261734, 0.26261734, 0.26261734]


def make_test_augs(x):
    """Build the test-time pipeline: resize to 300x300, center-crop to x by x, scale, normalise."""
    steps = [
        ecvl.AugResizeDim([300, 300]),
        ecvl.AugCenterCrop([x, x]),
        # ecvl.AugRandomCrop([size, size]),  # XXX should be parametric, for resnet 18
        ecvl.AugToFloat32(divisor=255.0),
        ecvl.AugNormalize(mean, std),
    ]
    return ecvl.SequentialAugmentationContainer(steps)


# ! image size
test_augs = make_test_augs(224)
#<

#>
# Import the network from the ONNX checkpoint and build it on the GPU selected by the mask.
cnn = eddl.import_net_from_onnx_file(mdl_fn)
gpu_mask = [1, 0, 0, 0]
eddl_cs = eddl.CS_GPU(g=gpu_mask, mem="full_mem")
optimizer = eddl.adam(0.01)
# init_weights=False: presumably keeps the weights loaded from the ONNX file -- TODO confirm
eddl.build(cnn, optimizer, ["softmax_cross_entropy"], ["accuracy"], eddl_cs, init_weights=False)
# NOTE(review): mode 0 is presumably the test/inference mode -- confirm against the EDDL docs
eddl.set_mode(cnn, 0)
#<

# Table of examples; the cells below read its "filename" and "labels" columns.
ds = pd.read_csv(ds_fn, sep="\t")

# ! careful here: debugging switch, leave it False to process the full dataset
reduce_dataset = False
if reduce_dataset:
    print("reducing dataset!")
    ds = ds.iloc[:100]

def load_image(path, augs=None):
    """Read an image from disk and prepare it for the network.

    Args:
        path: location of the image file, passed to `ecvl.ImRead`.
        augs: optional augmentation object exposing `Apply(image)`; skipped when falsy.

    Returns:
        The image with its channels rearranged to the "cxy" layout.
    """
    image = ecvl.ImRead(path)
    if augs:
        # the return value is not used, so the augmentations are expected to act on `image` itself
        augs.Apply(image)
    # source and destination are the same object
    ecvl.RearrangeChannels(image, image, "cxy")
    return image

def classify(img, cnn, theta = 0.5, dev=False):
    """Run one image through the network and threshold the output of its "cnn_out" layer.

    Args:
        img: image convertible to a numpy array (as returned by `load_image`).
        cnn: built EDDL network containing a layer named "cnn_out".
        theta: outputs strictly greater than this value are reported as tags.
        dev: unused; kept so existing callers keep working.

    Returns:
        (tags, output): `tags` holds the indices of the outputs above `theta`,
        `output` is the squeezed output of the "cnn_out" layer.
    """
    cnn_out = eddl.getLayer(cnn, "cnn_out")
    # np.asarray avoids a copy when possible and, unlike np.array(..., copy=False),
    # has the same meaning before and after NumPy 2.0.
    a = np.expand_dims(np.asarray(img), axis=0)  # add batch dimension
    eddl.forward(cnn, [Tensor.fromarray(a)])
    # drop the batch dimension added above
    output = np.squeeze(np.array(eddl.getOutput(cnn_out)))
    tags = np.where(output > theta)[0]
    return tags, output
#<



# Raw network outputs, split name -> list of output vectors.
# Created once, before the loop: re-creating it for every split left only the
# last split in output.pkl.
output = defaultdict(list)

for key, l in exs.items():
    print(f"split: {key}")
    # rows of the dataset whose filename belongs to this split
    indexes   = ds.filename.isin(l)
    filenames = ds.loc[indexes, "filename"].tolist()

    # one ";"-joined string of predicted tag indices per image, in row order
    tags = []
    for fn in tqdm(filenames):
        img  = load_image(join(img_fld, fn), augs=test_augs)
        tags_, outs_ = classify(img, cnn)
        tags.append(";".join(str(t) for t in tags_))
        output[key].append(outs_)
    ds.loc[indexes, "split"] = key
    ds.loc[indexes, "tags"]  = tags

ds.to_csv(join(fld, "cnn_tags.tsv"), sep="\t")
with open(join(fld, "output.pkl"), "wb") as fout:
    pickle.dump(output, fout)

print("file saved")



In [None]:
# Quick look at the ground-truth labels and at the predicted tags.
for column in ("labels", "tags"):
    print(ds[column])

In [None]:


tags_fn = join(fld, "cnn_tags.tsv")
outputs_fn = join(fld, "output.pkl")
index2lab_fn = join(fld, "index2lab.json")

# na_filter=False keeps empty cells as "" instead of NaN, so the string
# operations applied to the "tags" column further down work on every row.
ds = pd.read_csv(tags_fn, sep="\t", na_filter=False)

# NOTE(security): pickle.load can execute arbitrary code; only load files written by this notebook.
with open(outputs_fn, "rb") as fin:
    output = pickle.load(fin)

with open(index2lab_fn, "r") as fin:
    index2lab = json.load(fin)

# number of entries in the index -> label mapping
n_classes = len(index2lab)

print(f"|labels|={n_classes}")
print(ds.columns)

def onehot(x, num_classes=None):
    """Turn a ";"-separated string of class indices into a multi-hot vector.

    Args:
        x: string such as "1;3"; an empty string means "no class".
        num_classes: length of the returned vector; defaults to the
            module-level `n_classes` when omitted.

    Returns:
        Integer numpy array of shape (num_classes,) with 1 at every listed index.

    Raises:
        ValueError: if a non-empty fragment is not an integer.
        IndexError: if an index is outside the vector.
    """
    size = n_classes if num_classes is None else num_classes
    # empty fragments (empty input, trailing ";") are ignored
    values = [int(v) for v in x.split(";") if v.strip()]
    e = np.zeros((size,), dtype=int)
    if values:
        e[values] = 1
    return e

# One multi-hot vector per row, for the ground truth and for the predictions.
target = ds.labels.apply(onehot)
pred = ds.tags.apply(onehot)
untagged_idx = ds.tags.str.len() == 0
print(f"|untagged exs|={nnz(untagged_idx)}")



splits = ["training", "validation", "test"]
print(ds.split.unique())
for s in splits:
    # exact comparison: str.match is a regex prefix match and could select other splits
    idxs = ds.split == s
    n_exs = nnz(idxs)
    print(f"Split {s}, {n_exs}")
    if n_exs == 0:
        # nothing to stack or score for an empty split
        continue
    # The metrics need (n_examples, n_classes) indicator matrices. Scoring the raw
    # ";"-joined strings would compare whole strings as if they were single classes.
    target_values = np.stack(target[idxs].tolist())
    predicted_values = np.stack(pred[idxs].tolist())
    if s == "test":
        print(ds.loc[idxs, ["labels", "tags"]])

    # on indicator matrices accuracy_score is the subset (exact match) accuracy
    accuracy = accuracy_score(target_values, predicted_values)
    jaccard = jaccard_score(target_values, predicted_values, labels=list(range(n_classes)), average="micro")
    print(f"{s}, accuracy = {accuracy:.2f}")
    print(f"{s}, jaccard = {jaccard:.2f}")

print(index2lab["16"])

In [None]:
# Scratch check: np.where versus a boolean mask when thresholding at 0.5.
a = np.zeros(10)
a[[1, 3]] = [1, 2]
above = a > 0.5
print(np.where(above))
print(above)

In [None]:
# Scratch check: jaccard_score on two small indicator matrices (2 samples, 3 labels).
t = np.array([[1, 0, 0], [0, 1, 1]])
p = np.array([[1, 0, 0], [0, 0, 1]])

toy_jaccard = jaccard_score(t, p, labels=[0, 1, 2], average="macro")
print(toy_jaccard)
