In [1]:
import os
import re
import pandas as pd
from tqdm.notebook import tqdm
import subprocess
from PIL import Image
import numpy as np

In [2]:
df = pd.read_csv('../../dataset/OpenPart.csv')
df.head()

Unnamed: 0,Case,Sample 1,Sample 2,Sample 3
0,00000072_000.png,1,5,1
1,00000150_002.png,5,5,3
2,00000181_061.png,4,4,3
3,00000211_019.png,4,4,2
4,00000211_041.png,3,5,2


In [3]:
def rename_file(num):
    def fun(row):
        row[0] = f'dataset/sample_{num}/{row[0]}'
        row[0] = row[0].replace('.png', f'_s{num}.png')
        return row
    return fun

def rename_expert_file(file_name):
    file_name = 'dataset/Expert/' + file_name[:-4] + '_expert.png'
    return file_name

In [4]:
df_s1 = df.iloc[:,[0,1]]
df_s1 = df_s1.rename(columns={"Case": "pred_mask_path", "Sample 1": "review"})
df_s1['pred_mask_path'].apply(rename_expert_file)
df_s1['true_mask_path'] = df_s1['pred_mask_path'].apply(rename_expert_file)
df_s1 = df_s1.apply(rename_file(1), axis=1)
df_s1 = df_s1[['true_mask_path', 'pred_mask_path', 'review']]

df_s2 = df.iloc[:,[0,1]]
df_s2 = df_s2.rename(columns={"Case": "pred_mask_path", "Sample 1": "review"})
df_s2['true_mask_path'] = df_s2['pred_mask_path'].apply(rename_expert_file)
df_s2 = df_s2.apply(rename_file(2), axis=1)
df_s2 = df_s2[['true_mask_path', 'pred_mask_path', 'review']]

df_s3 = df.iloc[:,[0,1]]
df_s3 = df_s3.rename(columns={"Case": "pred_mask_path", "Sample 1": "review"})
df_s3['true_mask_path'] = df_s3['pred_mask_path'].apply(rename_expert_file)
df_s3 = df_s3.apply(rename_file(3), axis=1)
df_s3 = df_s3[['true_mask_path', 'pred_mask_path', 'review']]

In [5]:
_metrics = [
    'DICE', 'JACRD', 'AUC', 'KAPPA', 'RNDIND', 'ADJRIND', 'ICCORR', 'VOLSMTY', 'MUTINF',
    'HDRFDST', 'MAHLNBS', 'VARINFO', 'GCOERR', 'PROBDST', 'SNSVTY', 'SPCFTY', 'PRCISON',
    'FMEASR', 'ACURCY', 'FALLOUT', 'TP', 'FP', 'TN', 'FN', 'REFVOL', 'SEGVOL'
]

In [6]:
new_df = pd.concat([df_s1, df_s2, df_s3]).reset_index(drop=True)

new_df['true_mask_pixels'] = 0
new_df['pred_mask_pixels'] = 0

for t in _metrics:
    new_df[t] = 0

In [7]:
def calculate_metrics(true_path, pred_path, debug=False):    
    cmd_metrics = ','.join(_metrics)
    
    metrics = subprocess.run(['./scripts/evaluate',
                              true_path,
                              pred_path,
                             '-use', cmd_metrics],
                              cwd=os.path.realpath(os.path.join(os.getcwd(), '..', '..')),
                              capture_output=True)
    
    metrics = metrics.stdout.decode("utf-8").strip()
    if debug:
        print(metrics)
    metrics = re.findall(r"([A-Z]+)\s+=\s([\.\d]+)\s+[\w\(\)\-,\s]+\s?$",
                         metrics, re.MULTILINE)
    
    return metrics

In [8]:
def calc_pixels(path):
    img = Image.open(f'../../{path}').convert('L')
    np_img = np.array(img)
    np_img[np_img > 0] = 1
    return np.count_nonzero(np_img)

In [9]:
for i in tqdm(new_df.index, ncols='100%'):
    true_mask_path = new_df.loc[i, ['true_mask_path']][0]
    pred_mask_path = new_df.loc[i, ['pred_mask_path']][0]
    metrics = calculate_metrics(true_mask_path, pred_mask_path)
    
    for metric_name, value in metrics:
        new_df.loc[i,[metric_name]] = value
        
    new_df.loc[i,['true_mask_pixels']] = calc_pixels(true_mask_path)
    new_df.loc[i,['pred_mask_pixels']] = calc_pixels(pred_mask_path)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, layout=Layout(flex='2'), max=180.0), HTML(value='')), …




In [10]:
new_df

Unnamed: 0,true_mask_path,pred_mask_path,review,true_mask_pixels,pred_mask_pixels,DICE,JACRD,AUC,KAPPA,RNDIND,...,PRCISON,FMEASR,ACURCY,FALLOUT,TP,FP,TN,FN,REFVOL,SEGVOL
0,dataset/Expert/00000072_000_expert.png,dataset/sample_1/00000072_000_s1.png,1,0,2121,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,dataset/Expert/00000150_002_expert.png,dataset/sample_1/00000150_002_s1.png,5,2729,959,0.520065,0.351411,0.675705,0.519415,0.996630,...,1.000000,0.520065,0.998312,0.000000,959,0,1045847,1770,2729,959
2,dataset/Expert/00000181_061_expert.png,dataset/sample_1/00000181_061_s1.png,4,62059,173279,0.519559,0.350949,0.935726,0.473689,0.807597,...,0.352818,0.519559,0.892172,0.113676,61136,112143,874374,923,62059,173279
3,dataset/Expert/00000211_019_expert.png,dataset/sample_1/00000211_019_s1.png,4,138819,251580,0.708670,0.548791,0.936005,0.648735,0.806597,...,0.549853,0.708670,0.891534,0.124482,138332,113248,796509,487,138819,251580
4,dataset/Expert/00000211_041_expert.png,dataset/sample_1/00000211_041_s1.png,3,124276,285638,0.562957,0.391747,0.872117,0.476487,0.716678,...,0.403945,0.562957,0.829149,0.184200,115382,170256,754044,8894,124276,285638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,dataset/Expert/00011237_006_expert.png,dataset/sample_3/00011237_006_s3.png,3,39847,70040,0.519079,0.350511,0.837288,0.494596,0.904282,...,0.407196,0.519079,0.949601,0.041161,28520,41520,967209,11327,39847,70040
176,dataset/Expert/00011269_019_expert.png,dataset/sample_3/00011269_019_s3.png,1,3351,60618,0.000000,0.000000,0.471002,0,0.885432,...,0.000000,0.000000,0.938994,0.057995,0,60618,984607,3351,3351,60618
177,dataset/Expert/00011355_011_expert.png,dataset/sample_3/00011355_011_s3.png,4,39493,195195,0.304123,0.179331,0.872778,0.257612,0.737018,...,0.182827,0.304123,0.844252,0.158072,35687,159508,849575,3806,39493,195195
178,dataset/Expert/00011450_000_expert.png,dataset/sample_3/00011450_000_s3.png,4,49848,9603,0.315453,0.187263,0.593943,0.304776,0.925389,...,0.976466,0.315453,0.961188,0.000226,9377,226,998502,40471,49848,9603


In [11]:
# new_df.to_csv('../../corpus/calculated_metrics.csv')