In [1]:
import pandas as pd
from os.path import join
import json
from typing import List

In [2]:
# List the task's input directory to confirm which files are available.
!ls ../input/imedhub-internship/task_2/

doctors.csv  example.csv  task_2.docx


In [3]:
# Single source of truth for the task's input directory, so both reads stay in sync.
DATA_DIR = join('..', 'input', 'imedhub-internship', 'task_2')

# Raw doctors' annotations; each doctor's marks are stored as a JSON string per row.
df_doctors = pd.read_csv(join(DATA_DIR, 'doctors.csv'))
# Bounding boxes the doctors' marks are compared against (treated as the actual values below).
df_example = pd.read_csv(join(DATA_DIR, 'example.csv'))

In [4]:
# Preview the first rows of the raw doctors table.
df_doctors.head()

Unnamed: 0,id,original,on_created,doctor1_id,doctor1_mark,on_doctor1_created,on_doctor1_updated,doctor2_id,doctor2_mark,on_doctor2_created,on_doctor2_updated,doctor3_id,doctor3_mark,on_doctor3_created,on_doctor3_updated
0,5155,"{""url"":""/data/mask/1.png"",""name"":""1.png""}",8/26/2019 12:00:33 PM,,,,,8.0,"{""marks"":[{""symptom"":""cardiomegaly"",""x"":38.877...",8/26/2019 6:39:14 PM,8/26/2019 6:39:14 PM,,,,
1,5217,"{""url"":""/data/mask/28.png"",""name"":""2.png""}",8/26/2019 12:00:33 PM,7.0,"{""marks"":[{""symptom"":""cardiomegaly"",""x"":26.352...",8/27/2019 6:15:24 PM,8/27/2019 6:15:24 PM,,,,,,,,
2,5299,"{""url"":""/data/mask/3.png"",""name"":""3.png""}",8/26/2019 12:00:33 PM,,,,,8.0,"{""marks"":[{""symptom"":""pneumonia"",""x"":58.486706...",8/30/2019 9:22:05 PM,8/30/2019 9:22:05 PM,,,,


In [5]:
# Preview the first rows of the actual (reference) bounding boxes.
df_example.head()

Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,1.png,Cardiomegaly,363.932203,470.002268,451.254237,271.186441
1,2.png,Cardiomegaly,251.394709,435.606349,497.371429,430.18836
2,3.png,Effusion,149.617778,728.500156,27.306667,65.991111


In [6]:
# Empty frame that reuses the column labels of the reference table.
# NOTE(review): df_pred is not referenced by any later cell visible here — confirm it is still needed.
df_pred = pd.DataFrame(columns=df_example.columns)

In [7]:
def doctors2pred(df: pd.DataFrame, img_width=1024, img_height=1024, num_doctors=3, print_rows=True) -> pd.DataFrame:
    '''
    Flatten the doctors' annotation table into one row per bounding box

    Every ``doctor{i}_mark`` cell that holds a JSON string is decoded and each
    entry of its ``marks`` list becomes one output row. ``x`` and ``width`` are
    multiplied by ``img_width / 100``; ``y`` and ``height`` by ``img_height / 100``
    (i.e. the marks are treated as percentages of the image size).

    Parameters
    ----------
    df: pd.DataFrame
        Original doctors table. Needs an ``original`` column holding a JSON
        object with a ``name`` key, and ``doctor{i}_mark`` columns for every
        ``i`` in ``1..num_doctors``
    img_width, img_height: int
        Original image dimensions
    num_doctors: int
        Number of ``doctor{i}_mark`` columns to read
    print_rows: bool
        If true, print every decoded mark

    Return
    ------
    pd.DataFrame
        Table with columns ``Image Index``, ``Finding Label``, ``Bbox [x``,
        ``y``, ``w`` and ``h]``. The columns are present even when no mark
        was found, so the result can always be indexed by column

    '''

    columns = ['Image Index', 'Finding Label', 'Bbox [x', 'y', 'w', 'h]']
    w_ratio, h_ratio = img_width / 100, img_height / 100

    preds = []
    for _, row in df.iterrows():
        name = json.loads(row['original'])['name']
        for doct_ind in range(1, num_doctors + 1):
            raw_mark = row[f'doctor{doct_ind}_mark']
            # A doctor who left no annotation has a non-string cell (NaN after read_csv).
            if not isinstance(raw_mark, str):
                continue
            for mark in json.loads(raw_mark)['marks']:
                if print_rows:
                    print(f'img_name: {name:>5}, doct_ind: {doct_ind:>2}, mark: {mark}')
                preds.append({'Image Index': name,
                              'Finding Label': mark['symptom'],
                              'Bbox [x': mark['x'] * w_ratio,
                              'y': mark['y'] * h_ratio,
                              'w': mark['width'] * w_ratio,
                              'h]': mark['height'] * h_ratio})

    # Explicit columns keep the schema stable when ``preds`` is empty; without
    # them an empty result has no columns and any column lookup raises KeyError.
    return pd.DataFrame(preds, columns=columns)

In [8]:
# Flatten every doctor's marks into one row per bounding box, scaled to the default 1024x1024 image size.
# With the default print_rows=True each decoded mark is printed as it is processed.
doctors_pred = doctors2pred(df_doctors)

img_name: 1.png, doct_ind:  2, mark: {'symptom': 'cardiomegaly', 'x': 38.877754, 'y': 55.01002, 'width': 38.376755, 'height': 16.132265}
img_name: 1.png, doct_ind:  2, mark: {'symptom': 'consolidation', 'x': 57.71543, 'y': 38.176353, 'width': 8.917835, 'height': 12.625251}
img_name: 1.png, doct_ind:  2, mark: {'symptom': 'consolidation', 'x': 33.466934, 'y': 42.785572, 'width': 5.811623, 'height': 14.128257}
img_name: 1.png, doct_ind:  2, mark: {'symptom': 'infiltration', 'x': 26.152304, 'y': 26.853708, 'width': 13.3266535, 'height': 16.132265}
img_name: 1.png, doct_ind:  2, mark: {'symptom': 'fibrosis', 'x': 56.30081, 'y': 25.101625, 'width': 5.894309, 'height': 11.178862}
img_name: 1.png, doct_ind:  2, mark: {'symptom': 'fibrosis', 'x': 31.199186, 'y': 25.304878, 'width': 8.028456, 'height': 14.53252}
img_name: 1.png, doct_ind:  2, mark: {'symptom': 'effusion', 'x': 77.94715, 'y': 65.04065, 'width': 2.8455284, 'height': 8.9430895}
img_name: 2.png, doct_ind:  1, mark: {'symptom': 'car

In [9]:
# Preview the converted doctors' predictions.
doctors_pred.head()

Unnamed: 0,Image Index,Finding Label,Bbox [x,y,w,h]
0,1.png,cardiomegaly,398.108201,563.302605,392.977971,165.194394
1,1.png,consolidation,591.006003,390.925855,91.31863,129.28257
2,1.png,consolidation,342.701404,438.124257,59.51102,144.673352
3,1.png,infiltration,267.799593,274.98197,136.464932,165.194394
4,1.png,fibrosis,576.520294,257.04064,60.357724,114.471547


In [10]:
def calculate_iou(x1: float, y1: float, w1: float, h1: float, x2: float, y2: float, w2: float, h2: float) -> float:
    '''
    Calculates Intersection Over Union of two axis-aligned rectangles

    Each rectangle spans ``[x, x + w]`` horizontally and ``[y - h, y]``
    vertically.

    NOTE(review): if the boxes are given by their top-left corner in image
    coordinates (y growing downward), the vertical span would be
    ``[y, y + h]`` instead, and the result differs whenever the two heights
    differ - confirm the convention against the data source.

    Parameters
    ----------
    x1, y1, x2, y2 : float
        Non-negative anchor coordinates of the rectangles
    w1, w2: float
        Non-negative widths of rectangles
    h1, h2: float
        Non-negative heights of rectangles

    Return
    ------
    float
        Intersection Over Union [0, 1]; ``0.0`` when the rectangles do not
        overlap or when both have zero area

    Raises
    ------
    ValueError
        For negative parameter values

    '''

    if min(x1, y1, w1, h1, x2, y2, w2, h2) < 0:
        raise ValueError("All values should be positive")

    # Left edge and upper y-bound of the overlap region.
    int_x = max(x1, x2)
    int_y = min(y1, y2)
    int_w = min(x1 + w1, x2 + w2) - int_x
    int_h = int_y - max(y1 - h1, y2 - h2)

    # A negative extent on either axis means the rectangles are disjoint.
    if min(int_w, int_h) < 0:
        return 0.0

    int_area = int_w * int_h
    union_area = w1 * h1 + w2 * h2 - int_area

    # Two degenerate (zero-area) rectangles have an empty union; report no
    # overlap instead of dividing by zero.
    if union_area <= 0:
        return 0.0

    return int_area / union_area

In [11]:
def calculate_iou_series(actual: pd.Series, pred: pd.Series, x='Bbox [x', y='y', w='w', h='h]') -> float:
    '''
    Intersection Over Union of two bounding boxes stored as table rows

    Parameters
    ----------
    actual: pd.Series
        Row holding the actual bounding box
    pred: pd.Series
        Row holding the doctor's predicted bounding box
    x, y, w, h: str
        Labels under which both rows store the coordinates, width and height

    Return
    ------
    float
        Intersection Over Union, as computed by ``calculate_iou``

    '''
    box_labels = (x, y, w, h)
    actual_box = [actual[key] for key in box_labels]
    pred_box = [pred[key] for key in box_labels]
    # Argument order matches calculate_iou: actual box first, predicted box second.
    return calculate_iou(*actual_box, *pred_box)

In [12]:
def calculate_mean_iou(actual: pd.DataFrame, pred: pd.DataFrame, ind='Image Index', label='Finding Label', x='Bbox [x', y='y', w='w', h='h]') -> List[float]:
    '''
    Mean Intersection Over Union of the predictions matching each actual row

    A predicted row matches an actual row when both have the same ``ind``
    value and the same ``label`` value (compared case-insensitively).

    Parameters
    ----------
    actual: pd.DataFrame
        Table of actual values
    pred: pd.DataFrame
        Table of doctors' predicted values
    ind, label: str
        Labels of the image identifier and finding name columns
    x, y, w, h: str
        Labels of coordinates, widths and heights

    Return
    ------
    List[float]
        Mean IoU over the matching predicted rows, one entry per actual row

        -1. : No predicted values for actual

    '''

    # No predictions at all (possibly a column-less frame): nothing can match,
    # so every actual row gets the "no prediction" sentinel.
    if len(pred) == 0:
        return [-1.] * len(actual)

    # Lower-cased labels do not depend on the actual row; compute them once.
    pred_labels = pred[label].str.lower()

    means = []
    for _, actual_row in actual.iterrows():
        same_image = pred[ind] == actual_row[ind]
        same_label = pred_labels == actual_row[label].lower()
        pred_rows = pred[same_image & same_label]

        if len(pred_rows) == 0:
            means.append(-1.)
            continue

        ious = [calculate_iou_series(actual_row, pred_row, x, y, w, h)
                for _, pred_row in pred_rows.iterrows()]
        means.append(sum(ious) / len(ious))
    return means

In [13]:
# Mean IoU of the doctors' boxes against each actual row (-1.0 would mark a row with no matching prediction).
calculate_mean_iou(df_example, doctors_pred)

[0.17764686062514706, 0.7219483672303589, 0.3362351980851913]

- Each of the first two actual rows has exactly one matching predicted row;
- The last actual row has two matching predicted rows, one of which does not overlap it at all.