In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Info

- Number of raters for crowd: 511



## Dichotomy:
- Benign:
    - melanocytic nevi (NVs): nv
    - benign keratinocytic lesions (BKLs): bkl
    - dermatofisbromas (DFs): df
    - vascular lesions (VASCs)): vasc 
- Malign:
    - melanomas (MELs): mel
    - basal cell carcinomas (BCCs): bcc
    - actinic keratoses and intraepithelial carcinomas (AKIECs): akiec
    

## Expert 1: Only benign
    - nv, bkl, df, vasc
## Expert 2: Only malign
    - mel. bkl, df
## Expert 3: Only vascular lesion
    - vasc
## Expert 4: Only melanoma
    - mel 
## Expert 5: Malign, bkl
    - bkl
## Expert 6: Full Expert
    - All

Experts Confidence: 
From this: https://arxiv.org/pdf/2009.05977.pdf :
- Dermatologists with **dermatoscope and well-trained**: 75% to 84%
- **Naked-eyes** dermatologists: 60%



In [4]:
ham10000_human_df = pd.read_csv("./HAM10000/ISIC2018_Task3_Test_NatureMedicine_AI_Interaction_Benefit.csv", sep=',',decimal=',')

In [5]:
ham10000_test_df = pd.read_csv("./HAM10000/ham10000-private-test-images_metadata_2022-12-09.csv", sep=',',decimal=',')
ham10000_test_df = ham10000_test_df.rename(columns={"isic_id": "image_id"})

In [6]:
print("Human length: {}".format(len(ham10000_human_df)))
print("Test length: {}".format(len(ham10000_test_df)))



human_id = ham10000_human_df["image_id"].unique()
print("Human id: {}".format(len(human_id)))
test_id = ham10000_test_df["image_id"].unique()
print("Test id: {}".format(len(test_id)))
filter_id = list(set(human_id) & set(test_id))

Human length: 3762
Test length: 1705
Human id: 1412
Test id: 1705


In [7]:
human_filter_df = ham10000_human_df[ham10000_human_df['image_id'].isin(filter_id)]
test_filter_df = ham10000_test_df[ham10000_test_df['image_id'].isin(filter_id)]

print("Filter Human length: {}".format(len(human_filter_df)))
print("Filter Test length: {}".format(len(test_filter_df)))

print("Filter Human id: {}".format(len(human_filter_df["image_id"].unique())))
test_id = ham10000_test_df["image_id"].unique()
print("Filter Test id: {}".format(len(test_filter_df)))

Filter Human length: 3762
Filter Test length: 1412
Filter Human id: 1412
Filter Test id: 1412


In [8]:
test_filter_df["diagnosis"].unique()

array(['nevus', 'pigmented benign keratosis', 'melanoma',
       'squamous cell carcinoma', 'actinic keratosis',
       'basal cell carcinoma', 'vascular lesion', 'dermatofibroma'],
      dtype=object)

In [9]:
human_filter_df.columns

Index(['image_id', 'interaction_modality', 'prob_m_mal', 'prob_m_dx_akiec',
       'prob_m_dx_bcc', 'prob_m_dx_bkl', 'prob_m_dx_df', 'prob_m_dx_mel',
       'prob_m_dx_nv', 'prob_m_dx_vasc', 'prob_h_dx_akiec', 'prob_h_dx_bcc',
       'prob_h_dx_bkl', 'prob_h_dx_df', 'prob_h_dx_mel', 'prob_h_dx_nv',
       'prob_h_dx_vasc', 'user_dx_without_interaction_akiec',
       'user_dx_without_interaction_bcc', 'user_dx_without_interaction_bkl',
       'user_dx_without_interaction_df', 'user_dx_without_interaction_mel',
       'user_dx_without_interaction_nv', 'user_dx_without_interaction_vasc',
       'user_dx_with_interaction_akiec', 'user_dx_with_interaction_bcc',
       'user_dx_with_interaction_bkl', 'user_dx_with_interaction_df',
       'user_dx_with_interaction_mel', 'user_dx_with_interaction_nv',
       'user_dx_with_interaction_vasc'],
      dtype='object')

## Replace values in test df for classes


In [13]:
replace_dict = {"nevus":"nv",
                "pigmented benign keratosis": "bkl",
                "melanoma": "mel",
                "squamous cell carcinoma": "akiec", 
                "actinic keratosis": "akiec" , 
                "basal cell carcinoma": "bcc", 
                "vascular lesion": "vasc",
                "dermatofibroma": "df"
               }
# data.replace({
#     'column_name': {
#         'value_to_replace': 'replace_value_with_this'
#     }
# })

mal_dx = {"mel", "bcc", "akiec"}
ben_dx = {"nv", "bkl", "df", "vasc"}

In [15]:
for dx in replace_dict.keys():
    if dx == "squamous cell carcinoma":
        continue
    elif dx == "actinic keratosis":
        bool_dx_a = test_filter_df["diagnosis"]== dx
        bool_dx_b = test_filter_df["diagnosis"]== "squamous cell carcinoma"
        bool_dx = bool_dx_a + bool_dx_b

    bool_dx = test_filter_df["diagnosis"]== dx  # Get idx for diagnosis
    id_dx = test_filter_df[bool_dx]["image_id"]  # Get image id for diagnosis
    human_dx_df = human_filter_df[human_filter_df["image_id"].isin(id_dx)]  # filter human df with diagnosis images
    human_probs_df = human_dx_df.filter(regex='prob_h')  # get human probabilities
    human_probs_np = human_probs_df.to_numpy()  # get numpy matrix
    col_idx = human_probs_df.columns.get_loc("prob_h_dx_"+replace_dict[dx])  # obtain column idx

    # Get max
    conf_dx = (np.argmax(human_probs_np, axis=1) == col_idx).sum() / human_probs_np.shape[0]  # obtain human diagnosis confidence
    if replace_dict[dx] in mal_dx: 
        print("Mal Dx: {}| Conf: {}".format(replace_dict[dx], conf_dx))
    else: 
        print("Ben Dx: {}| Conf: {}".format(replace_dict[dx], conf_dx))

Ben Dx: nv| Conf: 0.8807461692205196
Ben Dx: bkl| Conf: 0.8092105263157895
Mal Dx: mel| Conf: 0.7441860465116279
Mal Dx: akiec| Conf: 0.5789473684210527
Mal Dx: bcc| Conf: 0.8505434782608695
Ben Dx: vasc| Conf: 1.0
Ben Dx: df| Conf: 0.7727272727272727
