In [1]:
import os
import pickle 
import pandas as pd
import numpy as np
from PIL import Image
from shutil import copyfile
import pickle
from tqdm.auto import tqdm
from pathlib import Path

In [2]:
# Root folder of the public rash-image datasets. Set the RASH_DATA_DIR
# environment variable to run the notebook on another machine; when it is
# unset the original location is used.
data_folder = Path(os.environ.get('RASH_DATA_DIR', '/mnt/tank/scratch/tpolevaya/datasets/rash/public'))

In [3]:
# Folder (relative to the notebook's working directory) that holds the
# mapping_*.pkl synonym tables read below and receives the per-dataset
# label pickles written by this notebook.
lbl_folder = Path('../_data_lbls')

In [4]:
# Synonym tables: each is a list of groups, every group being a list of
# label strings; a group's position in the list is used as its class index.
# NOTE: pickle.load can execute code embedded in the file - only open
# pickles from a trusted source.
with open(lbl_folder / 'mapping_diseases.pkl', 'rb') as fh:
    list_ds = pickle.load(fh)
with open(lbl_folder / 'mapping_morph.pkl', 'rb') as fh:
    list_ms = pickle.load(fh)

In [5]:
def _normalise_groups(groups):
    """Return a copy of ``groups`` with every alias lower-cased and stripped."""
    cleaned = []
    for group in groups:
        cleaned.append([alias.lower().strip() for alias in group])
    return cleaned


# Normalised copies used for the case/whitespace-insensitive lookups.
list_ds_2 = _normalise_groups(list_ds)
list_ms_2 = _normalise_groups(list_ms)

In [6]:
def _read_pickle(path):
    """Unpickle and return the object stored at ``path``.

    pickle.load can execute code embedded in the file, so this must only be
    pointed at trusted files.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# One list of records per source dataset (see the gsa[0], atlas_derm[0], ...
# cells below for the record layout of each).
gsa = _read_pickle(data_folder / 'gsa/images_gsa.pickle')
atlas_derm = _read_pickle(data_folder / 'atlas_derm/image_atlas.pickle')
hellenic = _read_pickle(data_folder / 'hellenic/image_hellenic.pickle')
# NOTE(review): the file name is spelled 'dermentnz' - presumably this matches
# the file on disk, since the recorded run loaded it successfully.
dermnetnz = _read_pickle(data_folder / 'dermnetnz/dermentnz_images.pickle')
ulb = _read_pickle(data_folder / 'ulb/image_ulb.pickle')

In [7]:
def get_disease_idx(name, mapping=None):
    """Return the index of the disease group that lists ``name`` as an alias.

    The label is lower-cased and stripped before an exact membership test
    against each group, so ``' psorIAsis '`` and ``'psoriasis'`` resolve to
    the same index.

    Args:
        name: Raw disease label (str).
        mapping: Optional list of alias groups to search; each group is a
            collection of already lower-cased, stripped strings. Defaults to
            the notebook-level ``list_ds_2``.

    Returns:
        Position of the first group containing the label, or -1 when no
        group contains it.
    """
    groups = list_ds_2 if mapping is None else mapping
    key = name.lower().strip()
    for idx, aliases in enumerate(groups):
        if key in aliases:
            return idx
    return -1
def get_morph_idxs(names, mapping=None):
    """Return the indices of all morphology groups matched by ``names``.

    ``names`` is a comma-separated label string. Each comma-separated part is
    lower-cased and stripped and compared against the aliases of every
    group. The whole normalised string is tested as well, because the
    mapping contains aliases that themselves include commas (for example
    'nodule,purple' or 'squame, lésion érythémato-squameuse'); splitting
    alone could never match those entries.

    Args:
        names: Raw morphology label string (str), parts separated by ','.
        mapping: Optional list of alias groups to search; each group is a
            collection of already lower-cased, stripped strings. Defaults to
            the notebook-level ``list_ms_2``.

    Returns:
        List of group indices, in ascending order, whose aliases contain at
        least one candidate; empty when nothing matches.
    """
    groups = list_ms_2 if mapping is None else mapping
    # Built once instead of on every loop iteration.
    candidates = {part.lower().strip() for part in names.split(',')}
    candidates.add(names.lower().strip())
    return [
        idx
        for idx, aliases in enumerate(groups)
        if not candidates.isdisjoint(aliases)
    ]

In [8]:
get_disease_idx(' psorIAsis ')

2

In [9]:
get_morph_idxs('Nodule, granulome,plaquE')

[1, 2, 3]

In [10]:
def can_load_img(pt):
    """Report whether PIL can open the file at ``pt`` as an image.

    Note that ``Image.open`` is lazy: it identifies the file without decoding
    the pixel data, so a True result means the file was recognised as an
    image, not that every pixel can be decoded.

    Args:
        pt: Path (or file-like object) accepted by ``Image.open``.

    Returns:
        True when ``Image.open`` succeeds, False when it raises.
    """
    try:
        # The context manager releases the file handle Image.open keeps open;
        # without it thousands of handles pile up while filtering a dataset.
        with Image.open(pt):
            return True
    except Exception:
        # Deliberately not a bare ``except:`` so that Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop a long filtering loop instead of being
        # recorded as an unreadable image.
        return False

### GSA

In [11]:
len(gsa)

5932

In [12]:
gsa[0]

['gsa/0000000.jpg',
 ' red,scaly',
 ' psoriasis',
 ' foot,sole',
 ' m',
 ' 64',
 ' clinical',
 0]

In [13]:
# One tuple per gsa record:
#   (position in gsa, image path as str, disease index or -1, morphology indices)
# Per gsa[0], field 0 is the relative image path, field 1 the morphology
# string and field 2 the disease label.
els = [
    (
        idx,
        str(data_folder / rec[0]),
        get_disease_idx(rec[2]),
        get_morph_idxs(rec[1]),
    )
    for idx, rec in enumerate(gsa)
]

In [14]:
# Drop records whose disease label is not in the mapping (index -1).
els = [rec for rec in els if rec[2] >= 0]

In [15]:
len(els)

1970

In [16]:
# Keep only records whose image file can be opened; tqdm shows progress.
els = [rec for rec in tqdm(els) if can_load_img(rec[1])]

  0%|          | 0/1970 [00:00<?, ?it/s]



In [17]:
len(els)

1969

In [18]:
# Persist the filtered gsa records next to the mapping files.
with open(lbl_folder / 'gsa.pkl', 'wb') as out_fh:
    pickle.dump(els, out_fh)

In [19]:
# Read the file just written back in; els now refers to the unpickled copy.
with open(lbl_folder / 'gsa.pkl', 'rb') as in_fh:
    els = pickle.load(in_fh)

In [20]:
# Print "<disease index> <number of gsa records with that index>" per class.
for cls_idx in range(len(list_ds_2)):
    n_records = sum(1 for rec in els if rec[2] == cls_idx)
    print(cls_idx, n_records)

0 71
1 92
2 196
3 87
4 32
5 132
6 280
7 46
8 128
9 74
10 357
11 16
12 60
13 40
14 87
15 24
16 54
17 14
18 8
19 171


### atlas_derm

In [21]:
len(atlas_derm)

10857

In [22]:
atlas_derm[0]

['atlas_derm/0000000.jpg', 'acanthosis nigricans-benign']

In [23]:
# One tuple per atlas_derm record:
#   (position in atlas_derm, image path as str, disease index or -1, [])
# Records are [relative image path, disease label] (see atlas_derm[0]); there
# is no morphology field, hence the empty list.
els = [
    (idx, str(data_folder / rec[0]), get_disease_idx(rec[1]), [])
    for idx, rec in enumerate(atlas_derm)
]

In [24]:
# Drop records whose disease label is not in the mapping (index -1).
els = [rec for rec in els if rec[2] >= 0]

In [25]:
len(els)

2740

In [26]:
# Keep only records whose image file can be opened; tqdm shows progress.
els = [rec for rec in tqdm(els) if can_load_img(rec[1])]

  0%|          | 0/2740 [00:00<?, ?it/s]

In [27]:
len(els)

2740

In [28]:
# Persist the filtered atlas_derm records next to the mapping files.
with open(lbl_folder / 'atlas_derm.pkl', 'wb') as out_fh:
    pickle.dump(els, out_fh)

### hellenic

In [29]:
len(hellenic)

2663

In [30]:
hellenic[0]

['hellenic/0000000.jpg', 'Dermatofibroma']

In [31]:
# One tuple per hellenic record:
#   (position in hellenic, image path as str, disease index or -1, [])
# Records are [relative image path, disease label] (see hellenic[0]); there
# is no morphology field, hence the empty list.
els = [
    (idx, str(data_folder / rec[0]), get_disease_idx(rec[1]), [])
    for idx, rec in enumerate(hellenic)
]

In [32]:
# Drop records whose disease label is not in the mapping (index -1).
els = [rec for rec in els if rec[2] >= 0]

In [33]:
len(els)

944

In [34]:
# Keep only records whose image file can be opened; tqdm shows progress.
els = [rec for rec in tqdm(els) if can_load_img(rec[1])]

  0%|          | 0/944 [00:00<?, ?it/s]

In [35]:
len(els)

944

In [36]:
# Persist the filtered hellenic records next to the mapping files.
with open(lbl_folder / 'hellenic.pkl', 'wb') as out_fh:
    pickle.dump(els, out_fh)

### dermnetnz

In [37]:
len(dermnetnz)

8635

In [38]:
dermnetnz[0]

['dermnetnz/0000000.jpg', 'Acne affecting the back images']

In [39]:
# One tuple per dermnetnz record:
#   (position in dermnetnz, image path as str, disease index or -1, [])
# Records are [relative image path, label text] (see dermnetnz[0]); there is
# no morphology field, hence the empty list.
els = [
    (idx, str(data_folder / rec[0]), get_disease_idx(rec[1]), [])
    for idx, rec in enumerate(dermnetnz)
]

In [40]:
# Drop records whose disease label is not in the mapping (index -1).
els = [rec for rec in els if rec[2] >= 0]

In [41]:
len(els)

1710

In [42]:
# Keep only records whose image file can be opened; tqdm shows progress.
els = [rec for rec in tqdm(els) if can_load_img(rec[1])]

  0%|          | 0/1710 [00:00<?, ?it/s]

In [43]:
len(els)

1710

In [44]:
# Persist the filtered dermnetnz records next to the mapping files.
with open(lbl_folder / 'dermnetnz.pkl', 'wb') as out_fh:
    pickle.dump(els, out_fh)

### ulb

In [45]:
len(ulb)

1207

In [46]:
ulb[0]

['Eczéma allergique de contact au sparadrap',
 'Avant-bras',
 'Vésicule, vésicule par spongiose',
 'NaN',
 'NaN',
 'ulb/0000000.jpg']

In [47]:
# One tuple per ulb record:
#   (position in ulb, image path as str, disease index or -1, morphology indices)
# Per ulb[0], the disease label is field 0, the morphology string field 2 and
# the relative image path the last field.
els = [
    (
        idx,
        str(data_folder / rec[-1]),
        get_disease_idx(rec[0]),
        get_morph_idxs(rec[2]),
    )
    for idx, rec in enumerate(ulb)
]

In [48]:
# Drop records whose disease label is not in the mapping (index -1).
els = [rec for rec in els if rec[2] >= 0]

In [49]:
len(els)

319

In [50]:
# Keep only records whose image file can be opened; tqdm shows progress.
els = [rec for rec in tqdm(els) if can_load_img(rec[1])]

  0%|          | 0/319 [00:00<?, ?it/s]

In [51]:
len(els)

319

In [52]:
# Persist the filtered ulb records next to the mapping files.
with open(lbl_folder / 'ulb.pkl', 'wb') as out_fh:
    pickle.dump(els, out_fh)

# Count

In [53]:
sorted(lbl_folder.iterdir())

[PosixPath('../_data_lbls/atlas_derm.pkl'),
 PosixPath('../_data_lbls/chicago.pkl'),
 PosixPath('../_data_lbls/compatibility.pkl'),
 PosixPath('../_data_lbls/dermis.pkl'),
 PosixPath('../_data_lbls/dermnet.pkl'),
 PosixPath('../_data_lbls/dermnetnz.pkl'),
 PosixPath('../_data_lbls/gsa.pkl'),
 PosixPath('../_data_lbls/hellenic.pkl'),
 PosixPath('../_data_lbls/iowa.pkl'),
 PosixPath('../_data_lbls/mapping_diseases.pkl'),
 PosixPath('../_data_lbls/mapping_morph.pkl'),
 PosixPath('../_data_lbls/ulb.pkl')]

In [54]:
# Total number of entries across every pickle in lbl_folder except the
# mapping_*.pkl synonym tables.
# NOTE(review): the folder listing also contains compatibility.pkl, whose
# length is added here as well - confirm it really holds image records.
c = 0
for fl in sorted(lbl_folder.iterdir()):
    if fl.name.startswith('mapping'):
        continue
    with open(fl, 'rb') as f:
        L = pickle.load(f)
    c += len(L)

In [55]:
c

17352

In [56]:
len(list_ms)

23

In [57]:
list_ms

[['hyperpigmentation',
  'patch',
  'hypopigmentation',
  'purpura',
  'macule brown',
  'telangiectasia',
  'macule red',
  'macule white',
  'macules',
  'macule black',
  'macula / patch',
  'erythema',
  'squame, lésion érythémato-squameuse',
  'macule, érythème',
  'macule, macule pigmentée',
  'purpura',
  'squame, lésion érythémato-squameusevésicule, vésicule par spongiose',
  'macule, macule dépigmentée',
  'livédo',
  'multiples lésions pigmentées',
  'pustulemacule, érythème',
  'macule'],
 ['granulome'],
 ['nodule',
  'nodule,purple',
  'nodule pink',
  'nodule,skin coloured',
  'nodule,black',
  'nodule,yellow',
  'node',
  'tumor',
  'tumeur, tumeur maligne',
  'tumeur, tumeur bénigne',
  'tumeur',
  'tumeur, carcinome spinocellulaire',
  'tumeur, mélanome',
  'nodule'],
 ['plaque',
  'papules,red',
  'papules,skin coloured',
  'papules',
  'papules,brown',
  'papules,yellow',
  'papules,black',
  'papules white',
  'comedone',
  'papule',
  'plaque',
  'papulo-vesicle',
 