In [12]:
# Project package providing DatasetType, NetworkType and Util, all used in this notebook.
import melanoma as mel

# Notebook magics: inline matplotlib figures and automatic reloading of edited modules.
# NOTE(review): `%load_ext` prints an "already loaded" notice when this cell is re-run
# in the same kernel (see the captured output of this cell).
%matplotlib inline
%load_ext autoreload
%autoreload -p 2

# Raise the root logger to INFO. The DEBUG call that follows is below that
# threshold, so it is not emitted.
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.debug("test")

import os
from string import Template

# NOTE(review): hard-coded, user-specific absolute path handed to mel.Util;
# presumably the dataset/scratch root -- confirm, and consider making it configurable.
rootpath = '/hpcstor6/scratch01/s/sanghyuk.kim001'
# Earlier image-size candidates, kept for reference (order is height, width):
# img_size = (224, 224) # height, width
# img_size = (150, 150) # height, width
utilInstance = mel.Util(rootpath)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Directory (relative to the notebook's working directory) that receives the
# generated sbatch scripts. Create it up front so writing the scripts cannot fail
# on a fresh checkout.
SLURM_DIR = './SLURMS'
os.makedirs(SLURM_DIR, exist_ok=True)

# sbatch script skeleton, rendered with string.Template.
#
# Placeholders filled by .substitute():
#   DBname      -- label used in the job name
#   classifier  -- value passed to train.py --CLASSIFIER (also part of the job name)
#   img_height  -- first value passed to --IMG_SIZE (also part of the job name)
#   img_width   -- second value passed to --IMG_SIZE (also part of the job name)
#   memory      -- value for `#SBATCH --mem`
#   db          -- space-separated dataset names passed to --DB
#
# Shell-side dollars are escaped as `$$` so they reach the script literally:
# `$$(conda ...)` and `$$SLURM_ARRAY_TASK_ID`. Callers therefore no longer have to
# pass SLURM_ARRAY_TASK_ID='$SLURM_ARRAY_TASK_ID'; doing so is still accepted and
# renders the same script.
#
# NOTE(review): the script sets both --mem and --mem-per-cpu, which the sbatch
# documentation describes as mutually exclusive -- confirm which one is intended.
# NOTE(review): --output/--error point at an absolute .../SLURMS/LOGS directory that
# nothing in this notebook creates -- verify it exists before submitting.
SLURM_TEMPLATE = Template('''#!/bin/bash
#SBATCH --job-name=${DBname}_${classifier}_${img_height}h_${img_width}w
#SBATCH -p haehn -q haehn_unlim
#SBATCH -w chimera13
#SBATCH -n 2 # Number of cores
#SBATCH -N 1 # Ensure that all cores are on one machine
#SBATCH --gres=gpu:A100:1
#SBATCH --mem=$memory
#SBATCH -t 3-00:00
#SBATCH --mem-per-cpu=8G
#SBATCH --open-mode=append
#SBATCH --output /home/sanghyuk.kim001/MELANOMA/melanoma-detection-CNN/SLURMS/LOGS/%x_%A_%a.out
#SBATCH --error /home/sanghyuk.kim001/MELANOMA/melanoma-detection-CNN/SLURMS/LOGS/%x_%A_%a.err
#SBATCH --array=1
##. /etc/profile,


echo `date`

eval "$$(conda shell.bash hook)"
conda activate clean_chimera_env

# For debugging purposes.
python --version
nvcc -V

# Print this sub-job's task ID
echo "My SLURM_ARRAY_TASK_ID: " $$SLURM_ARRAY_TASK_ID

cd /home/sanghyuk.kim001/MELANOMA/melanoma-detection-CNN/

export PYTHONUNBUFFERED=TRUE
python train.py --DB $db --IMG_SIZE ${img_height} ${img_width} --CLASSIFIER $classifier --JOB_INDEX $$SLURM_ARRAY_TASK_ID

# end
exit 0;
''')

In [14]:
# Names of every dataset and every network exposed by the melanoma package.
DBs = [db.name for db in mel.DatasetType]
Classifiers = [c.name for c in mel.NetworkType]

# Dataset combinations to train on. Each entry is the '+'-joined list of
# mel.DatasetType member names; the per-combination name list is derived from the
# entry itself, so a key and its datasets can never disagree.
COMBINED_DB_KEYS = (
  # 1 DB
  'HAM10000',
  'ISIC2016',
  'ISIC2017',
  'ISIC2018',
  'ISIC2019',
  'ISIC2020',
  'PH2',
  '_7_point_criteria',
  'PAD_UFES_20',
  'MEDNODE',
  'KaggleMB',

  # 2 DBs
  'ISIC2016+ISIC2017',
  'ISIC2016+ISIC2018',
  'ISIC2016+ISIC2019',
  'ISIC2016+ISIC2020',
  'ISIC2016+PH2',
  'ISIC2016+_7_point_criteria',
  'ISIC2016+PAD_UFES_20',
  'ISIC2016+MEDNODE',
  'ISIC2016+KaggleMB',

  # 3 DBs
  'ISIC2016+ISIC2017+ISIC2018',
  'ISIC2016+ISIC2018+ISIC2019',
  'ISIC2016+ISIC2019+ISIC2020',
  'ISIC2016+ISIC2020+PH2',
  'ISIC2016+PH2+_7_point_criteria',
  'ISIC2016+_7_point_criteria+PAD_UFES_20',
  'ISIC2016+PAD_UFES_20+MEDNODE',
  'ISIC2016+MEDNODE+KaggleMB',

  # 4 DBs
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019',
  'ISIC2016+ISIC2018+ISIC2019+ISIC2020',
  'ISIC2016+ISIC2020+PH2+_7_point_criteria',
  'ISIC2016+PH2+_7_point_criteria+PAD_UFES_20',
  'ISIC2016+_7_point_criteria+PAD_UFES_20+MEDNODE',
  'ISIC2016+PAD_UFES_20+MEDNODE+KaggleMB',

  # 5 DBs
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2020+PH2',
  'ISIC2016+ISIC2017+ISIC2018+PH2+_7_point_criteria',
  'ISIC2016+ISIC2017+ISIC2018+_7_point_criteria+PAD_UFES_20',
  'ISIC2016+ISIC2017+ISIC2018+PAD_UFES_20+MEDNODE',
  'ISIC2016+ISIC2017+ISIC2018+MEDNODE+KaggleMB',

  # 6 DBs
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+PH2+_7_point_criteria',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+_7_point_criteria+PAD_UFES_20',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+PAD_UFES_20+MEDNODE',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+MEDNODE+KaggleMB',

  # 7 DBs (the hand-written lists for these four used to contain ISIC2019 twice)
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+_7_point_criteria',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+_7_point_criteria+PAD_UFES_20',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PAD_UFES_20+MEDNODE',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+MEDNODE+KaggleMB',

  # 8 DBs
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+_7_point_criteria+PAD_UFES_20',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+PAD_UFES_20+MEDNODE',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+MEDNODE+KaggleMB',

  # 9 DBs
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+_7_point_criteria+PAD_UFES_20+MEDNODE',
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+_7_point_criteria+MEDNODE+KaggleMB',

  # 10 DBs
  'ISIC2016+ISIC2017+ISIC2018+ISIC2019+ISIC2020+PH2+_7_point_criteria+PAD_UFES_20+MEDNODE+KaggleMB',
)


def _dataset_names(combo_key):
  """Split a '+'-joined combination key into its dataset names.

  Each part is resolved through ``mel.DatasetType`` (same attribute access the
  hand-written table used), so a misspelled name fails here with AttributeError
  instead of inside a submitted job.

  Raises:
    ValueError: if the same dataset appears more than once in ``combo_key``.
  """
  names = [getattr(mel.DatasetType, part).name for part in combo_key.split('+')]
  if len(set(names)) != len(names):
    raise ValueError(f'duplicate dataset in combination {combo_key!r}')
  return names


# Maps each combination key to the list of dataset names it is made of.
combinedDBs = {key: _dataset_names(key) for key in COMBINED_DB_KEYS}

# Reference numbering of the datasets (presumably the mel.DatasetType values --
# verify against the package):
# HAM10000 = 1
# ISIC2016= 2
# ISIC2017=3
# ISIC2018 = 4
# ISIC2019 = 5
# ISIC2020 = 6
# PH2 = 7
# _7_point_criteria = 8
# PAD_UFES_20 = 9
# MEDNODE = 10
# KaggleMB = 11

# (height, width) passed to train.py --IMG_SIZE and embedded in the job name.
img_size = (150, 150)

# Value substituted into `#SBATCH --mem`.
SLURM_MEMORY = 8000

# Write one sbatch script per (dataset combination, classifier) pair.
# NOTE: the four 7-dataset scripts used to be written under names containing
# 'ISIC2019+ISIC2019'. Scripts left over from earlier runs stay in SLURM_DIR
# until they are deleted by hand.
for comb_d, db_names in combinedDBs.items():
  DBname = '+'.join(db_names)
  for c in Classifiers:
    new_slurm = SLURM_TEMPLATE.substitute(
      db=' '.join(db_names),
      DBname=DBname,
      img_height=img_size[0],
      img_width=img_size[1],
      memory=SLURM_MEMORY,
      classifier=c,
      # The template may reference $SLURM_ARRAY_TASK_ID unescaped; feeding the
      # placeholder its own text keeps the shell variable intact in the script.
      SLURM_ARRAY_TASK_ID='$SLURM_ARRAY_TASK_ID',
    )
    slurm_file = os.path.join(SLURM_DIR, DBname+'_'+c+'.sh')
    with open(slurm_file, 'w') as f:
      f.write(new_slurm)



In [15]:

import glob
import itertools  # unused here; kept in case later cells rely on this import
import os
import subprocess
import sys

# Submit every script found in SLURM_DIR (this includes scripts left over from
# earlier runs). Sorting makes the submission order reproducible.
batches = sorted(glob.glob(f'{SLURM_DIR}/*.sh'))
for b in batches:
    # Argument list instead of a shell string: the file name is handed to sbatch
    # verbatim, with no shell parsing. Raises FileNotFoundError if sbatch is not
    # on PATH.
    submission = subprocess.run(['sbatch', b], capture_output=True, text=True)
    print(submission.stdout, end='')
    if submission.returncode != 0:
        # Report failed submissions instead of silently dropping them.
        print(f'sbatch failed for {b} (exit code {submission.returncode}): '
              f'{submission.stderr.strip()}', file=sys.stderr)


Submitted batch job 204360
Submitted batch job 204361
Submitted batch job 204362
Submitted batch job 204363


Submitted batch job 204364
Submitted batch job 204365
Submitted batch job 204366
Submitted batch job 204367
Submitted batch job 204368
Submitted batch job 204369
Submitted batch job 204370


In [1]:
%run train.py --DB ISIC2016 ISIC2017 ISIC2018 ISIC2019 ISIC2019 ISIC2020 MEDNODE KaggleMB --IMG_SIZE 150 150 --CLASSIFIER EfficientNetB2 --JOB_INDEX 1

2024-02-15 23:42:19.567215: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


DB: ['ISIC2016', 'ISIC2017', 'ISIC2018', 'ISIC2019', 'ISIC2019', 'ISIC2020', 'MEDNODE', 'KaggleMB']
IMG_SIZE: [150, 150]
CLASSIFIER: EfficientNetB2
JOB_INDEX: 1
Combining...




Combining 1 db out of 8 dbs
Combining 2 db out of 8 dbs
Combining 3 db out of 8 dbs
Combining 4 db out of 8 dbs
Combining 5 db out of 8 dbs
Combining 6 db out of 8 dbs
Combining 7 db out of 8 dbs
Combining 8 db out of 8 dbs
Stacking training images
Stacking training labels
Stacking validation images
Stacking validation labels
Combining...
Combining 1 db out of 8 dbs
Combining 2 db out of 8 dbs
Combining 3 db out of 8 dbs
Combining 4 db out of 8 dbs
Combining 5 db out of 8 dbs
Combining 6 db out of 8 dbs
Combining 7 db out of 8 dbs
Combining 8 db out of 8 dbs
Stacking training images
