In [1]:
import os
import shutil
from tqdm import tqdm

In [2]:
import sys
sys.path.append('../')

from copy_files import copy_files

## Create Dataset: Train & Test (Model)

#### Test Dataset (Kaggle):
- Go to the split directory.
- Copy the test directory to a separate location.
- Zip the copied test directory.
- Rename the zipped test directory to 'ham10000-model-test'.
- Upload the zipped file to Kaggle.

#### Train Dataset (Kaggle):
- After running the code cells below, zip each generated train folder and upload it to Kaggle.

### No-Filtering

In [3]:
# Build the "No-Filtering" training folder: the 'nv' class comes from the
# original train split; every other class is assembled from the
# 'image-data-generator' folder plus the No-Filtering 'oversample' folder.

# root directory
root_dir = os.path.abspath('../../datasets/HAM10000/')

# Everything is assembled in a staging folder first and only renamed to its
# final name once all copies have finished.
output_dir = os.path.join(root_dir, 'dataset-processed/dataset-models')
temp_dir = os.path.join(root_dir, 'dataset-processed/dataset-models/temp')
final_dir = os.path.join(output_dir, 'ham10000-model-train-no-filtering')

# start from an empty staging folder (drops leftovers of an interrupted run)
if os.path.exists(temp_dir):
    shutil.rmtree(temp_dir)
os.makedirs(temp_dir)

classes = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

# creating class folders
for class_name in classes:
    os.makedirs(os.path.join(temp_dir, class_name))

# copying files
# NOTE(review): copy_files(source, destination) is a project helper; presumably
# it copies every file found in `source` into `destination` — confirm.
for class_name in tqdm(classes, colour='magenta'):
    destination = os.path.join(temp_dir, class_name)
    if class_name == 'nv':
        source_1 = os.path.join(root_dir, 'split/train', class_name)
        copy_files(source_1, destination)
    else:
        source_2 = os.path.join(root_dir, 'image-data-generator', class_name)
        source_3 = os.path.join(root_dir, 'stylegan2-ada/oversampling/No-Filtering/oversample', class_name)
        copy_files(source_2, destination)
        copy_files(source_3, destination)

# Publish the staged folder under its final name (zipping is a manual step).
# os.rename refuses to overwrite an existing directory (always on Windows, and
# on POSIX when it is non-empty), so a folder left by a previous run is
# removed first; this keeps the cell re-runnable.
if os.path.isdir(final_dir):
    shutil.rmtree(final_dir)
os.rename(temp_dir, final_dir)
print('Done!')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:44<00:00,  6.35s/it][0m

Done!





In [4]:
def dataset_process(filtered_dir, folder_name):
    """Assemble one training folder and publish it under ``folder_name``.

    The folder is built in ``dataset-processed/dataset-models/temp`` below the
    HAM10000 root (resolved relative to the current working directory) and is
    renamed to ``dataset-processed/dataset-models/<folder_name>`` once every
    class has been copied. One sub-folder is created per class:

    - ``nv`` is filled from ``split/train/nv``;
    - every other class is filled from ``image-data-generator/<class>`` and
      from ``<filtered_dir>/<class>``.

    Args:
        filtered_dir: Directory holding one sub-folder per non-``nv`` class
            with the filtered/oversampled images to add.
        folder_name: Name of the final folder inside
            ``dataset-processed/dataset-models``.

    Side effects:
        Deletes any existing staging folder, and any existing directory
        already named ``folder_name`` in the output directory, so the
        function can be re-run. Prints ``Done!`` on completion. Zipping the
        result is a separate manual step.
    """
    # root directory
    root_dir = os.path.abspath('../../datasets/HAM10000/')

    output_dir = os.path.join(root_dir, 'dataset-processed/dataset-models')
    temp_dir = os.path.join(root_dir, 'dataset-processed/dataset-models/temp')
    final_dir = os.path.join(output_dir, folder_name)

    # start from an empty staging folder (drops leftovers of an interrupted run)
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)

    classes = ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

    # creating class folders
    for class_name in classes:
        os.makedirs(os.path.join(temp_dir, class_name))

    # copying files
    # NOTE(review): copy_files(source, destination) is a project helper;
    # presumably it copies every file in `source` into `destination` — confirm.
    for class_name in tqdm(classes, colour='magenta'):
        destination = os.path.join(temp_dir, class_name)
        if class_name == 'nv':
            copy_files(os.path.join(root_dir, 'split/train', class_name), destination)
        else:
            copy_files(os.path.join(root_dir, 'image-data-generator', class_name), destination)
            copy_files(os.path.join(filtered_dir, class_name), destination)

    # Publish the staged folder. os.rename refuses to overwrite an existing
    # directory (always on Windows, and on POSIX when it is non-empty), so a
    # folder left by a previous run is removed first. The path comparison
    # protects the freshly built data if folder_name resolves to the staging
    # folder itself.
    if os.path.normpath(final_dir) != os.path.normpath(temp_dir) and os.path.isdir(final_dir):
        shutil.rmtree(final_dir)
    os.rename(temp_dir, final_dir)
    print('Done!')

In [5]:
# Root directory of the HAM10000 dataset, resolved to an absolute path relative
# to the current working directory. The cells below join their
# 'stylegan2-ada/oversampling/...' sub-paths onto it.
root_dir = os.path.abspath('../../datasets/HAM10000/')

### FBGT

In [6]:
# FBGT, alpha_1 (α = 0.80): build the train folder from this variant's
# 'oversample' directory.
alpha_1 = os.path.join(
    root_dir,
    'stylegan2-ada/oversampling/FBGT/alpha_1/oversample',
)
dataset_process(alpha_1, 'ham10000-model-train-fbgt-alpha-1')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:48<00:00,  6.90s/it][0m

Done!





In [7]:
# FBGT, alpha_2 (α = 0.85): build the train folder from this variant's
# 'oversample' directory.
alpha_2 = os.path.join(
    root_dir,
    'stylegan2-ada/oversampling/FBGT/alpha_2/oversample',
)
dataset_process(alpha_2, 'ham10000-model-train-fbgt-alpha-2')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:45<00:00,  6.55s/it][0m

Done!





In [8]:
# FBGT, alpha_3 (α = 0.90): build the train folder from this variant's
# 'oversample' directory.
alpha_3 = os.path.join(
    root_dir,
    'stylegan2-ada/oversampling/FBGT/alpha_3/oversample',
)
dataset_process(alpha_3, 'ham10000-model-train-fbgt-alpha-3')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:44<00:00,  6.37s/it][0m

Done!





### FAGT

In [9]:
# FAGT, alpha_1 (α = 0.75): build the train folder from this variant's
# 'filtered' directory.
alpha_1 = os.path.join(
    root_dir,
    'stylegan2-ada/oversampling/FAGT/alpha_1/filtered',
)
dataset_process(alpha_1, 'ham10000-model-train-fagt-alpha-1')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:48<00:00,  6.89s/it][0m

Done!





In [10]:
# FAGT, alpha_2 (α = 0.80): build the train folder from this variant's
# 'filtered' directory.
alpha_2 = os.path.join(
    root_dir,
    'stylegan2-ada/oversampling/FAGT/alpha_2/filtered',
)
dataset_process(alpha_2, 'ham10000-model-train-fagt-alpha-2')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:48<00:00,  6.90s/it][0m

Done!





In [11]:
# FAGT, alpha_3 (α = 0.85): build the train folder from this variant's
# 'filtered' directory.
alpha_3 = os.path.join(
    root_dir,
    'stylegan2-ada/oversampling/FAGT/alpha_3/filtered',
)
dataset_process(alpha_3, 'ham10000-model-train-fagt-alpha-3')

100%|[35m█████████████████████████████████████████████████████████████████████████████[0m| 7/7 [00:42<00:00,  6.11s/it][0m

Done!



