## Import Libraries

In [1]:
import os
import cv2
import pandas as pd
from tqdm import tqdm

## Create Folders for the Train/Test Split

In [2]:
# Root of the output tree: <split>/{train,test}/{benign,malignant}.
dataset_dir = os.path.abspath('../../datasets/ISIC-2016/split')

# Record whether the root was already there so the status message printed
# below stays the same as before.
already_existed = os.path.exists(dataset_dir)

# Always (re)create every leaf folder. With exist_ok=True this is a no-op for
# folders that are present and repairs a tree that was only partially created
# (e.g. an interrupted earlier run), which would otherwise make the later
# image writes fail.
targetnames = ['benign', 'malignant']
for split_name in ('train', 'test'):
    for i in targetnames:
        os.makedirs(os.path.join(dataset_dir, split_name, i), exist_ok=True)

if already_existed:
    print("Folders already exist!")
else:
    print("Folders successfully created!")

Folders successfully created!


## Download Path

In [3]:
# Absolute path of the folder the raw ISIC-2016 files are read from.
# The relative path is resolved against the current working directory.
downloads = os.path.abspath('../../datasets/ISIC-2016/downloads')

## Train

In [4]:
# Locations inside `downloads`: the folder the training images are read from
# and the CSV holding their ground-truth labels.
train_dir = os.path.join(downloads, "ISBI2016_ISIC_Part3_Training_Data")
train_gt = os.path.join(downloads, "ISBI2016_ISIC_Part3_Training_GroundTruth.csv")

In [5]:
# Load the training ground truth. Column labels are supplied via `names`,
# so the first line of the file is treated as data, not as a header row.
train_meta = pd.read_csv(train_gt, sep=",", names=["filename", "class"])

In [6]:
# Pull both columns out as arrays for the copy loop further down.
train_filenames = train_meta["filename"].values
# NOTE(review): `train_gt` held the CSV path until this line and is rebound to
# the label array here. After this cell runs, re-running the read_csv cell
# above no longer receives a path unless the path cell is re-run first.
train_gt = train_meta["class"].values

In [7]:
# Display the loaded training table for a quick visual check.
train_meta

Unnamed: 0,filename,class
0,ISIC_0000000,benign
1,ISIC_0000001,benign
2,ISIC_0000002,malignant
3,ISIC_0000004,malignant
4,ISIC_0000006,benign
...,...,...
895,ISIC_0011393,benign
896,ISIC_0011397,benign
897,ISIC_0011398,benign
898,ISIC_0011400,benign


In [8]:
# Resize every training image and write it into the folder matching its label.
#
# Destination folder per label, built once from the split root instead of
# re-resolving a relative path on every iteration.
train_out_dirs = {
    "benign": os.path.join(dataset_dir, 'train', 'benign'),
    "malignant": os.path.join(dataset_dir, 'train', 'malignant'),
}
skipped_train = []  # (filename, label) pairs whose label is not a known class

# zip() has no length, so `total` is passed explicitly to get a real progress bar.
for filename, gt in tqdm(zip(train_filenames, train_gt), total=len(train_filenames)):

    out_dir = train_out_dirs.get(gt)
    if out_dir is None:
        # Unknown label: nothing is written for this row, but it is reported below.
        skipped_train.append((filename, gt))
        continue

    filename_jpg = filename + '.jpg'
    path = os.path.join(train_dir, filename_jpg)

    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, so check it before handing the result to cv2.resize.
    img = cv2.imread(path)
    if img is None:
        raise OSError(f"Could not read image: {path}")

    # Scale to 286x286, then keep the 256x256 region anchored at the top-left.
    # NOTE(review): this is a fixed corner crop; confirm it is intended rather
    # than a centre or random crop.
    img = cv2.resize(img, (286, 286))
    img = img[0:256, 0:256]

    # cv2.imwrite reports failure through its boolean return value.
    out_path = os.path.join(out_dir, filename_jpg)
    if not cv2.imwrite(out_path, img):
        raise OSError(f"Could not write image: {out_path}")

if skipped_train:
    print(f"Skipped {len(skipped_train)} file(s) with unrecognised labels: {skipped_train[:5]}")
print("Done!")

900it [00:24, 37.42it/s] 

Done!





## Test

In [9]:
# Locations inside `downloads`: the folder the test images are read from
# and the CSV holding their ground-truth labels.
test_dir = os.path.join(downloads, "ISBI2016_ISIC_Part3_Test_Data")
test_gt = os.path.join(downloads, "ISBI2016_ISIC_Part3_Test_GroundTruth.csv")

In [10]:
# Load the test ground truth. Column labels are supplied via `names`,
# so the first line of the file is treated as data, not as a header row.
test_meta = pd.read_csv(test_gt, sep=",", names=["filename", "class"])

In [11]:
# Pull both columns out as arrays for the copy loop further down.
test_filenames = test_meta["filename"].values
# NOTE(review): `test_gt` held the CSV path until this line and is rebound to
# the label array here. After this cell runs, re-running the read_csv cell
# above no longer receives a path unless the path cell is re-run first.
test_gt = test_meta["class"].values

In [12]:
# Display the loaded test table for a quick visual check.
test_meta

Unnamed: 0,filename,class
0,ISIC_0000003,0.0
1,ISIC_0000012,0.0
2,ISIC_0000013,1.0
3,ISIC_0000014,0.0
4,ISIC_0000015,0.0
...,...,...
374,ISIC_0011367,0.0
375,ISIC_0011374,1.0
376,ISIC_0011384,0.0
377,ISIC_0011386,0.0


In [13]:
# Resize every test image and write it into the folder matching its label.
#
# Destination folder per label, built once from the split root instead of
# re-resolving a relative path on every iteration. The keys are the integers
# 0 and 1; float labels 0.0 / 1.0 compare and hash equal to them, so the
# lookup matches exactly the values the former `gt == 0` / `gt == 1` tests did.
test_out_dirs = {
    0: os.path.join(dataset_dir, 'test', 'benign'),
    1: os.path.join(dataset_dir, 'test', 'malignant'),
}
skipped_test = []  # (filename, label) pairs whose label is not a known class

# zip() has no length, so `total` is passed explicitly to get a real progress bar.
for filename, gt in tqdm(zip(test_filenames, test_gt), total=len(test_filenames)):

    out_dir = test_out_dirs.get(gt)
    if out_dir is None:
        # Unknown label: nothing is written for this row, but it is reported below.
        skipped_test.append((filename, gt))
        continue

    filename_jpg = filename + '.jpg'
    path = os.path.join(test_dir, filename_jpg)

    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, so check it before handing the result to cv2.resize.
    img = cv2.imread(path)
    if img is None:
        raise OSError(f"Could not read image: {path}")

    # Test images are scaled straight to 256x256 with no cropping step.
    img = cv2.resize(img, (256, 256))

    # cv2.imwrite reports failure through its boolean return value.
    out_path = os.path.join(out_dir, filename_jpg)
    if not cv2.imwrite(out_path, img):
        raise OSError(f"Could not write image: {out_path}")

if skipped_test:
    print(f"Skipped {len(skipped_test)} file(s) with unrecognised labels: {skipped_test[:5]}")
print("Done!")

379it [00:09, 40.94it/s] 

Done!



