In [2]:
import os
import numpy as np
import shutil
import pandas as pd

# fraction of each class's images that goes to the training split
train_ratio = 0.9
# folder holding one sub-folder per class plus the generated train/test folders
root_dir = 'Data'
# class names; each one is also the name of a sub-folder of root_dir
classes = ['Acne', 'Eczema', 'Irritant contact dermatitis', 'Lichen Planus', 'Warts']

# seed NumPy's global generator so np.random.shuffle draws the same
# permutation on every fresh kernel run
seed = 42
np.random.seed(seed)

# tables describing each split: 'Disease' holds the image filename and
# 'Label' the class name; both tables share the same schema
train_df = pd.DataFrame(columns=['Disease', 'Label'])
test_df = pd.DataFrame(columns=['Disease', 'Label'])

# create the folders that will hold all training and testing images.
# Each folder is created independently with exist_ok=True, so a layout left
# half-built by an earlier run neither raises nor leaves one folder missing
# (copying into a missing folder would write a plain file with that name).
for split_dir in (os.path.join(root_dir, 'train', 'train_images'),
                  os.path.join(root_dir, 'test', 'test_images')):
    os.makedirs(split_dir, exist_ok=True)


# destination folders, derived from root_dir so the whole layout follows it
train_dir = os.path.join(root_dir, 'train', 'train_images')
test_dir = os.path.join(root_dir, 'test', 'test_images')

# per-class tables are collected here and concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every iteration)
train_parts = []
test_parts = []

for item in classes:
    # collect all filenames from the individual class folder; keep regular
    # files only (shutil.copy cannot copy a directory) and sort them so the
    # shuffle starts from the same order regardless of OS listing order
    src = os.path.join(root_dir, item)
    all_filenames = sorted(
        name for name in os.listdir(src)
        if os.path.isfile(os.path.join(src, name))
    )
    np.random.shuffle(all_filenames)

    # split the shuffled filenames into train and test filenames
    split_at = int(len(all_filenames) * train_ratio)
    train_filenames = all_filenames[:split_at]
    test_filenames = all_filenames[split_at:]

    # one table per split: filename in 'Disease', class name in 'Label'
    train_parts.append(pd.DataFrame({'Disease': train_filenames}).assign(Label=item))
    test_parts.append(pd.DataFrame({'Disease': test_filenames}).assign(Label=item))

    # copy the files from the class folder into the shared train/test folders
    # NOTE(review): every class is copied into the same flat folder, so two
    # classes sharing a filename would overwrite each other -- confirm that
    # filenames are unique across classes
    for name in train_filenames:
        shutil.copy(os.path.join(src, name), train_dir)

    for name in test_filenames:
        shutil.copy(os.path.join(src, name), test_dir)

    print(item+' :Training: ', len(train_filenames))
    print(item+' :Testing: ', len(test_filenames))

# build the final tables in one concat each; empty per-class tables are
# skipped, and if nothing was collected the existing frames are left untouched
train_parts = [part for part in train_parts if not part.empty]
if train_parts:
    train_df = pd.concat(train_parts, ignore_index=True)

test_parts = [part for part in test_parts if not part.empty]
if test_parts:
    test_df = pd.concat(test_parts, ignore_index=True)


    

Training:  623
Testing:  70
Training:  423
Testing:  48
Training:  311
Testing:  35
Training:  423
Testing:  47
Training:  388
Testing:  44


In [3]:
# display the test-split table to inspect its filename ('Disease') and 'Label' rows
test_df

Unnamed: 0,Disease,Label
0,acne-infantile-2.jpg,Acne
1,acne-cystic-95.jpg,Acne
2,acne-cystic-88.jpg,Acne
3,acne-infantile-4.jpg,Acne
4,acne-closed-comedo-8.jpg,Acne
...,...,...
239,warts-digitate-62.jpg,Warts
240,warts-common-66.jpg,Warts
241,warts-digitate-13.jpg,Warts
242,viral-wart-15__ProtectWyJQcm90ZWN0Il0_FocusFil...,Warts


In [4]:
# write the filename/label tables to CSV (without the index column);
# paths are built from root_dir instead of repeating the 'Data' literal,
# so changing root_dir moves the CSV files as well
train_df.to_csv(os.path.join(root_dir, 'train', 'train_data.csv'), index=False)
test_df.to_csv(os.path.join(root_dir, 'test', 'test_data.csv'), index=False)