## Create the dataframe to record the image file paths and labels
Used as input for the Keras `ImageDataGenerator`.

Before running the program, you need to manually separate the image folders into "train", "val", and "test" subfolders.

In [1]:
from modelfile.config import Config
from imutils import paths
import os
import pandas as pd
import pickle
from tqdm import tqdm

%load_ext autoreload
%autoreload 2

%reload_ext autoreload

In [2]:
def save_pickle(item, filename):
    """
    Serialize an object to disk with pickle.

    item: the object to serialize
    filename: path of the file to write (opened in binary write mode,
        so an existing file is overwritten)
    """
    # The context manager closes the file even if pickle.dump raises,
    # so a failed dump does not leave an open handle behind.
    with open(filename, "wb") as item_pickle:
        pickle.dump(item, item_pickle)
    
def load_pickle(filename):
    """
    Load and return an object previously written with pickle.

    filename: path of the pickle file to read
    return:
        the unpickled object
    """
    # NOTE: unpickling can execute arbitrary code; only load files that
    # come from a trusted source.
    # The context manager guarantees the file handle is closed once the
    # object has been read (the handle was previously left open).
    with open(filename, "rb") as item_pkl_file:
        return pickle.load(item_pkl_file)

In [3]:
# Instantiate the project configuration; its TRAIN_PATH, VAL_PATH and
# TEST_PATH attributes are read in the following cell.
config = Config()

In [4]:
# Collect the image file paths found under each split directory
# (train, validation, test), in that order.
trainPaths, valPaths, testPaths = (
    list(paths.list_images(split_dir))
    for split_dir in (config.TRAIN_PATH, config.VAL_PATH, config.TEST_PATH)
)

In [5]:
def get_df(imagePaths):
    """
    Read the image files list and put them into a dataframe

    The class label is taken from the last character of the file name
    before its extension ("0" for "negative" and "1" for "positive").

    imagePaths: a list of images files location
    return:
        dataframe with columns 'file' (file name), 'dir' (the full input
        path) and 'label' (the class label), all stored as strings
    """
    path_list = []

    for inputPath in tqdm(imagePaths):
        filename = os.path.basename(inputPath)
        # Strip the extension with splitext instead of a fixed slice so that
        # extensions that are not exactly three characters long (".jpeg",
        # ".tiff") still yield the label rather than the "." separator.
        stem, _ = os.path.splitext(filename)
        label = stem[-1:]
        path_list.append([filename, inputPath, label])

    return pd.DataFrame(path_list, columns=['file', 'dir', 'label'], dtype=str)

In [6]:
# Build one dataframe per split; map applies get_df to the train, validation
# and test path lists in that order.
trainDF, valDF, testDF = map(get_df, (trainPaths, valPaths, testPaths))

100%|██████████| 233056/233056 [00:00<00:00, 469681.95it/s]
100%|██████████| 19500/19500 [00:00<00:00, 585666.61it/s]
100%|██████████| 24968/24968 [00:00<00:00, 563510.65it/s]


In [7]:
# Persist each split's dataframe as a pickle file under ./datasets
save_pickle(item=trainDF, filename="./datasets/trainDF.pickle")
save_pickle(item=valDF, filename="./datasets/valDF.pickle")
save_pickle(item=testDF, filename="./datasets/testDF.pickle")