In [53]:
# Purpose: train the best possible detector for the bounding-box classification problem.
import tensorflow as tf
import os
import numpy as np
#from PIL import image

# Sentinel passed as num_parallel_calls to Dataset.map() further down.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Split roots: ProcessedDataset/CoreDetector/{train,validation}
pathToTrainFolder, pathToValidationFolder = (
    os.path.join("ProcessedDataset", "CoreDetector", split)
    for split in ("train", "validation")
)

# Every split contains one sub-folder per class ("negatives" / "positives").
pathToTrain_negFolder, pathToTrain_posFolder = (
    os.path.join(pathToTrainFolder, className)
    for className in ("negatives", "positives")
)
pathToValidation_negFolder, pathToValidation_posFolder = (
    os.path.join(pathToValidationFolder, className)
    for className in ("negatives", "positives")
)

In [104]:
# Dataset processing utilities and parameters
# The position of a folder name in this tensor is the integer label it maps to:
# index 0 -> "negatives", index 1 -> "positives".
classNames = tf.convert_to_tensor(["negatives", "positives"])
# Square 224x224 input, picked with reuse of ImageNet models in mind (per the original note).
img_height = img_width = 224

def processDataset(file_path):
    """Turn an image file path into an ``(image, label)`` training pair.

    The label is derived from the name of the file's parent directory:
    files located in a ``classNames[0]`` folder get label 0, everything
    else gets label 1.

    Args:
        file_path: scalar string tensor holding the path of an image file,
            as produced by ``tf.data.Dataset.list_files``.

    Returns:
        A tuple ``(img, label)`` where ``img`` is the decoded 3-channel image
        resized to ``[img_height, img_width]`` (no pixel normalisation is
        applied here) and ``label`` is a scalar int32 tensor (0 or 1).
    """
    # The parent directory name is the second-to-last path component.
    parts = tf.strings.split(file_path, os.path.sep)
    classForFile = parts[-2]

    # Compute the label as a tensor expression instead of a Python `if` on a
    # tensor: the result is an int32 tensor whether this function is traced
    # by Dataset.map() or called eagerly, and it does not depend on AutoGraph
    # rewriting the branch.
    label = tf.cast(tf.math.not_equal(classForFile, classNames[0]), tf.int32)

    # Read image and resize
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, [img_height, img_width])

    return img, label
    
def loadDatasets():
    """Build the train/validation datasets for both classes.

    For each of the four class folders, every file matching ``*.*`` is listed
    (unshuffled) and mapped through ``processDataset``. The cardinality of
    each resulting dataset is printed.

    Returns:
        A 4-tuple ``(train_neg_ds, train_pos_ds, validation_neg_ds,
        validation_pos_ds)`` of ``tf.data.Dataset`` objects.
    """
    # (title used in the log line, folder to scan) -- in return order.
    sources = (
        ("Train neg", pathToTrain_negFolder),
        ("Train pos", pathToTrain_posFolder),
        ("Validation neg", pathToValidation_negFolder),
        ("Validation pos", pathToValidation_posFolder),
    )

    # Phase 1: list the files of every folder.
    listings = [
        tf.data.Dataset.list_files(folder + '/*.*', shuffle=False)
        for _, folder in sources
    ]

    # Phase 2: decode each listed file into an (image, label) pair.
    datasets = [
        listing.map(processDataset, num_parallel_calls=AUTOTUNE)
        for listing in listings
    ]

    # Phase 3: report how many elements each dataset holds.
    for (title, _), dataset in zip(sources, datasets):
        count = tf.data.experimental.cardinality(dataset).numpy()
        print(f"{title} cardinality {count}")

    return tuple(datasets)

In [105]:
# Build the four per-class datasets; loadDatasets() also prints each one's cardinality.
train_neg_ds, train_pos_ds, validation_neg_ds, validation_pos_ds = loadDatasets()

Train neg cardinality 23
Train pos cardinality 49
Validation neg cardinality 5
Validation pos cardinality 12


In [106]:
# Sanity check: pull the first three samples of the negative validation set
# and print each image's shape together with its label.
for img,label in validation_neg_ds.take(3):
    print(img.shape, label)

(224, 224, 3) tf.Tensor(0, shape=(), dtype=int32)
(224, 224, 3) tf.Tensor(0, shape=(), dtype=int32)
(224, 224, 3) tf.Tensor(0, shape=(), dtype=int32)
