In [1]:
from PIL import Image
import numpy as np
import os


In [3]:
def loadImagesToArray(path:str):
    '''
    Loads all .jpg and .png files from the specified directory.\n
    Each image will be converted into an array of size (height x width x channels).\n
    The return numpy array is of dimensions (numberOfImages x height x width x channels).\n
    '''
    imagesArray = []

    counter = 0
    for file in os.scandir(path):
        filepath = os.fsdecode(file)
        if(filepath.endswith(".jpg") or filepath.endswith(".png")):
                imgArray = np.array(Image.open(filepath))
                imagesArray.append(imgArray)
                counter += 1                  
    return np.array(imagesArray)

def loadTrainingDataAndLabels(path:str, subdirectories):
    '''
    Loads the training data as numpy arrays and creates the corresponding labels.\n
    For this to work, the images should be under the folder <path> in separate subdirectories, one for each class.\n
    The labels will be inferred from the names of the subdirectories. \n

    Returns the training data as a numpy array with the dimensions (number_of_images x height x width x channels).\n
    Returns the labels as a numpy array with the dimensions (number_of_images).
    '''

    training_data = []
    labels = []

    for directory in subdirectories:
        images_array = loadImagesToArray(os.path.join(path, directory))
        training_data.extend(images_array)

        labels.extend(np.full(len(images_array), directory))

    training_data_array = np.array(training_data)
    print("Shape of training_data: ", training_data_array.shape)
    labels_array = np.array(labels)
    print("Shape of labels: ", labels_array.shape)
    
    return training_data_array, labels_array

In [4]:
training_data, labels = loadTrainingDataAndLabels("./training_patches/", ["background", "ponds", "pools", "solar", "trampoline"])

Shape of training_data:  (3316, 256, 256, 3)
Shape of labels:  (3316,)


In [5]:
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
le.fit(labels)
labels_categorical = le.transform(labels)



In [6]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(training_data, labels_categorical, test_size=0.33, random_state=1, stratify=labels)

In [7]:
from tensorflow.keras.utils import to_categorical
y_train = to_categorical(y_train, dtype="int8")
np.unique(y_train, axis=0)

array([[0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 1, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0]], dtype=int8)

In [8]:
from tensorflow.keras.layers import InputLayer, Dense, Flatten, Conv2D, MaxPool2D
from tensorflow import keras
model = keras.models.Sequential()
model.add(InputLayer(input_shape=(256,256,3)))
model.add(Conv2D(filters=10, kernel_size=(3,3), strides=1, padding="same", activation="relu"))
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(20, activation="relu"))
model.add(Dense(20, activation="relu"))
model.add(Dense(5, activation="softmax"))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 10)      280       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 128, 128, 10)      0         
_________________________________________________________________
flatten (Flatten)            (None, 163840)            0         
_________________________________________________________________
dense (Dense)                (None, 20)                3276820   
_________________________________________________________________
dense_1 (Dense)              (None, 20)                420       
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 105       
Total params: 3,277,625
Trainable params: 3,277,625
Non-trainable params: 0
______________________________________________

In [9]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])


history = model.fit(X_train, 
                    y_train, 
                    epochs=20,
                    batch_size=64,
                    validation_split=0.1,
                   )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
from tensorflow import keras
from PIL import Image
from keras.applications.inception_v3 import preprocess_input
from keras import Model
import time
import pandas as pd
from numpy import genfromtxt


In [13]:
def savePredictionToCsv(predictionDataframe: pd.DataFrame, filepath:str):
    '''
    Saves a dataframe containing the prediction for a single image to a CSV file.

    @predictionDataFrame - The dataframe that contains the predictions and should be saved.\n
    @path - The path under which the CSV file should be saved.
    @filename - The name under which the CSV file should be saved.
    '''
    #filepath =  os.path.splitext(filepath)[2]+"_prediction.csv"
    filepath = "./03_validation_results/" +str(filepath.split('/')[2]) +"_prediction.csv"
    predictionDataframe.to_csv(filepath, sep=",", index=False)

def makePredictions(path:str, convnet:keras.Model, stepSize:int, windowSize):
    '''
    Traverses a folder that contains images for which predictions should be made.\n
    Creates a separate prediction CSV file for each image.

    @path - The path containing the images for which predictions should be created.
    '''

    # For each image in path
        # Perform sliding Window approach
            # For each slide
            # Store x_upper_left, y_upper_left, x_lower_right, y_lower_right
            # Run image through convnet
            # Run classifier on output
            # If prediction != 'background'
            # Store prediction in temp array
        # Run non-max suppression to filter predictions
        # Store predictions in csv
    for file in os.scandir(path):
        filepath = os.fsdecode(file)
        
        if(("annotated" in filepath) or not (filepath.endswith(".jpg") or filepath.endswith(".png"))):
           continue
        
        createPredictionsForImage(filepath=filepath, convnet=convnet, stepSize=stepSize, windowSize=windowSize)


def createPredictionsForImage(filepath:str, convnet:keras.Model, stepSize:int, windowSize):
    '''
    Creates the prediction CSV for one image.
    '''

    print("\nCreating predictions for file: ", filepath)
    create_predictions_start_time = time.time()
    #image = Image.open(filepath)
    imgArray = np.array(Image.open(filepath))
    
    patch_coordinates= []  
    preprocessed_patches = []
    counter = 0
    patch_preprocessing_start_time = time.time()
    
    print("Starting sliding window to create patches of size: ", windowSize[0], "x", windowSize[1], ".")
    for(x,y,patch) in sliding_window(imageArray=imgArray, stepSize=stepSize, windowSize=windowSize):
        if counter > 0 and counter%10000 == 0:
            print("Still processing, reached patch", counter)
            print("Execution time for the last 10.000 patches: ", time.time()-patch_preprocessing_start_time, " seconds.")
            patch_preprocessing_start_time = time.time()
            print("Processing continues...")
        
        # Skip if the size of a patch doesn't match the specified windowSize
        if patch.shape[0] != windowSize[0] or patch.shape[1] != windowSize[1]:
            continue
    
        # Save coordinates which are needed for a prediction
        x_upper_left = x
        y_upper_left = y
        x_lower_right = x+windowSize[0]
        y_lower_right = y+windowSize[1]
        x_center = x+128
        y_center = y+128

        # Run the patch through the classification
        preprocessed_patch = preprocess_input(patch)
        preprocessed_patches.append(preprocessed_patch)
        patch_coordinates.append([y_upper_left, x_upper_left, y_lower_right, x_lower_right])
        counter +=1
    
    print("Finished preprocessing of the patches.")
    preprocessed_patches = np.array(preprocessed_patches)
    patch_coordinates = np.array(patch_coordinates)
    print("Shape of preprocessed patches: ", preprocessed_patches.shape)
    print("Shape of patch coordinates: ", patch_coordinates.shape, "\n")

    # Get all predictions
    print("Running patches through ConvNet and using classifier to predict labels...")
    prediction_start_time = time.time()
    predicted_labels_encoded = pd.DataFrame(convnet.predict(preprocessed_patches), columns=["background", "ponds", "pools", "solar", "trampoline"])
    predicted_labels= predicted_labels_encoded.idxmax(1)
    
    # Create a column with the score for the predicted class
    highest_scores = predicted_labels_encoded[["background", "ponds", "pools", "solar", "trampoline"]].max(axis=1)
    print("Shape of highest_scores: ", highest_scores.shape)

    print("Finished predictions, execution time: ", time.time()-prediction_start_time, " seconds.\n")
    
    print("Shape of patch_coordinates: ", patch_coordinates.shape)

    # Combining patch coordinates and predictions
    predictions_array=np.c_[highest_scores, predicted_labels, patch_coordinates]

    print("Shape of combined predictions array (unfiltered): ", predictions_array.shape)

    predictions_dataframe = pd.DataFrame(data=predictions_array, columns=["score", "label", "y_upper_left", "x_upper_left", "y_lower_right", "x_lower_right"])
    # Filter all predictions that contain the label "background"
    predictions_dataframe = predictions_dataframe[predictions_dataframe.label != "background"]
    print("Description of the predictions dataframe: ", predictions_dataframe.describe())

    # Save prediction to csv
    savePredictionToCsv(predictionDataframe=predictions_dataframe, filepath=filepath)
    print("Saved predictions for file: ", filepath, "\n")
    print("Elapsed time: ", time.time()-create_predictions_start_time, " seconds.\n")

    
def sliding_window(imageArray, stepSize:int, windowSize=(256,256)):
    for y in range(0, imageArray.shape[0], stepSize):
	    for x in range(0, imageArray.shape[1], stepSize):
			# yield the current window
		    yield (x, y, imageArray[y:y + windowSize[1], x:x + windowSize[0]])


       

In [14]:
makePredictions("./02_validation_data_images/", convnet=model, stepSize=64, windowSize=(256,256))


Creating predictions for file:  ./02_validation_data_images/DQIMQN.png
Starting sliding window to create patches of size:  256 x 256 .
Still processing, reached patch 10000
Execution time for the last 10.000 patches:  6.599049091339111  seconds.
Processing continues...
Finished preprocessing of the patches.
Shape of preprocessed patches:  (14884, 256, 256, 3)
Shape of patch coordinates:  (14884, 4) 

Running patches through ConvNet and using classifier to predict labels...
Shape of highest_scores:  (14884,)
Finished predictions, execution time:  101.9744520187378  seconds.

Shape of patch_coordinates:  (14884, 4)
Shape of combined predictions array (unfiltered):  (14884, 6)
Description of the predictions dataframe:         score label y_upper_left x_upper_left y_lower_right x_lower_right
count      0     0            0            0             0             0
unique     0     0            0            0             0             0
top      NaN   NaN          NaN          NaN          

In [15]:
import tensorflow as tf
import numpy as np

def nonMaxSuppressBoundingBoxes(path:str, iou_threshold:float, score_threshold:float):
    '''
    Loads prediction csv files from the path and performs the non-max-suppression for each of them.\n
    This method works per-class, i.e. the suppression is performed for each object class independently.\n

    @path - The path in which the to-be-processed csv files are located.\n
    @iou_threshold - The percentage of allowed overlap for predictions of the same class.\n\t\t Must be a value between 0 and 1.\n
    @score_threshold - The minimum score a prediction must have to be considered values.\n\t\t Predictions with a score < score_threshold will be removed from the predictions\n.
    '''

    for file in os.scandir(path):
        filepath = os.fsdecode(file)
        

        # Skip files that are not csv files or that contain "suppressed" in their name
        if(not(filepath.endswith(".csv")) or ("suppressed" in filepath)):
           continue

        print("Creating suppressed csv for file: ", filepath, "...")
        # New empty dataframe for the results
        suppressed_predictions = pd.DataFrame(columns=["label", "y_upper_left", "x_upper_left", "y_lower_right", "x_lower_right"])
       
        # Get the original predictions from a csv file
        original_predictions = pd.read_csv(filepath, header=0)
        
        for pred_class in ["background", "pool", "pond", "solar", "trampoline"]:
            
            # Get labels, scores and coordinates for the class pred_class
            class_original_predictions = original_predictions.loc[original_predictions["label"]==pred_class]
            labels = class_original_predictions["label"]
            scores = class_original_predictions["score"]
            coordinates = class_original_predictions.iloc[:, 2:6].astype(int)
            
            # Run the nonmax suppression and gather the boxes and labels of the remaining predictions
            class_selected_boxes_indices = tf.image.non_max_suppression(boxes=coordinates, scores=scores, max_output_size=200, iou_threshold=iou_threshold, score_threshold=score_threshold )
            class_selected_boxes = tf.gather(coordinates, class_selected_boxes_indices).numpy()
            class_selected_labels = np.array([x.numpy().decode() for x in tf.gather(labels, class_selected_boxes_indices)])
            class_predictions = pd.DataFrame(np.c_[class_selected_labels, class_selected_boxes], columns=["label", "y_upper_left", "x_upper_left", "y_lower_right", "x_lower_right"])
            
            # Add the suppressed predictions of this class to the overall result
            suppressed_predictions = suppressed_predictions.append(class_predictions)
        
        # Save the suppressed predictions to a csv file
        new_filepath =  os.path.splitext(filepath)[0]+"_suppressed.csv"
        suppressed_predictions.to_csv(new_filepath, sep=",", index=False)
        print("Success! Saved suppressed predictions to: ", new_filepath)
        
