### Imports and loading training data

First, import everything the notebook needs.

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import os
from queue import Queue
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import Model
from tensorflow.data import Dataset
from IPython.display import display
import PIL

These are the paths to the dataset folders.

In [None]:
# Kaggle input directories that hold the training and testing images.
training_dataset_images_path = '/kaggle/input/car-object-detection/data/training_images'
testing_dataset_images_path = '/kaggle/input/car-object-detection/data/testing_images'

In [None]:
# Load the bounding-box CSV. Later cells read its rows through the
# `image`, `xmin`, `ymin`, `xmax` and `ymax` fields.
boxes_df = pd.read_csv('/kaggle/input/car-object-detection/data/train_solution_bounding_boxes (1).csv')
boxes_df.head()

### Definitions

`single_inputs` converts an image tensor into the format a Keras model accepts by adding a leading batch dimension.

In [None]:
def single_inputs(img_arr):
    """Add a leading batch axis of size 1 to a single image array.

    Args:
        img_arr: Array with exactly three dimensions; unpacking its shape
            fails for any other rank.

    Returns:
        The same data reshaped to ``(1, d0, d1, d2)``.
    """
    rows, cols, channels = img_arr.shape
    batched_shape = (1, rows, cols, channels)
    return img_arr.reshape(batched_shape)

`DetectionBox` holds the X/Y bounds that specify the position of a detected object. It is used both for the boxes in the training set and for the boxes this notebook predicts.

`TrainingCase` holds an image path and the boxes detected on that image. It has utility methods to get the case inputs and outputs and to draw the image with its boxes.

In [None]:
class DetectionBox:
    """Axis-aligned rectangle described by its min/max X and Y coordinates."""

    def __init__(self, x_min, y_min, x_max, y_max):
        self.x_min, self.y_min = x_min, y_min
        self.x_max, self.y_max = x_max, y_max

    @staticmethod
    def from_named_tuple(tup):
        """Build a box from an object exposing ``xmin``, ``ymin``, ``xmax``, ``ymax``."""
        return DetectionBox(tup.xmin, tup.ymin, tup.xmax, tup.ymax)

    def scale(self, scale_mul):
        """Return a new box with every coordinate multiplied by ``scale_mul``."""
        corners = (self.x_min, self.y_min, self.x_max, self.y_max)
        return DetectionBox(*(value * scale_mul for value in corners))

    def resize_to_point(self, x, y):
        """Grow the box in place, where needed, so that it reaches ``(x, y)``."""
        self.x_min = min(self.x_min, x)
        self.x_max = max(self.x_max, x)
        self.y_min = min(self.y_min, y)
        self.y_max = max(self.y_max, y)

class TrainingCase:
    """An image on disk together with the boxes that mark objects on it.

    ``img_path`` is the path handed to ``load_img`` and ``boxes`` is a list of
    ``DetectionBox`` objects expressed in image pixel coordinates.
    """

    def __init__(self,img_path):
        self.img_path = img_path
        self.boxes = []

    def add_box(self,tup):
        """Append a box built from ``tup`` (needs ``xmin``/``ymin``/``xmax``/``ymax``)."""
        self.boxes.append(DetectionBox.from_named_tuple(tup))

    def get_image(self):
        """Load the image from ``img_path`` and return it as an array."""
        return img_to_array(load_img(self.img_path))

    def draw_image_with_boxes(self):
        """Display the image with every box outlined by a 3-pixel green border."""
        img_arr = self.get_image()
        h, w = img_arr.shape[:2]
        color = (0, 255, 0)
        # Painting the edge itself and its two neighbours gives the 3 px width.
        offsets = (-1, 0, 1)

        def point(y, x):
            # Pixels that fall outside the image are silently skipped, so
            # boxes touching any border need no special handling.
            x = int(x)
            y = int(y)
            if 0 <= x < w and 0 <= y < h:
                img_arr[y, x, :] = color

        for box in self.boxes:
            # Vertical edges (left and right).
            for y in range(int(box.y_min), int(box.y_max)):
                for edge_x in (box.x_min, box.x_max):
                    for offset in offsets:
                        point(y, edge_x + offset)
            # Horizontal edges (top and bottom).
            for x in range(int(box.x_min), int(box.x_max)):
                for edge_y in (box.y_min, box.y_max):
                    for offset in offsets:
                        point(edge_y + offset, x)

        img = PIL.Image.fromarray(img_arr.astype(np.uint8),'RGB')
        display(img)

    def get_answer(self):
        """Build the target tensor for this case.

        The image is divided into 32x32 pixel cells. The cell size has to
        match the downsampling factor of the network trained on these targets.

        Returns:
            A float array of shape ``(1, h, w, 2)`` where ``h`` and ``w`` are
            the image height and width divided by 32 (truncated). Channel 0
            is 1 for cells covered by a box and -1 elsewhere; channel 1 holds
            the opposite sign.
        """
        cell = 32
        img_w, img_h = load_img(self.img_path).size
        h, w = int(img_h / cell), int(img_w / cell)

        # The builtin ``float`` is used because the ``np.float`` alias no
        # longer exists in current NumPy releases.
        out_arr = np.concatenate(
            (
                np.full((1, h, w, 1), -1, dtype=float),
                np.ones((1, h, w, 1), dtype=float),
            ),
            axis=3,
        )

        for box in self.boxes:
            # Clamp the cell range to the grid so that negative or oversized
            # coordinates can never wrap around or index out of bounds.
            y0 = max(int(box.y_min / cell), 0)
            y1 = min(int(box.y_max / cell), h)
            x0 = max(int(box.x_min / cell), 0)
            x1 = min(int(box.x_max / cell), w)
            if y0 < y1 and x0 < x1:
                out_arr[0, y0:y1, x0:x1, 0] = 1
                out_arr[0, y0:y1, x0:x1, 1] = -1

        return out_arr

    def get_answer_as_outputs(self):
        """Return the two target channels as separate ``(h, w)`` matrices."""
        answer = self.get_answer()
        return answer[0,:,:,0],answer[0,:,:,1]

    def get_inputs(self):
        """Return the image with a leading batch axis, ready for the model."""
        return single_inputs(self.get_image())

`KerasModelWrapper` defines a utility method for prediction that takes a case as input and returns the results in the format the rest of the notebook needs.

In [None]:
class KerasModelWrapper:
    """Wrap a model so that it can predict directly from a case object.

    The wrapped model must provide ``predict(inputs)`` and the case must
    provide ``get_inputs()``.
    """

    def __init__(self,model):
        self._model = model

    @staticmethod
    def _normalize(matrix,min_val,max_val):
        """Rescale ``matrix`` linearly so ``min_val`` maps to 0 and ``max_val`` to 1.

        A zero-width range yields an all-zero matrix instead of dividing by
        zero.
        """
        span = max_val - min_val
        if span == 0:
            return np.zeros_like(matrix, dtype=float)
        return (matrix - min_val) / span

    def predict(self,case):
        """Run the model on one case.

        Returns:
            A tuple ``(outs1, outs2, mask)``: the two raw output channels of
            the first batch item and a boolean matrix that is True where the
            first channel is greater than the second.
        """
        inputs = case.get_inputs()
        results = self._model.predict(inputs)

        outs1 = results[0,:,:,0]
        outs2 = results[0,:,:,1]

        # Both channels share one range so the comparison below is not
        # distorted: the range runs from the overall minimum to the overall
        # maximum of the two channels.
        min_val = min(outs1.min(),outs2.min())
        max_val = max(outs1.max(),outs2.max())

        n_outs1 = KerasModelWrapper._normalize(outs1,min_val,max_val)
        n_outs2 = KerasModelWrapper._normalize(outs2,min_val,max_val)

        return outs1,outs2,(n_outs1 > n_outs2)

    def __call__(self,case):
        """Shortcut for :meth:`predict`."""
        return self.predict(case)

`df_to_training_list` converts the dataframe loaded from the boxes CSV into a list of cases with their boxes.

In [None]:
def df_to_training_list(df):
    """Group bounding-box rows into one ``TrainingCase`` per image.

    Args:
        df: DataFrame whose rows expose ``image``, ``xmin``, ``ymin``,
            ``xmax`` and ``ymax``.

    Returns:
        A list of ``TrainingCase`` objects in order of first appearance of
        each image, each holding every box listed for that image. Image
        paths are resolved against ``training_dataset_images_path``.
    """
    cases_by_image = {}

    # Iterate the dataframe that was passed in, not a module-level one, so
    # the function works for any dataframe with the expected columns.
    for row in df.itertuples():
        case = cases_by_image.get(row.image)
        if case is None:
            case = TrainingCase(training_dataset_images_path + '/' + row.image)
            cases_by_image[row.image] = case
        case.add_box(row)

    return list(cases_by_image.values())

`draw_outputs` is a utility function that shows the neural network outputs as images.

In [None]:
def draw_outputs(outputs):
    """Display each matrix in ``outputs`` as a grayscale image enlarged 4x.

    Every matrix is rescaled on its own so that its minimum becomes black
    and its maximum white. A matrix in which all values are equal is shown
    as a black image.

    Args:
        outputs: Iterable of 2-D numeric or boolean arrays.
    """
    for out in outputs:
        out = out * 1 # Multiplying by 1 casts a boolean matrix to numeric.
        min_val = out.min()
        span = out.max() - min_val
        if span == 0:
            # A constant matrix (for example an all-False mask) has no range
            # to stretch; dividing by the zero span would produce NaN.
            out = np.zeros(out.shape)
        else:
            out = (out - min_val) / span
        img = PIL.Image.fromarray((out*255).astype(np.uint8),'L')
        w,h = img.size
        img = img.resize((w*4,h*4))
        display(img)

### Data preparation

Let's prepare our training cases list first.

In [None]:
# Group the CSV rows into one TrainingCase per image.
training_list = df_to_training_list(boxes_df)

Below are examples of images with boxes from the training dataset.

In [None]:
# Show five randomly picked training images; the same image may be drawn
# more than once because every pick is independent.
for i in range(5):
    idx = random.randrange(len(training_list))
    training_list[idx].draw_image_with_boxes()

We have enough RAM to hold all inputs and outputs in tensors, without any data loaders or generators. So, let's do it.

In [None]:
# Join the per-case arrays along the batch axis (axis 0).
# np.concatenate requires all other dimensions to match, so this assumes
# every training image has the same size - TODO confirm for the dataset.
train_X = np.concatenate([x.get_inputs() for x in training_list],axis=0)
train_y = np.concatenate([x.get_answer() for x in training_list],axis=0)

### Model definition and training

I will use the pretrained MobileNetV2 architecture with an additional convolution layer that splits the data into 2 classes: car and background. The pretrained layers are not frozen because my experiments showed better fitting quality that way.

In [None]:
def build_model():
    """Create and compile the detection network.

    The network is MobileNetV2 with ImageNet weights and without its top,
    followed by a convolution with 2 filters and kernel size 1. It is
    compiled with the 'adam' optimizer and 'mse' loss.

    Returns:
        The compiled Keras model.
    """
    backbone = MobileNetV2(weights='imagenet',include_top=False)
    class_maps = Conv2D(2,1)(backbone.outputs[0])
    detector = Model(backbone.inputs,class_maps)
    detector.compile('adam',loss = 'mse')
    return detector

In [None]:
# Build the network and print its layer summary.
model = build_model()
model.summary()

In [None]:
# Train on the whole training set for 100 epochs; no validation data is passed.
model.fit(train_X,train_y,epochs=100)

Let's export our model.

In [None]:
# Write the trained model to the 'car_detection' path.
model.save('car_detection')

In [None]:
# Pack the saved model into a zip archive, then remove the unpacked copy.
!zip -r car_detection.zip car_detection
!rm -rv car_detection

In [None]:
# Wrap the trained model so that it can predict directly from case objects.
model_wrapper = KerasModelWrapper(model)

### Case and neural network inputs and outputs

Below are the 2 output channels that our neural network should predict for a random case from the training dataset.

In [None]:
# Pick one random training case and show its two target channels.
random_case = training_list[random.randrange(len(training_list))]
draw_outputs(random_case.get_answer_as_outputs())

These are the actual predictions and the restored mask of detected objects.

In [None]:
# predict() returns both raw channels plus the boolean mask; draw all three.
draw_outputs(model_wrapper.predict(random_case))

### Checking on test data

`load_test_images` loads the test cases with empty box lists.

In [None]:
def load_test_images(path):
    """Create one box-less ``TrainingCase`` for every entry found in ``path``.

    Args:
        path: Directory to list; each entry name is appended to it after '/'.

    Returns:
        A list of cases in the order ``os.listdir`` reports the entries.
    """
    return [TrainingCase(path + '/' + entry) for entry in os.listdir(path)]

`populate_boxes` contains the algorithm that walks over the mask of detected objects to build the predicted boxes. `walk_on_box` does a breadth-first search on the mask and marks visited points with False values.

In [None]:
def populate_boxes(model,case):
    """Replace ``case.boxes`` with boxes derived from the model's mask.

    Every 4-connected group of True cells in the predicted mask becomes one
    box that encloses the whole group. Boxes are scaled by 32 to convert
    mask cells to image pixels.

    Args:
        model: Object whose ``predict(case)`` returns a sequence with the
            boolean mask at index 2.
        case: Case to predict on; its ``boxes`` attribute is overwritten.
    """
    from collections import deque

    prediction = model.predict(case)
    # Work on a copy: visited cells are cleared while walking.
    mask = prediction[2].copy()
    h,w = mask.shape
    boxes = []

    def walk_on_box(sx,sy):
        # Breadth-first walk over the connected group that contains (sx, sy).
        box = DetectionBox(sx,sy,sx+1,sy+1)
        # Cells are cleared when they are queued, not when they are taken
        # out, so no cell can be queued twice.
        mask[sy,sx] = False
        pending = deque([(sx,sy)])
        while pending:
            x,y = pending.popleft()
            # A cell spans from (x, y) to (x+1, y+1). Both corners are needed
            # because the group may extend to the left of the start cell.
            box.resize_to_point(x,y)
            box.resize_to_point(x+1,y+1)
            for nx,ny in ((x,y+1),(x,y-1),(x+1,y),(x-1,y)):
                if 0 <= nx < w and 0 <= ny < h and mask[ny,nx]:
                    mask[ny,nx] = False
                    pending.append((nx,ny))

        return box.scale(32)

    for y in range(h):
        for x in range(w):
            if mask[y,x]:
                boxes.append(walk_on_box(x,y))

    case.boxes = boxes

Fill the box lists using the predictions from the neural network.

In [None]:
# Load the testing images and attach the predicted boxes to each case.
testing_list = load_test_images(testing_dataset_images_path)
for case in testing_list:
    populate_boxes(model_wrapper,case)

Some random examples of testing images and predicted boxes.

In [None]:
# Only cases with at least one predicted box are worth drawing. Selecting
# them up front means this cell always finishes, even when the network
# detected nothing on any testing image.
cases_with_boxes = [case for case in testing_list if case.boxes]
if cases_with_boxes:
    # Five independent picks; the same image may be shown more than once.
    for _ in range(5):
        random.choice(cases_with_boxes).draw_image_with_boxes()
else:
    print('No boxes were predicted for any testing image.')