# Bounding box detection - Racoon data


## Data files
- images: images of racoons
- train_labels_.csv: contains the bounding-box coordinates for every image

### Load the training data from the train_labels_.csv file

In [None]:
# Standard-library modules first, then the deep-learning framework.
import random
import warnings

import tensorflow

# Seed Python's built-in random generator only.
random.seed(0)
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Last expression of the cell: shows the installed TensorFlow version.
tensorflow.__version__

In [None]:
# Root folder of the dataset; the other two locations are derived from it.
project_path = '../input/racoon-detection'
# Folder that holds the image files.
data_path = f'{project_path}/Racoon Images/images'
# CSV file with one bounding-box annotation per row.
train_csv_path = f'{project_path}/train_labels_.csv'

### Print the shape of the train dataset

### Declare a variable IMAGE_SIZE = 128 as we will be using MobileNet which will be taking Input shape as 128 * 128 

In [None]:
# Side length (pixels) of the square images fed to the network.
IMAGE_SIZE = 128

### With the help of csv.reader, write a for loop that loads the train_labels_.csv file and stores the path, width, height, x0, y0, x1, y1 in individual variables. <br>
1. Create a list variable known as 'path' which has all the path for all the training images
2. Create an array 'coords' which has the resized coordinates of the bounding box for the training images

<u>Note:</u> All the training images should be downsampled to 128 * 128 as it is the input shape of MobileNet (which we will be using for Object detection). Hence the corresponding coordinates of the bounding boxes should be changed to match the image dimension of 128 * 128 

In [None]:
import csv
import numpy as np

In [None]:
IMAGE_SIZE = 128


def normalize(coordinate, value):
    """Rescale `coordinate` from an axis of length `value` to IMAGE_SIZE."""
    return (coordinate * IMAGE_SIZE) / value


# X_train collects the image paths, box_rows the matching rescaled boxes.
X_train = []
box_rows = []

# newline='' is the mode the csv module documentation asks for.
with open(train_csv_path, 'r', newline='') as csvfile:
    data = csv.reader(csvfile, delimiter=',')
    next(data, None)  # skip the header row

    for row in data:
        # csv.reader yields an empty list for a blank line; skip it instead
        # of failing on the column lookups below.
        if not row:
            continue

        # Column 0 is the file name, 1-2 the image width/height and 4-7 the
        # box corners x0, y0, x1, y1; column 3 is not used.
        image_width, image_height, x0, y0, x1, y1 = (
            int(row[column]) for column in (1, 2, 4, 5, 6, 7)
        )

        # Store each box as (x, y, width, height) in IMAGE_SIZE space.
        box_rows.append([
            normalize(x0, image_width),
            normalize(y0, image_height),
            normalize(x1 - x0, image_width),
            normalize(y1 - y0, image_height),
        ])
        X_train.append(data_path + '/' + row[0])

# reshape keeps the (n, 4) layout even when there are no annotation rows.
y_train = np.array(box_rows, dtype=np.float64).reshape(-1, 4)


In [None]:
# Preview the first five entries of X_train.
X_train[:5]

In [None]:
# Preview the first five rescaled boxes, one (x, y, width, height) row each.
y_train[:5]

In [None]:
# Map one stored box from IMAGE_SIZE space back onto its original image so
# it can be drawn on the full-resolution picture.
import cv2

# Fixed sample (index 1) used for this sanity check.
sample_index = 1
filename = X_train[sample_index]
unscaled = cv2.imread(filename)
if unscaled is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError(f'Could not read image: {filename}')
# OpenCV decodes to BGR channel order; convert to RGB so matplotlib's
# imshow displays the true colours.
unscaled = cv2.cvtColor(unscaled, cv2.COLOR_BGR2RGB)
region = y_train[sample_index]

image_height, image_width, _ = unscaled.shape

# region is (x, y, width, height) in IMAGE_SIZE space; undo the scaling.
x0 = int(region[0] * image_width / IMAGE_SIZE)
y0 = int(region[1] * image_height / IMAGE_SIZE)

# Bottom-right corner = top-left corner + box size.
x1 = int((region[0] + region[2]) * image_width / IMAGE_SIZE)
y1 = int((region[1] + region[3]) * image_height / IMAGE_SIZE)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Show the full-size sample image with its bounding box outlined in red.
fig, axis = plt.subplots(1)
axis.imshow(unscaled)

rect = patches.Rectangle(
    xy=(x0, y0),
    width=x1 - x0,
    height=y1 - y0,
    linewidth=2,
    edgecolor='r',
    facecolor='none',
)
axis.add_patch(rect)

plt.show()

### Write a for loop which can load all the training images into a variable 'batch_images' using the paths from the 'paths' variable
<u>Note:</u> Convert the image to RGB scale as the MobileNet accepts 3 channels as inputs   

In [None]:
from PIL import Image
from tensorflow.keras.applications.mobilenet import preprocess_input

# Replace every path in X_train, in place, with that image's preprocessed
# pixel array (IMAGE_SIZE x IMAGE_SIZE, three RGB channels, float32).
for i, f in enumerate(X_train):
    # The context manager closes the file opened by Image.open; resize() and
    # convert() return new images, so closing only the last copy would leave
    # the original handle open.
    with Image.open(f) as source_img:
        img = source_img.resize((IMAGE_SIZE, IMAGE_SIZE)).convert('RGB')

    X_train[i] = preprocess_input(np.array(img, dtype=np.float32))

In [None]:
# Turn the list of per-image arrays into a single NumPy array.
X_train = np.array(X_train)
# Last expression of the cell: shows the shape of the stacked array.
X_train.shape

In [None]:
# Shows the target shape: one (x, y, width, height) row per annotation.
y_train.shape

### Import MobileNet and load MobileNet into a variable named 'model' which takes input shape of 128 * 128 * 3. Freeze all the layers. Add convolution and reshape layers at the end to ensure the output is 4 coordinates

In [None]:
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Reshape

ALPHA = 1  # passed to MobileNet as its `alpha` argument


def create_model(trainable=True):
    """Build a MobileNet-based network that outputs four values per image.

    Args:
        trainable: Value assigned to the ``trainable`` flag of every
            MobileNet layer; pass False to freeze them.

    Returns:
        A Keras ``Model`` taking (IMAGE_SIZE, IMAGE_SIZE, 3) images and
        producing a vector of length 4.
    """
    backbone = MobileNet(
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
        include_top=False,
        alpha=ALPHA,
    )

    # Only the MobileNet layers get the flag; the layers added below are
    # created afterwards and keep their default setting.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = trainable

    features = backbone.layers[-1].output
    # Four filters with a 4x4 kernel; the Reshape that follows requires this
    # to leave exactly four values per image.
    box_map = Conv2D(4, kernel_size=4, name='coords')(features)
    flat_box = Reshape((4,))(box_map)

    return Model(inputs=backbone.input, outputs=flat_box)


### Define a custom loss function IoU which calculates Intersection Over Union

In [None]:
from tensorflow.keras.backend import epsilon
def loss(gt, pred):
    """Return the aggregate Intersection-over-Union of two sets of boxes.

    Each row of `gt` and `pred` is read as (x, y, width, height).

    Args:
        gt: Ground-truth boxes, array-like with four columns.
        pred: Predicted boxes, same layout and number of rows as `gt`.

    Returns:
        A float32 scalar: the summed intersection divided by the summed
        union over all rows with a positive overlap, rounded to four
        decimals. It is 0 when no row overlaps.
    """
    gt = np.asarray(gt)
    pred = np.asarray(pred)

    # Overlap extent along each axis. A negative extent means the boxes are
    # disjoint on that axis, so clamp it to 0; otherwise two negative
    # extents would multiply into a bogus positive area.
    diff_width = np.maximum(
        np.minimum(gt[:, 0] + gt[:, 2], pred[:, 0] + pred[:, 2])
        - np.maximum(gt[:, 0], pred[:, 0]),
        0,
    )
    diff_height = np.maximum(
        np.minimum(gt[:, 1] + gt[:, 3], pred[:, 1] + pred[:, 3])
        - np.maximum(gt[:, 1], pred[:, 1]),
        0,
    )
    intersection = diff_width * diff_height

    # Compute union
    area_gt = gt[:, 2] * gt[:, 3]
    area_pred = pred[:, 2] * pred[:, 3]
    union = area_gt + area_pred - intersection

    # Keep only rows that overlap and whose areas are consistent.
    valid = (union > 0) & (intersection > 0) & (union >= intersection)
    intersections = intersection[valid].sum()
    unions = union[valid].sum()

    # Compute IOU. Use epsilon to prevent division by zero
    iou = np.round(intersections / (unions + epsilon()), 4)
    return np.float32(iou)

def IoU(y_true, y_pred):
    """Keras metric wrapper: run `loss` via py_function, typed as float32."""
    return tensorflow.py_function(
        func=loss,
        inp=[y_true, y_pred],
        Tout=tensorflow.float32,
    )

### Write model.compile function & model.fit function with: <br>
1. Optimizer = Adam, Loss = 'mse' and metrics = IoU
2. Epochs = 30, batch_size = 32, verbose = 1

In [None]:
# Build the network with the MobileNet layers frozen, then list its layers.
model = create_model(trainable=False)
model.summary()

In [None]:
# Train on mean squared error and report IoU as an additional metric.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[IoU],
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the training IoU has failed to improve by at least 0.01 for five
# consecutive epochs. mode='max' is explicit because IoU is better when
# larger and its name does not tell Keras which direction to monitor.
earlyS = EarlyStopping(monitor='IoU', mode='max', patience=5, min_delta=0.01)

# The callback must be passed to fit() to have any effect; epochs=50 is the
# upper bound when early stopping does not trigger.
model.fit(X_train, y_train, epochs=50, batch_size=32, callbacks=[earlyS])

### Pick a test image from the given data

In [None]:
filepath = data_path + '/raccoon-62.jpg'
unscaled = cv2.imread(filepath)
if unscaled is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError(f'Could not read image: {filepath}')
# OpenCV decodes to BGR, but the training images were converted to RGB;
# convert so the model input and the matplotlib display use RGB as well.
unscaled = cv2.cvtColor(unscaled, cv2.COLOR_BGR2RGB)
image_height, image_width, _ = unscaled.shape
# Last expression of the cell: shows (height, width, channels).
unscaled.shape


### Resize the image to 128 * 128 and preprocess the image for the MobileNet model

In [None]:
# Shrink the test image to the network's input size, then apply MobileNet's
# own input preprocessing to a float32 copy of the pixels.
img = cv2.resize(unscaled, dsize=(IMAGE_SIZE, IMAGE_SIZE))
feat_scaled = preprocess_input(img.astype(np.float32))

print(f'Before preporcessing image size was {unscaled.shape}')
print(f'After preprocess image size is {feat_scaled.shape}')

### Predict the coordinates of the bounding box for the given test image

In [None]:
# predict() works on batches: add a leading batch axis, then take the single
# result back out.
batch = feat_scaled[np.newaxis, ...]
region = model.predict(batch)[0]

### Plot the test image using .imshow and draw a bounding box around the object with the coordinates obtained from the model

In [None]:
# The model is trained on (x, y, width, height) targets in IMAGE_SIZE space,
# so the prediction has the same layout. Scale the top-left corner back to
# the original image size.
x0 = int(region[0] * image_width / IMAGE_SIZE)
y0 = int(region[1] * image_height / IMAGE_SIZE)

# region[2] and region[3] are the box width and height, not a second corner:
# the bottom-right corner is the top-left corner plus the box size.
x1 = int((region[0] + region[2]) * image_width / IMAGE_SIZE)
y1 = int((region[1] + region[3]) * image_height / IMAGE_SIZE)


import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np


# Draw the full-size test image with the predicted box outlined in red.
fig, ax = plt.subplots(1)
ax.imshow(unscaled)

rect = patches.Rectangle(
    xy=(x0, y0),
    width=x1 - x0,
    height=y1 - y0,
    linewidth=2,
    edgecolor='r',
    facecolor='none',
)
ax.add_patch(rect)

plt.show()