# Modeling 1 for Wheat Detection Challenge

### Goals

* Modeling of objects with RetinaNet on COCO dataset

### Comments

* Use Keras model at https://github.com/fizyr/keras-retinanet
* COCO dataset:

## I. Setup

In [None]:
%%capture
# Pin TensorFlow to version 2.3; the %%capture cell magic suppresses pip's output.
pip install tensorflow==2.3

In [None]:
import pandas as pd
import os
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.__version__
import matplotlib.pyplot as plt

In [None]:
# SECURITY: never embed a username/password in a clone URL. The credential that used to be
# written on this line must be treated as leaked and rotated. Authenticate with a token
# kept outside the notebook (e.g. an environment variable or the platform's secret store).
#! git clone https://github.com/chritter/GlobalWheatDetection-.git
!cd GlobalWheatDetection-; ls

In [None]:
# List the contents of the repository directory.
! ls GlobalWheatDetection-

In [None]:
import ast
import os
import pandas as pd


# Location of the competition data and of the files generated by this notebook.
data_path  = '/kaggle/input/global-wheat-detection'
train_path = '.'

# Annotation table shipped with the competition data.
metadata = pd.read_csv(os.path.join(data_path, "train.csv"))
print(metadata.shape)

# The 'bbox' column holds list literals stored as text. ast.literal_eval parses
# literals only, whereas eval would execute any expression found in the CSV.
metadata['bbox'] = metadata['bbox'].apply(ast.literal_eval)

In [None]:
# Preview the first rows of the annotation table.
metadata.head()

In [None]:
# Inspect the bounding box stored in the first row.
metadata.bbox.iloc[0]

In [None]:
# Derive the path of every training image from its image id.
metadata['imagepath'] = [
    os.path.join(data_path, f'train/{imgid}.jpg')
    for imgid in metadata['image_id']
]

In [None]:
# Check the image path built for the first row.
metadata['imagepath'].iloc[0]

In [None]:


# Annotation table with one row per box: imagepath, x1, y1, x2, y2, class_name.
# .copy() gives an independent frame; adding columns to the bare column selection
# metadata[['imagepath']] triggers pandas' SettingWithCopyWarning.
train_data = metadata[['imagepath']].copy()

# Each bbox is read as [x, y, width, height] (this is how the corner coordinates are
# derived below). reshape(-1, 4) keeps the array two-dimensional for an empty table.
bbox_xywh = np.asarray(metadata['bbox'].tolist(), dtype=float).reshape(-1, 4)
train_data['x1'] = bbox_xywh[:, 0]
train_data['y1'] = bbox_xywh[:, 1]
train_data['x2'] = bbox_xywh[:, 0] + bbox_xywh[:, 2]
train_data['y2'] = bbox_xywh[:, 1] + bbox_xywh[:, 3]
train_data['class_name'] = 'wheatheads'

# Store the corner coordinates as integers (fractional parts are truncated).
train_data[['x1','x2','y1','y2']] = train_data[['x1','x2','y1','y2']].astype(int)

print(train_data.head())


In [None]:
# Persist the annotation table as CSV without a header row and without the index column.
train_data.to_csv(os.path.join(train_path,'train_data.csv'), header=False, index=False)

#### Create 2nd file used for training:

In [None]:
# Class-mapping file: a single row holding the class name and its index, written
# without header row or index column.
pd.DataFrame(
    [['wheatheads', 0]],
    columns=['classname', 'classidx'],
).to_csv(
    os.path.join(train_path, 'annotation.csv'),
    header=False,
    index=False,
)

### Install the RetinaNet code

In [None]:
%%capture
# Install the keras-retinanet copy found in the repository directory into the user
# site-packages (--user); the %%capture cell magic suppresses pip's output.
! cd GlobalWheatDetection-/keras-retinanet; pip install . --user

In [None]:
# "! export ..." runs in a throw-away subshell, so it never changes the environment of
# the kernel or of later "!" commands. Extend PATH in the kernel's own environment
# instead; child processes started afterwards inherit it.
_local_bin = '/root/.local/bin'
_current_path = os.environ.get('PATH', '')
if _local_bin not in _current_path.split(os.pathsep):
    os.environ['PATH'] = (
        f'{_current_path}{os.pathsep}{_local_bin}' if _current_path else _local_bin
    )

In [None]:
# List the scripts available in the user-level bin directory.
! ls /root/.local/bin

In [None]:
# Print the retinanet-train script that is invoked for training further below.
! cat /root/.local/bin/retinanet-train

## II. Preprocessing

* Split data set into training and validation set. Keep test set separated.


In [None]:
# train_data.csv was written without a header row, so it must be read with header=None;
# with the default header handling pandas uses the first annotation as column names and
# that bounding box is silently lost.
train_data = pd.read_csv(
    "train_data.csv",
    header=None,
    names=['image_path', 'x1', 'y1', 'x2', 'y2', 'class_name'],
)
train_data.shape

In [None]:
# Save a random sample of 30,000 annotation rows (fixed random_state, so repeatable).
train_data.sample(30000, random_state=42).to_csv('train_data_sample30k.csv' ,index=False)

In [None]:
# Name the six columns: image path, box corners (x1, y1, x2, y2) and class name.
train_data.columns = ['image_path', 'x1', 'y1','x2','y2', 'class_name']
train_data.head()

In [None]:
# Number of distinct image paths in the annotation table.
train_data['image_path'].nunique()

### Create training and validation set

In [None]:
# Distinct image paths; the split is done per image so that all boxes of one image
# end up in the same set.
images = train_data['image_path'].unique()
# Shuffle in place using NumPy's global random state.
np.random.shuffle(images)
# Split boundaries, counted in images: the first idx_start shuffled images are used for
# training, the following idx_end - idx_start images for validation.
idx_start=100
idx_end = 200
images_train = images[:idx_start] # training images (original note: 3373 is total number of images)
images_valid = images[idx_start:idx_end] # validation images, disjoint from images_train

In [None]:
# Keep the annotation rows whose image belongs to the training images and write them
# without header row or index column.
train_data_tr = train_data[train_data['image_path'].isin(images_train)] # images_train contains all training images
train_data_tr.shape  # not displayed: a notebook cell only echoes its last expression
train_data_tr.to_csv('train_data_split_tr.csv' ,index=False, header=False)

In [None]:
# Keep the annotation rows whose image belongs to the validation images and write them
# without header row or index column.
train_data_val = train_data[train_data['image_path'].isin(images_valid)] # images_valid contains all validation images
train_data_val.shape  # not displayed: a notebook cell only echoes its last expression
train_data_val.to_csv('train_data_split_val.csv' ,index=False, header=False)

In [None]:
# (rows, columns) of the training and validation annotation tables.
train_data_tr.shape, train_data_val.shape

In [None]:
# Print the class-mapping file.
! cat annotation.csv

In [None]:
# Print the first lines of the training annotation file.
! head train_data_split_tr.csv

## III. Modeling

### IIIa Run through command line:

* ~40min/epoch for whole dataset (147k) (train.csv)
* 30k = 26min

#### Modeling Notes
* By default the model is initialized with pretrained ImageNet weights. From the help text of the `--imagenet-weights` option: "Initialize the model with pretrained imagenet weights. This is the default behaviour."


How to choose the number of steps per epoch (`--steps`): divide the number of training images by the batch size.
For example, 10 images with batch size 2 result in 5 steps per epoch.

In [None]:
# 100 training images at batch size 8 -> 12.5 batches; the training command uses --steps 12.
100./8.

In [None]:
! mkdir tf_dir

In [None]:
# Train with the CSV data source: 3 epochs of 12 steps at batch size 8, TensorBoard
# logs written to tf_dir with per-batch frequency, validation on the validation split.
! /root/.local/bin/retinanet-train --batch-size 8 --epochs 3  --tensorboard-dir \
    tf_dir --tensorboard-freq batch --steps 12 csv  train_data_split_tr.csv annotation.csv \
        --val-annotations train_data_split_val.csv

In [None]:
#! rm -rf tf_dir/train

In [None]:
# List the files in the TensorBoard training log directory.
! ls tf_dir/train

In [None]:
from tensorflow.python.summary.summary_iterator import summary_iterator

In [None]:
# Module-level lists that plot_losses_event fills with the loss values it reads from a
# TensorBoard event file: three per-batch series ...
batch_total_loss = []
batch_regression_loss = []
batch_classification_loss = []

# ... and the three corresponding per-epoch series.
epoch_total_loss = []
epoch_regression_loss = []
epoch_classification_loss = []


In [None]:
def plot_losses_event(event_file):
    """Plot the training losses recorded in a TensorBoard event file.

    Iterates over ``event_file`` with ``summary_iterator`` and stores every value tagged
    ``batch_loss``, ``batch_regression_loss``, ``batch_classification_loss``,
    ``epoch_loss``, ``epoch_regression_loss`` or ``epoch_classification_loss`` in the
    matching module-level list (``batch_total_loss`` ... ``epoch_classification_loss``).
    It then draws two stacked panels: per-batch losses on top, per-epoch losses below.

    The module-level lists are emptied first, so calling the function again (or
    re-running the cell) does not plot every point twice.

    # Arguments
        event_file : Path of the TensorBoard event file to read.
    """
    # Summary tag -> module-level list that collects its values.
    series_by_tag = {
        'batch_loss': batch_total_loss,
        'batch_regression_loss': batch_regression_loss,
        'batch_classification_loss': batch_classification_loss,
        'epoch_loss': epoch_total_loss,
        'epoch_regression_loss': epoch_regression_loss,
        'epoch_classification_loss': epoch_classification_loss,
    }
    for series in series_by_tag.values():
        series.clear()

    for event in summary_iterator(event_file):
        for value in event.summary.value:
            series = series_by_tag.get(value.tag)
            if series is None:
                continue
            # TF2-style ("*.v2") event files store scalars in the `tensor` field and
            # leave `simple_value` at its default of 0.0; older files use `simple_value`.
            if value.HasField('tensor'):
                series.append(float(tf.make_ndarray(value.tensor)))
            else:
                series.append(value.simple_value)

    _, (ax_batch, ax_epoch) = plt.subplots(2, figsize=(16, 6))

    batch_curves = [
        ('batch_loss_total', batch_total_loss),
        ('batch_regression_loss', batch_regression_loss),
        ('batch_classification_loss', batch_classification_loss),
    ]
    # Each curve gets an x-range of its own length, so a tag that is missing from the
    # file yields an empty curve instead of an x/y length mismatch error.
    for label, losses in batch_curves:
        ax_batch.plot(range(len(losses)), losses, label=label, marker='x')
    ax_batch.legend()
    ax_batch.set_xlabel('number iterations')
    ax_batch.set_ylabel('loss')

    epoch_curves = [
        ('epoch_total_loss', epoch_total_loss),
        ('epoch_regression_loss', epoch_regression_loss),
        ('epoch_classification_loss', epoch_classification_loss),
    ]
    # Epochs are numbered from 1 on the x-axis.
    for label, losses in epoch_curves:
        ax_epoch.plot(range(1, len(losses) + 1), losses, label=label, marker='x')
    ax_epoch.legend()
    ax_epoch.set_xlabel('epoch')
    ax_epoch.set_ylabel('loss')

In [None]:
# Plot the losses of one event file; the hard-coded file name belongs to a specific run
# and has to be replaced by a file that exists in tf_dir/train.
plot_losses_event("tf_dir/train/events.out.tfevents.1604697856.b107a84b4086.2942.5011.v2")

### Evaluate ResNet

In [None]:
from keras_retinanet.models import load_model
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
import cv2
from keras_retinanet.utils.visualization import draw_box, draw_caption

In [None]:
# Load a model snapshot from ./snapshots with the ResNet-50 backbone
# (presumably a file written by the training run above — confirm the file name).
model = load_model('./snapshots/resnet50_csv_02.h5', backbone_name='resnet50')

In [None]:
def preprocess_one_image(image_path):
    """Load one image and prepare it for the network.

    # Arguments
        image_path : Path of the image file to read.

    # Returns
        A tuple (image, scale, draw): the preprocessed and resized network input, the
        scale reported by resize_image, and an RGB copy of the loaded image that is
        meant for drawing on.
    """
    raw_bgr = read_image_bgr(image_path)

    # RGB copy of the untouched image, taken before any network preprocessing.
    canvas = cv2.cvtColor(raw_bgr.copy(), cv2.COLOR_BGR2RGB)

    # Network preprocessing followed by resizing.
    network_input, scale = resize_image(preprocess_image(raw_bgr))

    return network_input, scale, canvas

def test_image(image_path):
    """Run the module-level ``model`` on a single image and print diagnostics.

    # Arguments
        image_path : Path of the image file to run through the model.

    # Returns
        A tuple (test, draw): the raw output of model.predict_on_batch for a batch
        holding only this image, and the RGB copy of the image for drawing.
    """
    print(f'test {image_path}')

    network_input, _scale, canvas = preprocess_one_image(image_path)

    # Add a leading batch axis: the model is fed a batch of one image.
    batch = network_input[np.newaxis, ...]
    print(f'test shape {batch.shape}')

    outputs = model.predict_on_batch(batch)
    print(f'test {outputs}')
    # The original notes describe output 0 as bounding boxes and output 1 as their scores.
    print('test1 ', outputs[0].shape)
    print('test2 ', outputs[1].shape)

    return outputs, canvas
    
def draw_box(image, box, color, thickness=2):
    """ Draws a box on an image with a given color.
    # Arguments
        image     : The image to draw on.
        box       : A list of 4 elements (x1, y1, x2, y2).
        color     : The color of the box.
        thickness : The thickness of the lines to draw a box with.
    # Returns
        The value returned by cv2.rectangle for the image that was drawn on.
    """
    # Convert to plain Python ints: some OpenCV builds reject NumPy integer scalars
    # as point coordinates.
    x1, y1, x2, y2 = (int(v) for v in np.asarray(box).astype(int)[:4])

    # Honour the caller's color and thickness (they used to be ignored in favour of a
    # hard-coded green, 10-pixel outline).
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

In [None]:
# Visual check of draw_box: load the first training image as RGB and draw one box whose
# top-left corner (-100, -100) lies outside the image.
img_test = cv2.cvtColor( read_image_bgr(images_train[0]), cv2.COLOR_BGR2RGB)
img_test.shape  # not displayed: a notebook cell only echoes its last expression
draw = draw_box(image=img_test, box=[-100, -100, 100, 100 ], color=(255, 255, 255), thickness=2)

plt.figure(figsize=(15, 15))
plt.imshow(draw)
plt.show()

In [None]:
import time

In [None]:
# `image` is not defined anywhere at notebook level, so this cell used to fail with a
# NameError. Build the network input (plus scale and drawing canvas) from the first
# validation image; any other image path works as well.
image, scale, draw = preprocess_one_image(images_valid[0])

testa = model.predict_on_batch(np.expand_dims(image, axis=0))
testa[0].shape, testa[1].shape

In [None]:
# Shape of the single-image batch: expand_dims adds a leading axis of size 1.
np.expand_dims(image, axis=0).shape

In [None]:

# Helpers the original cell used without importing them.
from keras_retinanet.models import convert_model
from keras_retinanet.utils.colors import label_color

# Class index -> class name, the same single class that annotation.csv declares.
labels_to_names = {0: 'wheatheads'}

# Network input, resize factor and RGB drawing canvas for one validation image
# (these names were used below but never defined at notebook level).
image, scale, draw = preprocess_one_image(images_valid[0])

# The loaded training snapshot yields two outputs, while the loop below needs boxes,
# scores and labels. convert_model builds the inference variant of the model
# (NOTE(review): confirm that the bundled keras-retinanet version provides it).
inference_model = convert_model(model)

# process image
start = time.time()
boxes, scores, labels = inference_model.predict_on_batch(np.expand_dims(image, axis=0))

print("processing time: ", time.time() - start)

# correct for image scale (not in place, in case the returned array is read-only)
boxes = boxes / scale

# visualize detections
for box, score, label in zip(boxes[0], scores[0], labels[0]):
    # scores are sorted so we can break
    if score < 0.5:
        break

    color = label_color(label)

    b = box.astype(int)
    draw_box(draw, b, color=color)

    caption = "{} {:.3f}".format(labels_to_names[int(label)], score)
    draw_caption(draw, b, caption)

plt.figure(figsize=(15, 15))
plt.axis('off')
plt.imshow(draw)
plt.show()


In [None]:
# The assignment was left unfinished (a syntax error). Use the same single-image batch
# as the earlier prediction cells.
inputs = np.expand_dims(image, axis=0)

In [None]:
# NOTE(review): earlier cells unpack only two outputs from this model, so unpacking three
# values here presumably requires the converted inference model — confirm before use.
boxes, scores, labels = model.predict_on_batch(inputs)