#### 1. Install TensorFlow Object Detection API
- Please make sure you have installed tensorflow or tensorflow-gpu already. 
- The API currently requires TensorFlow 1.14 (2.0 support will come in due course).
- For latest install instructions, check the [github](https://github.com/tensorflow/models/tree/master/research/object_detection) page or [readthedocs](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/training.html) site for this API.

In [0]:
import os
import cv2
import sys
import numpy as np
import pandas as pd
from PIL import Image
from google.colab import drive
from sklearn import preprocessing
from matplotlib import pyplot as plt

In [0]:
!pip uninstall tensorflow-gpu --quiet
!pip uninstall tensorflow --quiet

In [0]:
#List the installed packages (lets the result of the uninstall above be checked by eye)
!pip list

In [0]:
#Install the GPU build of TensorFlow pinned to 1.14.0 (--quiet reduces pip output)
!pip install tensorflow-gpu==1.14.0 --quiet

Clone git repo tensorflow/models

In [0]:
#Clone tensorflow/models into ./models (relative to the current working directory)
#NOTE(review): no branch or commit is pinned, so this takes whatever the default branch holds today - confirm it still matches TF 1.14
!git clone https://github.com/tensorflow/models

In [0]:
#Multiple folders including object_detection should exist in models/research folder
#Listing the directory lets the clone be checked by eye
!ls -l models/research/

Install other dependencies including protobuf compiler

In [0]:
#System packages: the protobuf compiler (protoc is used in the next step) plus PIL, lxml and Tk packages
#NOTE(review): python-pil / python-lxml / python-tk look like Python 2 package names - confirm they match the runtime's Python
!apt-get -qq install protobuf-compiler python-pil python-lxml python-tk
#Python packages, installed into the user site-packages (--user)
!pip install --user Cython  --quiet
!pip install --user contextlib2 --quiet
!pip install --user jupyter --quiet
!pip install --user matplotlib --quiet

Protobuf compilation
- Compile the `.proto` definition files in the object_detection/protos folder into Python code

In [0]:
#Move to models/research folder
%cd models/research

#Convert protobuf files to Python code using Protobuf compiler installed in previous step
!protoc object_detection/protos/*.proto --python_out=.

In [0]:
#List the protos folder so the output of the protoc step can be checked by eye
!ls -l object_detection/protos

Setup environment variable

In [0]:
#Change path to models/research and models/research/slim folders based on where you have cloned github repository above
#%set_env expands Python variables only, not shell environment variables, so "$PYTHONPATH"
#was stored literally and any existing PYTHONPATH value was lost. Build the value through
#os.environ instead: existing entries are kept and the API folders are added only once,
#which also makes the cell safe to re-run.
_api_paths = ['/content/models/research', '/content/models/research/slim']
_existing_paths = [p for p in os.environ.get('PYTHONPATH', '').split(os.pathsep) if p]
os.environ['PYTHONPATH'] = os.pathsep.join(
    _existing_paths + [p for p in _api_paths if p not in _existing_paths]
)
#Show the resulting value (processes started with ! inherit it)
os.environ['PYTHONPATH']

Test Installation

In [0]:
#To be run from models/research folder
#Runs the API's model builder test script as a check of the installation
!python object_detection/builders/model_builder_test.py

#### 3. Data Pre-processing : Convert XML to CSV

In [0]:
#Mount Google Drive at /gdrive; later cells read data and scripts from paths under it
drive.mount('/gdrive')

In [0]:
#List the Capstone data folder on the mounted Drive
!ls -l '/gdrive/My Drive/Colab Notebooks/Capstone/Data'

Use the provided xml_to_csv.py script, which reads all the XML files and saves the information in a CSV file.
 The script is copied from Google Drive in a later cell, once the images have been prepared.

In [0]:
#Return to /content; the relative paths in the following cells (Data, *.csv, ...) are resolved from here
%cd /content

In [0]:
!mkdir Data

In [0]:
!cp -r /gdrive/.shortcut-targets-by-id/1ff10ymfgpaqHIVjbSF1dtXpL4a3Fbx1J/Set1 Data
!cp -r /gdrive/.shortcut-targets-by-id/1AoqNyiIZtUNf1dHbt5KV1oa_EXQ54xyR/Set2 Data
!cp -r /gdrive/.shortcut-targets-by-id/12swxziUpu7rZ-5VcFcuAk0ef_la8eM1A/Set3 Data
!cp -r /gdrive/.shortcut-targets-by-id/1YrgyEb8MCeZ_Hm1qXTma2e0Ur5q7OW-0/Set4 Data
!cp -r /gdrive/.shortcut-targets-by-id/1I_LRq2OXjnXL6tgdnZm46Mkp0c5ufAIs/Set6 Data
!cp -r /gdrive/.shortcut-targets-by-id/1InygFn-wZ3S_kMGPtyMzXXT8hDiZzOdR/Set7 Data
!cp -r /gdrive/.shortcut-targets-by-id/1_lJO6yjUlcq2pK-WFepV5WebAUxaHjaK/Set8 Data
!cp -r /gdrive/.shortcut-targets-by-id/1uLjfPb9lEWdl-g7sgIqnbBePwHbSs4w_/Set9 Data

In [0]:
#Count the TIFF files inside the copied Set folders (the first number printed by wc is the line count, one file per line)
!ls Data/Set*/*.tif* |wc

In [0]:
#Flatten the layout: move every TIFF and XML file out of the Set folders directly into Data
#NOTE(review): files sharing a name across different Set folders would overwrite one another - confirm names are unique
!mv Data/Set*/*.tif* Data
!mv Data/Set*/*.xml* Data

#Count the files now sitting directly in Data
!ls Data/*.tif* |wc
!ls Data/*.xml |wc

In [0]:
#Delete the Set folders, including anything still left inside them after the move above
!rm -r Data/Set*

In [0]:
#Integer downscale factor: image width/height and the annotation coordinates are floor-divided by this value in later cells
RESIZE = 4

In [0]:
#Work inside the Data folder; the conversion cell below uses paths relative to the current directory
%cd /content/Data

In [0]:
for infile in os.listdir("./"):
    #print("file : " + infile)
    words = infile.split(".")
    if (words[1] == "tif" or words[1] == "tiff") :
       im = Image.open(infile)
       out = im.convert("RGB")
       #print(out.size)
       out = out.resize((out.width // RESIZE, out.height //RESIZE))
       #print(out.size)
       outfile = words[0] + ".png"
       #print("new filename : " + outfile)
       out.save(outfile, "png")

!ls *.png |wc

In [0]:
#Delete the original TIFF files from the current directory (expected to be /content/Data at this point)
!rm *.tif*
#Go back to /content for the remaining steps
%cd "/content"

In [0]:
#Copy xml_to_csv.py from Drive to current directory.
!cp '/gdrive/My Drive/Colab Notebooks/xml_to_csv.py' .

In [0]:
#Run the script on the Data folder, writing pascal_voc.csv
#NOTE(review): -i / -o are presumably the input folder and output file - verify against the script
!python xml_to_csv.py -i Data -o pascal_voc.csv

In [0]:
#Check that the CSV file was created
!ls -l *.csv

In [0]:
#Load the annotations; later cells use the columns filename, width, height, class, xmin, ymin, xmax, ymax
df = pd.read_csv('pascal_voc.csv')

In [0]:
#Number of distinct image filenames referenced by the annotations
len(df.filename.unique())

In [0]:
#The images were converted to PNG, so point the annotations at the new file names.
#Only a trailing ".tif" / ".tiff" extension is rewritten. The earlier plain replace() also changed
#any "tif" occurring inside the name itself (e.g. "motif_1.tif" became "mopng_1.png").
df['filename'] = df['filename'].str.replace(r'\.tiff?$', '.png', regex=True)

In [0]:
#Distinct filename count after the rename; compare with the count shown before it
len(df.filename.unique())

In [0]:
#Shrink the recorded image size and the bounding-box coordinates by floor-dividing by RESIZE,
#the same operation that was applied to the image dimensions during conversion.
#NOTE: running this cell again divides the values again.
for column in ('width', 'height', 'xmin', 'xmax', 'ymin', 'ymax'):
    df[column] = df[column] // RESIZE

In [0]:
#Distinct class names present in the annotations
df['class'].unique()

In [0]:
#Show the rows whose class is 's' (these rows are removed in the next cell)
df.loc[df['class'] == 's']

In [0]:
#Remove every annotation whose class is 's'. The rows are dropped from df in place,
#so the remaining rows keep their original index labels (the index now has gaps).
is_class_s = df['class'] == 's'
df.drop(index=df.index[is_class_s], inplace=True)

In [0]:
#Distinct filename count after removing the 's' rows
len(df.filename.unique())

In [0]:
#(rows, columns) of the annotation table after the drop
df.shape

In [0]:
#Distinct class names that remain
df['class'].unique()

In [0]:
#Number of distinct classes that remain
len(df['class'].unique())

In [0]:
#Fit a LabelEncoder on the class names, then store the encoded integer for each row in a 'label' column
le = preprocessing.LabelEncoder()
le.fit(df['class'])
df['label'] = le.transform(df['class'])
#Preview the first rows with the new column
df.head()

In [0]:
#unique values in the label
#(these come straight from the encoder; 1 is added to them in the next cell)
df.label.unique()

In [0]:
#Object detection API expects index to start from 1 (and not 0)
#The ids are recomputed from the fitted encoder instead of incrementing the column in place,
#so running this cell more than once cannot shift them again.
df['label'] = le.transform(df['class']) + 1
df.label.unique()

In [0]:
#Map each label id to its class name; used further down to write the label map file
label_class_dict = {
    label_id: class_name
    for label_id, class_name in zip(df['label'], df['class'])
}
label_class_dict

In [0]:
#Despite its name, all_classes holds the distinct image FILENAMES; the split is done
#per image so that all boxes of one image land on the same side
all_classes = df['filename'].unique()
all_classes.shape

In [0]:
#Split images between training and test
#A dedicated, seeded generator makes the split reproducible across kernel restarts;
#previously every run produced a different train/test partition.
SPLIT_SEED = 42
split_rng = np.random.RandomState(SPLIT_SEED)

#Each image is assigned to training with probability 0.8, so roughly 80% of the data is used for training
mask = split_rng.rand(all_classes.shape[0]) < 0.8

#Get Training and Test images
train_images = all_classes[mask]
test_images = all_classes[~mask]

In [0]:
#Check number of images in training and test
#(the two counts should add up to the number of distinct filenames)
train_images.shape, test_images.shape

In [0]:
#Carry the image-level split over to the annotation rows: a row follows the image it belongs to
in_train = df['filename'].isin(train_images)
in_test = df['filename'].isin(test_images)
train_df = df.loc[in_train]
test_df = df.loc[in_test]

In [0]:
#(rows, columns) of the training and test annotation tables
train_df.shape, test_df.shape

In [0]:
#We will use opencv and matplotlib

#Pickup a random image number
#This is a row POSITION. Rows were dropped earlier, so the index labels have gaps;
#the position is therefore resolved with iloc below (loc could raise KeyError on a removed label).
img_num = np.random.randint(0, df.shape[0])

#Read the image
img_file = df['filename'].iloc[img_num]
img = cv2.imread('Data/' + img_file)
#cv2.imread returns None rather than raising when the file cannot be read
if img is None:
    raise FileNotFoundError('Could not read image: Data/' + img_file)
print(img.shape)

#Find all rows which have same file name
rows_with_file = df[df['filename'] == img_file].index.tolist()

#Draw rectangle(s) as per bounding box information
for i in rows_with_file:

    #Get bounding box, cast to plain ints because the drawing calls expect integer pixel coordinates
    xmin, ymin, xmax, ymax = (int(v) for v in df.loc[i, ['xmin', 'ymin', 'xmax', 'ymax']])
    #Get Label
    label = df.loc[i, 'class']
    #Add bounding box
    cv2.rectangle(img, (xmin,ymin), (xmax, ymax), (0,255,0), 2)
    #Add text just above the top-left corner of the box
    cv2.putText(img,label,(xmin, ymin-5),cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

#Convert BGR format (used by opencv to RGB format used by matplotlib)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#Draw image using matplotlib
plt.figure(figsize=(15,10))
plt.imshow(img)
plt.show()

In [0]:
#Save both splits as CSV (without the index column); the TFRecord generation step below reads these files
train_df.to_csv('train.csv', index=False)
test_df.to_csv('test.csv', index=False)

In [0]:
#Check that train.csv and test.csv were written
!ls -l

In [0]:
#Copy generate_tfrecord.py from drive to current directory.
#Change gdrive folder if you have saved script in some other folder
!cp '/gdrive/My Drive/Colab Notebooks/generate_tfrecord.py' . 

In [0]:
#Make sure models/research folder is on system path
#Guarded so that re-running the cell does not keep adding duplicate entries.
#This affects the notebook kernel only; scripts started with !python get their import path from PYTHONPATH.
if '/content/models/research' not in sys.path:
    sys.path.append('/content/models/research')

In [0]:
#generate tfrecord for training data
#(reads train.csv plus the images in Data and writes train.record)
!python generate_tfrecord.py --csv_input=train.csv  --img_path=Data --output_path=train.record

#generate tfrecord for test data
#(same script, reading test.csv and writing test.record)
!python generate_tfrecord.py --csv_input=test.csv  --img_path=Data --output_path=test.record

In [0]:
#Check that train.record and test.record were created
!ls -l

In [0]:
#Write the label map (pbtxt) that the Object detection API requires.
#One "item" entry is emitted per label id, in ascending id order, giving the id and its class name.
#The file can also be prepared by hand; a sample is at object_detection/data/pascal_label_map.pbtxt
pbtxt_items = [
    "item {\n  id: " + str(label_id) + "\n  name: '" + label_class_dict[label_id] + "'\n}\n\n"
    for label_id in sorted(label_class_dict.keys())
]
pbtxt_file_txt = ''.join(pbtxt_items)

with open('pascal_voc.pbtxt', 'w') as pbfile:
    pbfile.write(pbtxt_file_txt)

In [0]:
#Check that pascal_voc.pbtxt was written
!ls -l

In [0]:
#Print the label map to check the ids and class names
!cat pascal_voc.pbtxt

In [0]:
#Download the model from zoo
#(the ssd_mobilenet_v1_coco_2018_01_28 archive; -q silences wget's progress output)
!wget -q http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz

In [0]:
#Extract tar file content
#(-v lists every extracted file)
!tar -xvf ssd_mobilenet_v1_coco_2018_01_28.tar.gz

In [0]:
#List the extracted model folder
!ls -l ssd_mobilenet_v1_coco_2018_01_28

In [0]:
#Copy ssd_mobilenet_v1_coco_pascal_voc.config from drive to current directory. 
#Change gdrive folder if you have saved script in some other folder
#NOTE(review): the config presumably refers to the record, label map and checkpoint files created above - verify its paths
!cp '/gdrive/My Drive/Colab Notebooks/ssd_mobilenet_v1_coco_pascal_voc.config' . 

In [0]:
#Print the pipeline config so its settings can be reviewed
!cat  ssd_mobilenet_v1_coco_pascal_voc.config

In [0]:
#Copy train.py file from 'models/research/object_detection/legacy' folder to current folder
#(the copy is what the training cell below runs)
!cp /content/models/research/object_detection/legacy/train.py .

In [0]:
!mkdir training

In [0]:
#start training
#Output goes to training/ (--train_dir), settings come from the pipeline config, logs go to stderr
!python train.py --train_dir=training/ --pipeline_config_path=ssd_mobilenet_v1_coco_pascal_voc.config --logtostderr

In [0]:
#Copy export_inference_graph.py file from models/research/object_detection to current directory
#(the copy is what the export cells below run)
!cp /content/models/research/object_detection/export_inference_graph.py .

In [0]:
#List the training folder to see which checkpoint numbers (model.ckpt-N) are available for export
!ls -l training

In [0]:
#Provide input name, config file location, training folder
#Exports checkpoint 376 to the folder pascal_voc_ssd_model-376.
#The checkpoint number is hard-coded: it must match a model.ckpt-N actually present in training/
!python export_inference_graph.py --input_type image_tensor --pipeline_config_path ssd_mobilenet_v1_coco_pascal_voc.config --trained_checkpoint_prefix training/model.ckpt-376 --output_directory pascal_voc_ssd_model-376

In [0]:
#Provide input name, config file location, training folder
#Exports checkpoint 400 to the folder pascal_voc_ssd_model-400.
#The checkpoint number is hard-coded: it must match a model.ckpt-N actually present in training/
!python export_inference_graph.py --input_type image_tensor --pipeline_config_path ssd_mobilenet_v1_coco_pascal_voc.config --trained_checkpoint_prefix training/model.ckpt-400 --output_directory pascal_voc_ssd_model-400

In [0]:
#Check if the exported model for checkpoint 376 is saved in the current directory
!ls -l pascal_voc_ssd_model-376

In [0]:
#Check if the exported model for checkpoint 400 is saved in the current directory
!ls -l pascal_voc_ssd_model-400

In [0]:
#Copy the exported model for checkpoint 376 to the Capstone folder on Drive so it outlives the Colab session
!cp -r pascal_voc_ssd_model-376 '/gdrive/My Drive/Colab Notebooks/Capstone/'

In [0]:
#Copy the exported model for checkpoint 400 to the Capstone folder on Drive so it outlives the Colab session
!cp -r pascal_voc_ssd_model-400 '/gdrive/My Drive/Colab Notebooks/Capstone/'