In [None]:
# Tooling required by the TensorFlow Object Detection API.
# %pip (instead of !pip) installs into the environment of the running kernel,
# so the packages are importable from the cells below.
%pip install --upgrade pip
%pip install tf_slim
%pip install pycocotools


In [None]:
# Build/runtime helpers installed quietly into the user site-packages.
# %pip (instead of !pip) guarantees the pip of the running kernel is used.
%pip install --user Cython -q
%pip install --user contextlib2 -q
%pip install --user pillow -q
%pip install --user lxml -q


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Print the full path of every file found below the Kaggle input mount.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Upgrade protobuf quietly; %pip targets the environment of the running kernel.
%pip install -U protobuf -q

In [None]:
%%bash
# Clone the TensorFlow models repository and install its Object Detection
# package (TF2 setup script) into the current Python environment.

# Stop at the first failing command so a broken clone/compile/install is not
# silently ignored by the following steps.
set -euo pipefail

# Skip the clone when the checkout already exists so the cell can be re-run.
if [ ! -d models ]; then
    git clone --depth 1 https://github.com/tensorflow/models
fi

# Relative path: enter the checkout created by the clone above.
cd models/research/

# Generate the Python modules for the protobuf definitions.
protoc object_detection/protos/*.proto --python_out=.

# The TF2 setup script has to sit in research/ for `pip install .` to find it.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
import tensorflow as tf
from PIL import Image
import io
from object_detection.utils import dataset_util
from google.protobuf import text_format
from object_detection.protos import pipeline_pb2

# YOLO to TFRecord Format

In [None]:
def create_tf_example(example, target_size=(640, 640),
                      class_names=('IssueBank', 'ReceiverName', 'AcNo',
                                   'Amt', 'ChqNo', 'DateIss')):
    """Build a ``tf.train.Example`` for one annotated image.

    The image is loaded from disk, converted to RGB, resized to
    ``target_size`` and re-encoded as JPEG. Bounding boxes are stored exactly
    as supplied; because they are normalised, the resize does not affect them.

    Args:
        example: Mapping with the keys
            ``'filename'`` (path of the image),
            ``'xmin'``, ``'xmax'``, ``'ymin'``, ``'ymax'`` (one normalised
            coordinate per box) and ``'classid'`` (one numeric class id per
            box).
        target_size: ``(width, height)`` the image is resized to before it is
            encoded.
        class_names: Class names indexed by class id. Assumes the ids in
            ``example['classid']`` are 0-based indices into this sequence --
            TODO confirm against the dataset.

    Returns:
        The populated ``tf.train.Example``.

    Raises:
        ValueError: If a class id has no entry in ``class_names``.
    """
    filename = example['filename']
    print(filename)

    # Convert to RGB first: palette/alpha images cannot be written as JPEG.
    with Image.open(filename) as source_image:
        image = source_image.convert('RGB').resize(target_size)
    width, height = image.size

    jpeg_buffer = io.BytesIO()
    image.save(jpeg_buffer, format='JPEG')
    encoded_jpg = jpeg_buffer.getvalue()
    image_format = b'jpg'

    # Plain Python lists keep the protobuf feature helpers independent of the
    # container type the caller used (list or numpy array).
    xmins = np.asarray(example['xmin'], dtype=float).tolist()
    xmaxs = np.asarray(example['xmax'], dtype=float).tolist()
    ymins = np.asarray(example['ymin'], dtype=float).tolist()
    ymaxs = np.asarray(example['ymax'], dtype=float).tolist()

    # NOTE(review): the ids are written unchanged. Object Detection API label
    # maps normally start at id 1 -- confirm the ids match the label map.
    classes = np.asarray(example['classid']).astype(np.int32).tolist()

    unknown_ids = sorted({class_id for class_id in classes
                          if not 0 <= class_id < len(class_names)})
    if unknown_ids:
        raise ValueError(
            'No class name for class id(s) %s in %s' % (unknown_ids, filename))

    # One name per box (not the full list of names): this feature must line up
    # element-wise with the box coordinates and the class labels.
    classes_text = [class_names[class_id].encode('utf-8') for class_id in classes]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode()),
        'image/source_id': dataset_util.bytes_feature(filename.encode()),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

def get_img_info(filename, annotations_dir='../input/chequedetection/Images/'):
    """Load the annotation rows that belong to an image.

    The annotation file is looked up in ``annotations_dir`` under the image's
    base name with the extension replaced by ``.txt``. Every non-blank line is
    parsed as whitespace-separated numbers.

    Args:
        filename: Path (or bare name) of the image; only its base name is used.
        annotations_dir: Directory that contains the ``.txt`` annotation files.

    Returns:
        A 2-D float array with one row per annotation line. Column 0 (the
        class id) is truncated to a whole number. An annotation file without
        any rows yields an array of shape ``(0, 5)``.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    annotation_path = os.path.join(annotations_dir, stem + '.txt')

    with open(annotation_path) as f:
        # split() without arguments also drops the trailing newline; blank
        # lines (e.g. at the end of the file) are skipped.
        rows = [line.split() for line in f if line.strip()]

    if not rows:
        # Keep the result two-dimensional so callers can still slice columns.
        return np.empty((0, 5), dtype=float)

    # The builtin float/int replace np.float/np.int, which NumPy has removed.
    image_annotations = np.array(rows, dtype=float)
    image_annotations[:, 0] = image_annotations[:, 0].astype(int)
    return image_annotations



In [None]:
# Convert every image listed in train.txt into one tf.train.Example and write
# them all to ./train.record.
with open('../input/chequedetection/train.txt') as f:
    # One image path per line; drop line endings and skip blank lines.
    files = [line.strip() for line in f if line.strip()]

# The context manager closes the record file even if an example fails.
with tf.io.TFRecordWriter("./train.record") as writer:
    for example in files:
        annotations = get_img_info(example)

        # YOLO rows are `class x_center y_center width height` (normalised),
        # so the box corners are centre -/+ half the size.
        # NOTE(review): assumes the .txt files really are in YOLO format --
        # confirm against the dataset.
        half_width = annotations[:, 3] / 2.0
        half_height = annotations[:, 4] / 2.0
        ann_dict = {
            "filename": example,
            "xmin": np.clip(annotations[:, 1] - half_width, 0.0, 1.0),
            "ymin": np.clip(annotations[:, 2] - half_height, 0.0, 1.0),
            "xmax": np.clip(annotations[:, 1] + half_width, 0.0, 1.0),
            "ymax": np.clip(annotations[:, 2] + half_height, 0.0, 1.0),
            "classid": annotations[:, 0],
        }
        tf_example = create_tf_example(ann_dict)
        writer.write(tf_example.SerializeToString())


In [None]:
# Convert every image listed in test.txt into one tf.train.Example and write
# them all to ./test.record.
with open('../input/chequedetection/test.txt') as f:
    # One image path per line; drop line endings and skip blank lines.
    files = [line.strip() for line in f if line.strip()]

# The context manager closes the record file even if an example fails.
with tf.io.TFRecordWriter("./test.record") as writer:
    for example in files:
        annotations = get_img_info(example)

        # YOLO rows are `class x_center y_center width height` (normalised),
        # so the box corners are centre -/+ half the size.
        # NOTE(review): assumes the .txt files really are in YOLO format --
        # confirm against the dataset.
        half_width = annotations[:, 3] / 2.0
        half_height = annotations[:, 4] / 2.0
        ann_dict = {
            "filename": example,
            "xmin": np.clip(annotations[:, 1] - half_width, 0.0, 1.0),
            "ymin": np.clip(annotations[:, 2] - half_height, 0.0, 1.0),
            "xmax": np.clip(annotations[:, 1] + half_width, 0.0, 1.0),
            "ymax": np.clip(annotations[:, 2] + half_height, 0.0, 1.0),
            "classid": annotations[:, 0],
        }
        tf_example = create_tf_example(ann_dict)
        writer.write(tf_example.SerializeToString())


# SSD Mobilenet V1 from TF2 Detection Zoo

In [None]:
# Download the pre-trained checkpoint archive; -nc skips the download when the
# file is already present so re-running the cell does not create a duplicate.
!wget -nc http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.tar.gz

In [None]:
# Unpack the downloaded archive into the current directory.
!tar -xf ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8.tar.gz

In [None]:
# Show the checkpoint files of the extracted model (the pipeline config below
# points fine_tune_checkpoint at ckpt-0 in this directory).
!ls ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8/checkpoint

# Customize Pipeline Config

In [None]:
                                                                                                                                                                                                                                       
# Start from the pipeline.config that ships with the downloaded model and
# override only the fields that are specific to this dataset.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()

with tf.io.gfile.GFile('./ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8/pipeline.config', "r") as f:
    text_format.Merge(f.read(), pipeline_config)

# Six classes: one per class name written to the TFRecords.
pipeline_config.model.ssd.num_classes = 6
pipeline_config.train_config.batch_size = 16

# Fine-tune from the checkpoint extracted from the downloaded archive.
pipeline_config.train_config.fine_tune_checkpoint = "/kaggle/working/ssd_mobilenet_v1_fpn_640x640_coco17_tpu-8/checkpoint/ckpt-0"
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"

# Training input: label map from the dataset, records written by this notebook.
pipeline_config.train_input_reader.label_map_path = "../input/chequedetection/object-detection.pbtxt"
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = ["./train.record"]

config_text = text_format.MessageToString(pipeline_config)

# The context manager closes the file; no explicit close() is required.
with tf.io.gfile.GFile("./ed_pipeline.config", "wb") as f:
    f.write(config_text)

In [None]:
# Fill in the placeholders that are still present in ed_pipeline.config after
# the Python overrides (presumably the eval input reader -- verify in the file).
# 1) Point every remaining placeholder label_map_path at the dataset label map.
!sed -i 's/label_map_path: \"PATH_TO_BE_CONFIGURED\"/label_map_path: \"\.\.\/input\/chequedetection\/object-detection.pbtxt\"/g' ./ed_pipeline.config
# 2) Replace every other remaining PATH_TO_BE_CONFIGURED with ./test.record.
!sed -i 's/PATH_TO_BE_CONFIGURED/\.\/test.record/g' ./ed_pipeline.config


# Training

In [None]:
# Directory for training checkpoints; -p keeps the cell re-runnable when the
# directory already exists.
!mkdir -p cheque_model

In [None]:
# Fine-tune the detector with the customised pipeline config; checkpoints are
# written to cheque_model/.
# Every continuation backslash is preceded by a space so the arguments can
# never be glued together, and the last line carries no dangling backslash.
# NOTE(review): confirm that model_main_tf2.py accepts --num_eval_steps.
!python3 ./models/research/object_detection/model_main_tf2.py \
    --model_dir=/kaggle/working/cheque_model/ \
    --pipeline_config_path=./ed_pipeline.config \
    --alsologtostderr \
    --num_train_steps=1000 \
    --sample_1_of_n_eval_examples=1 \
    --num_eval_steps=50


In [None]:
# List what the training run wrote into the model directory.
!ls ./cheque_model

# Export trained model

In [None]:
# Output directory for the exported model; -p keeps the cell re-runnable when
# the directory already exists.
!mkdir -p saved_model

In [None]:
# Run the Object Detection API exporter on the checkpoints in ./cheque_model,
# using the same pipeline config as training, and write the result to
# ./saved_model/.
!python3 ./models/research/object_detection/exporter_main_v2.py \
  --pipeline_config_path=./ed_pipeline.config \
  --trained_checkpoint_dir=./cheque_model \
  --output_directory=./saved_model/ \
  --input_type=image_tensor

In [None]:
# Bundle the exported model into model.zip for download.
# NOTE(review): the absolute source path is recorded inside the archive
# (kaggle/working/saved_model/...); confirm that layout is what consumers expect.
!zip -r model.zip /kaggle/working/saved_model

# Clean up


In [None]:
# Remove the cloned models repository; -f avoids an error when the directory
# has already been deleted by an earlier run of this cell.
!rm -rf models