In [2]:
import os
import wget
import tensorflow as tf

from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [3]:
# Dataset name (also the name of the workspace sub-folder)
training_dataset_name = 'training_demo'

# Root of this dataset's workspace; the workspace paths below are built from it
WORKSPACE_PATH = os.path.join('Tensorflow', 'workspace', training_dataset_name)

# Paths
PREPROCESSING_SCRIPTS_PATH = os.path.join('Tensorflow', 'scripts', 'preprocessing')
ANNOTATIONS_PATH = os.path.join(WORKSPACE_PATH, 'annotations')
LABEL_MAP_PATH = os.path.join(ANNOTATIONS_PATH, 'label_map.pbtxt')
TRAIN_SET_PATH = os.path.join(WORKSPACE_PATH, 'images', 'train')
TEST_SET_PATH = os.path.join(WORKSPACE_PATH, 'images', 'test')
PRETRAINED_MODEL_PATH = os.path.join(WORKSPACE_PATH, 'pre-trained-models')
MODELS_PATH = os.path.join(WORKSPACE_PATH, 'models')

# Create new folders
# !mkdir {PREPROCESSING_SCRIPTS_PATH}

#### Create a label map file (*.pbtxt)

In [20]:
# Classes to detect; each entry becomes one `item { ... }` block in the label map.
# `training_dataset_name` is intentionally NOT re-assigned here: it is defined once
# in the configuration cell, and a second copy could drift out of sync with the
# paths that were already derived from it.
labels = [{'name':'godzilla', 'id':1}]

# open(..., 'w') fails if the parent folder is missing, so create it first.
os.makedirs(os.path.dirname(LABEL_MAP_PATH), exist_ok=True)

# Write the label map in protobuf text format (one item per label).
with open(LABEL_MAP_PATH, 'w') as f:
    for label in labels:
        f.write('item { \n')
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

#### Creating TensorFlow Records

In [25]:
# Set up file information
tf_records_script_url = 'https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/_downloads/da4babe668a8afb093cc7776d7e630f3/generate_tfrecord.py'
tf_records_script_name = 'generate_tfrecord.py'

# The destination folder must exist before the script can be moved into it.
os.makedirs(PREPROCESSING_SCRIPTS_PATH, exist_ok=True)

# Download and move file to specific directory.
# wget.download returns the name it actually saved to, which can differ from
# tf_records_script_name when a file with that name already exists in the
# working directory, so move the returned path rather than the expected one.
# os.replace is used instead of the Windows-only `move` shell command and
# overwrites any previous copy in the destination.
downloaded_script = wget.download(tf_records_script_url)
os.replace(downloaded_script, os.path.join(PREPROCESSING_SCRIPTS_PATH, tf_records_script_name))

100% [................................................................................] 6410 / 6410        1 file(s) moved.


In [26]:
# Run the downloaded generate_tfrecord.py once per image set.
# -x receives the image-set folder, -l the label map, -o the output .record path
# (written into the annotations folder as train.record / test.record).
!python {os.path.join(PREPROCESSING_SCRIPTS_PATH, tf_records_script_name)} -x {TRAIN_SET_PATH} -l {LABEL_MAP_PATH} -o {os.path.join(ANNOTATIONS_PATH, 'train.record')} 
!python {os.path.join(PREPROCESSING_SCRIPTS_PATH, tf_records_script_name)} -x {TEST_SET_PATH} -l {LABEL_MAP_PATH} -o {os.path.join(ANNOTATIONS_PATH, 'test.record')}

Successfully created the TFRecord file: Tensorflow\workspace\training_demo\annotations\train.record
Successfully created the TFRecord file: Tensorflow\workspace\training_demo\annotations\test.record


#### Configuring a training job
Choose model for transfer learning: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md

In [None]:
# Choose model (example: SSD ResNet50 V1 FPN 640x640)
pre_trained_model_url = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz'
pre_trained_model_name = 'ssd_resnet50_v1_fpn_640x640_coco17_tpu-8'

wget.download(pre_trained_model_url)
!move {pre_trained_model_name +'.tar.gz'} {PRETRAINED_MODEL_PATH}
!cd {PRETRAINED_MODEL_PATH} && tar -zxvf {pre_trained_model_name +'.tar.gz'}

#### Configure the training pipeline

In [39]:
# Create a directory of the new model
new_model_name = 'my_ssd_resnet50_v1_fpn'
NEW_MODEL_PATH = os.path.join('Tensorflow', 'workspace',training_dataset_name, 'models', new_model_name)

# exist_ok=True keeps the cell re-runnable (a plain `mkdir` errors on the second run)
# and also creates the parent `models` folder if it is missing.
os.makedirs(NEW_MODEL_PATH, exist_ok=True)

# Copy existing pipeline to the new directory.
# tf.io.gfile.copy replaces the Windows-only `copy` shell command; overwrite=True
# resets the file to the pre-trained model's original config on every run.
tf.io.gfile.copy(
    os.path.join(PRETRAINED_MODEL_PATH, pre_trained_model_name, 'pipeline.config'),
    os.path.join(NEW_MODEL_PATH, 'pipeline.config'),
    overwrite=True,
)

        1 file(s) copied.


In [22]:
# Location of the pipeline.config that lives in the new model's folder
PIPELINE_CONFIG_PATH = os.path.join(
    'Tensorflow',
    'workspace',
    training_dataset_name,
    'models',
    new_model_name,
    'pipeline.config',
)

# Placeholder only: nothing is read from disk here, `config` is just set to 0
config = 0

In [23]:
# Load the existing pipeline.config into an editable protobuf message
# Credit: https://stackoverflow.com/questions/55323907/dynamically-editing-pipeline-config-for-tensorflow-object-detection
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()

# Read the raw text first ...
with tf.io.gfile.GFile(PIPELINE_CONFIG_PATH, 'r') as f:
    proto_str = f.read()

# ... then merge it into the (initially empty) message
text_format.Merge(proto_str, pipeline_config)

In [26]:
# Set up the parameters

# Model: one class per entry in `labels`
pipeline_config.model.ssd.num_classes = len(labels)

# Training: batch size and the checkpoint to fine-tune from
train_config = pipeline_config.train_config
train_config.batch_size = 4
train_config.fine_tune_checkpoint = os.path.join(
    PRETRAINED_MODEL_PATH, pre_trained_model_name, 'checkpoint', 'ckpt-0'
)
train_config.fine_tune_checkpoint_type = "detection"

# Training input: label map + train.record
train_reader = pipeline_config.train_input_reader
train_reader.label_map_path = LABEL_MAP_PATH
train_reader.tf_record_input_reader.input_path[:] = [
    os.path.join(ANNOTATIONS_PATH, 'train.record')
]

# Evaluation input (first reader only): label map + test.record
eval_reader = pipeline_config.eval_input_reader[0]
eval_reader.label_map_path = LABEL_MAP_PATH
eval_reader.tf_record_input_reader.input_path[:] = [
    os.path.join(ANNOTATIONS_PATH, 'test.record')
]

In [27]:
# Serialise the edited message back to protobuf text and overwrite pipeline.config
config_text = text_format.MessageToString(pipeline_config)

with tf.io.gfile.GFile(PIPELINE_CONFIG_PATH, 'wb') as f:
    f.write(config_text)

#### Create a command line to train a model

In [16]:
# How many training steps to run
number_of_steps = 2000

# Training entry point shipped with the Object Detection API
TRAINING_SCRIPT_PATH = os.path.join('Tensorflow', 'models-master', 'research', 'object_detection', 'model_main_tf2.py')

# Assemble the command piece by piece; the pieces are joined without any separator,
# so each flag carries its own leading space
command = (
    f"python {TRAINING_SCRIPT_PATH}"
    f" --model_dir={NEW_MODEL_PATH}"
    f" --pipeline_config_path={PIPELINE_CONFIG_PATH}"
    f" --num_train_steps={number_of_steps}"
)

In [17]:
# Show the assembled training command so it can be copied into a terminal
print(command)

python Tensorflow\models-master\research\object_detection\model_main_tf2.py --model_dir=Tensorflow\workspace\training_demo\models\my_ssd_resnet50_v1_fpn --pipeline_config_path=Tensorflow\workspace\training_demo\models\my_ssd_resnet50_v1_fpn\pipeline.config --num_train_steps=2000


I recommend running this command in a terminal (e.g. cmd) so that you can watch the training progress.