#### Imports

In [3]:
import os
import wget
import glob
import random
import shutil

from sklearn.model_selection import train_test_split


#### Download TensorFlow Model Garden

In [None]:
# Path where the TensorFlow Model Garden repository is stored
MODELS_MASTER_PATH = os.path.join('Tensorflow', 'models-master')

# Create the directory (including its parent) in pure Python so the cell
# behaves the same on every OS and can be re-run when the directory exists
os.makedirs(MODELS_MASTER_PATH, exist_ok=True)

In [None]:
# Clone TensorFlow Model Garden
!git clone https://github.com/tensorflow/models {models_master_path}

#### Protobuf Installation

In [None]:
# Create directory
PATH_TO_PB = os.path.join('Tensorflow','protoc')
!mkdir {PATH_TO_PB}

# Download protoc
url="https://github.com/protocolbuffers/protobuf/releases/download/v3.15.6/protoc-3.15.6-win64.zip"
wget.download(url)

# Install protoc
!move protoc-3.15.6-win64.zip {PATH_TO_PB}
!cd {PATH_TO_PB} && tar -xf protoc-3.15.6-win64.zip

# Set up an environment variable 
# os.environ['PATH'] += os.pathsep + os.path.abspath(os.path.join(paths['PROTOC_PATH'], 'bin'))

!cd Tensorflow/models-master/research && protoc object_detection/protos/*.proto --python_out=.

#### Object Detection API installation

In [None]:
# Install TF Object Detection API
!cd Tensorflow/models-master/research && 
copy object_detection\\packages\\tf2\\setup.py setup.py && 
python setup.py build && python setup.py install

In [None]:
# Verify installation by running the TF2 model-builder test script from the
# research directory of the cloned repository
!cd Tensorflow/models-master/research && python object_detection/builders/model_builder_tf2_test.py

#### Workspace settings

In [80]:
# Create workspace and training directory
training_dataset_name = 'training_demo'
TRAINING_DEMO_PATH = os.path.join('Tensorflow', 'workspace', training_dataset_name)
os.makedirs(TRAINING_DEMO_PATH, exist_ok=True)

# Create the recommended directory structure within the training folder.
# exist_ok=True lets the cell be re-run without raising FileExistsError.
directories = ['annotations', 'exported-models', 'images', 'models', 'pre-trained-models']
for folder in directories:
    os.makedirs(os.path.join(TRAINING_DEMO_PATH, folder), exist_ok=True)

In [82]:
# Create train and test folders within the images directory
TRAIN_SET_PATH = os.path.join('Tensorflow', 'workspace', training_dataset_name, 'images', 'train')
TEST_SET_PATH = os.path.join('Tensorflow', 'workspace', training_dataset_name, 'images', 'test')

# Pure-Python creation: independent of the shell in use and safe to re-run
os.makedirs(TRAIN_SET_PATH, exist_ok=True)
os.makedirs(TEST_SET_PATH, exist_ok=True)

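**Now create a dataset and label all images.** Place every `.jpg` image together with its `.xml` annotation file (same base name) directly in the `images` directory before running the cells below.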

#### Randomly selected train and test sets
Credit: https://stackoverflow.com/questions/59627352/split-randomly-dataset-which-is-pair-of-images-and-xml-annotation-files-and-move

In [128]:
# Fraction of the images reserved for the test set
ratio = 0.1

# Fixed seed so that re-running the notebook yields the same split
SPLIT_SEED = 42

# Find the images of the current training dataset; sorted because glob gives
# no ordering guarantee and the seeded split depends on the input order
image_files = sorted(glob.glob(
    os.path.join('Tensorflow', 'workspace', training_dataset_name, 'images', '*.jpg')
))

# Reduce each path to its base name without extension. basename/splitext work
# with either path separator style and only strip the trailing extension.
image_names = [os.path.splitext(os.path.basename(path))[0] for path in image_files]

# Use scikit-learn to generate the lists of training and test names
train_names, test_names = train_test_split(
    image_names, test_size=ratio, random_state=SPLIT_SEED
)

In [130]:
def batch_move_files(file_list, source_path, destination_path):
    """Move image/annotation pairs from one directory to another.

    Args:
        file_list: Iterable of base file names without extension.
        source_path: Directory that holds ``<name>.jpg`` and ``<name>.xml``.
        destination_path: Directory the two files of each entry are moved to.

    Raises:
        FileNotFoundError: If the ``.jpg`` or the ``.xml`` file of an entry is
            missing in ``source_path``. Neither file of that entry is moved
            in this case, so a pair is never split across directories.
    """
    for file in file_list:
        pair = (file + '.jpg', file + '.xml')

        # Check the whole pair up front: moving the image first and failing
        # on the annotation would leave the dataset half-moved.
        missing = [name for name in pair
                   if not os.path.exists(os.path.join(source_path, name))]
        if missing:
            raise FileNotFoundError(
                'Missing in ' + str(source_path) + ': ' + ', '.join(missing)
            )

        for name in pair:
            shutil.move(os.path.join(source_path, name),
                        os.path.join(destination_path, name))


# Directory that currently holds all images and their annotation files
IMAGES_PATH = os.path.join('Tensorflow', 'workspace', training_dataset_name, 'images')

# Move the test pairs first, then the training pairs, into their folders
for names, target_path in ((test_names, TEST_SET_PATH), (train_names, TRAIN_SET_PATH)):
    batch_move_files(names, IMAGES_PATH, target_path)

godzilla.14c62979-20ac-11ec-97ab-44af28a42ad0.jpg
godzilla.1a5f79be-20aa-11ec-b9d4-44af28a42ad0.jpg
godzilla.17f0f0c2-20aa-11ec-8c03-44af28a42ad0.jpg
godzilla.192831ab-20aa-11ec-8b3a-44af28a42ad0.jpg
godzilla.15fb64af-20ac-11ec-b4d6-44af28a42ad0.jpg
godzilla.17330f2d-20ac-11ec-88d7-44af28a42ad0.jpg
godzilla.199f6443-20ac-11ec-b76c-44af28a42ad0.jpg
godzilla.1ccbb308-20aa-11ec-b876-44af28a42ad0.jpg
godzilla.1869db2b-20ac-11ec-ada0-44af28a42ad0.jpg
godzilla.1b949e15-20aa-11ec-b835-44af28a42ad0.jpg


Please continue with the 'Model training' notebook.