<a href="https://colab.research.google.com/github/joshba06/Object_Detection/blob/main/1_Preparing_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Getting Started

In [1]:
# Runtime selector: choose "1" if running on Colab, choose "0" if running on local machine
system_id = 0

if system_id == 0:
    print('Running on local machine...')
elif system_id == 1:
    print('Running on Google Colab...')
else:
    print('Please define which operating system you are running on...!')

# Define the objects that you would like to train the deep learning-model with below
labels = ['Pen', 'Mug']

# The label map is derived from `labels` (ids start at 1, in list order) so the
# class list and the label map cannot drift apart when a label is added or removed.
labelmap = [{'name': name, 'id': index} for index, name in enumerate(labels, start=1)]

# Change model url and name if model changes
pre_trained_model_url = 'http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz'
pre_trained_model_name = 'ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8'
custom_model_name = 'my_ssd_mobilenet_v2_fpnlite'

# Image size as a 2-tuple; passed to the image generation script further down.
# NOTE(review): presumably this must match the input resolution of the chosen model (640x640) -- confirm.
img_size = (640, 640)

# Make sure the top folder for Colab is renamed to "Colab_Notebooks"
# (the Colab home path used by this notebook is built under MyDrive/Colab_Notebooks)


Running on local machine...


In [5]:
import os

# Resolve `home_path`, the project root, for the runtime selected via `system_id`.

# Local machine
if system_id == 0:
    home_path = '/Users/niklas/Virtual_Environment/Version_1/Object_Detection'
    print('Running on local machine...')

# Google colab
elif system_id == 1:

    print('Running on Google Colab...')

    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')

    # Setup path to home directory
    home_path = '/content/drive/MyDrive/Colab_Notebooks/Object_Detection'
    os.chdir(home_path)

else:
    # Fail fast: continuing would leave `home_path` undefined (or stale from an
    # earlier run), and every following cell builds its paths from it.
    raise ValueError('No operating system was defined... (system_id must be 0 or 1, got %r)' % (system_id,))

Running on local machine...


## 1.2 Create file and folder structure

In [6]:
# Short names for the most used directories, given relative to the project home.
# Insertion order matters only for the order of the status messages below.
_relative_dirs = {
    '0_User_Input': '0_User_Input',
    'backgrounds': '0_User_Input/backgrounds',
    'objects': '0_User_Input/objects',

    '1_Preprocessing': '1_Preprocessing',
    'images': '1_Preprocessing/images',

    '2_Tensorflow': '2_Tensorflow',
    'protoc': '2_Tensorflow/protoc',
    'workspace': '2_Tensorflow/workspace',
    'scripts': '2_Tensorflow/workspace/scripts',
    'training': '2_Tensorflow/workspace/training',
    'annotations': '2_Tensorflow/workspace/training/annotations',
    'images_training': '2_Tensorflow/workspace/training/images/training',
    'images_testing': '2_Tensorflow/workspace/training/images/testing',
    'models': '2_Tensorflow/workspace/training/models',
    'pre_trained_models': '2_Tensorflow/workspace/training/pre_trained_models',
}

# Absolute paths, keyed by the same short names
paths = {name: os.path.join(home_path, relative) for name, relative in _relative_dirs.items()}

# Create every directory that is missing; existing directories are left untouched
for directory in paths.values():
    if os.path.exists(directory):
        print('%s already exists..' % directory)
        continue

    try:
        os.makedirs(directory)
    except OSError:
        print('Failed to create %s from scratch.' % directory)
    else:
        print('Successfully created %s from scratch. ' % directory)

# One preprocessing sub-folder per label (nothing is printed when it already exists)
for label in labels:
    label_dir = os.path.join(paths['images'], label)
    if os.path.exists(label_dir):
        continue

    try:
        os.makedirs(label_dir)
    except OSError:
        print('Failed to create %s from scratch.' % label_dir)
    else:
        print('Successfully created %s from scratch. ' % label_dir)

# Registry for paths to frequently used files; filled in by later cells
files = {}

paths['home'] = home_path

/Users/niklas/Virtual_Environment/Version_1/Object_Detection/0_User_Input already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/0_User_Input/backgrounds already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/0_User_Input/objects already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/1_Preprocessing already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/1_Preprocessing/images already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/2_Tensorflow already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/2_Tensorflow/protoc already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/2_Tensorflow/workspace already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/2_Tensorflow/workspace/scripts already exists..
/Users/niklas/Virtual_Environment/Version_1/Object_Detection/2_Tensorflow/workspace/training already exists..
/Us

## 1.3 Installing Dependencies and importing modules

In [7]:
!pip uninstall opencv-python -y
!pip install opencv-python

[0mCollecting opencv-python
  Using cached opencv_python-4.6.0.66-cp36-abi3-macosx_10_15_x86_64.whl (46.4 MB)
Collecting numpy>=1.19.3
  Using cached numpy-1.23.0-cp39-cp39-macosx_10_9_x86_64.whl (18.1 MB)
Installing collected packages: numpy, opencv-python
Successfully installed numpy-1.23.0 opencv-python-4.6.0.66


In [8]:
!pip install wget

Collecting wget
  Using cached wget-3.2-py3-none-any.whl
Installing collected packages: wget
Successfully installed wget-3.2


In [9]:
import cv2 as cv

# Import uuid (module that allows us to name images uniquely)
import uuid

import time

import pathlib

# Overwrite old folders and move directories
import shutil

import math

import wget

# 2. User action

In [10]:
# Check if images are located in the correct folders:
# count the files per label folder and in the backgrounds folder.
# Hidden entries (names starting with '.', e.g. macOS '.DS_Store') are not images
# and are therefore excluded from the counts.
folders = os.listdir(paths['objects'])
image_count = {}
print('Please check whether the correct number of images is displayed:...')

for folder in folders:
    folder_path = os.path.join(paths['objects'], folder)
    # Only folders named after a current label are counted; stray files are skipped
    if folder in labels and os.path.isdir(folder_path):
        image_count[folder] = sum(
            1 for entry in os.listdir(folder_path) if not entry.startswith('.')
        )

image_count['backgrounds'] = sum(
    1 for entry in os.listdir(paths['backgrounds']) if not entry.startswith('.')
)

print(image_count)

Please check whether the correct number of images is displayed:...
{'backgrounds': 0}


# 3. Prepare Tensorflow Object Detection API

## 3.1 Install dependencies

In [11]:
# Install TensorFlow 2.5.0; the package variant depends on `system_id`.
# Local machine: regular `tensorflow` package
if system_id == 0:
    !pip install tensorflow==2.5.0

# Google Colab: `tensorflow-gpu` package
elif system_id == 1:
    !pip install tensorflow-gpu==2.5.0 

# Any other value of system_id: nothing is installed, only a message is printed
else:
    print('No operating system was defined...')

Collecting tensorflow==2.5.0
  Using cached tensorflow-2.5.0-cp39-cp39-macosx_10_11_x86_64.whl (195.7 MB)
Collecting h5py~=3.1.0
  Using cached h5py-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl (2.9 MB)
Collecting wrapt~=1.12.1
  Using cached wrapt-1.12.1-cp39-cp39-macosx_10_9_x86_64.whl
Collecting six~=1.15.0
  Using cached six-1.15.0-py2.py3-none-any.whl (10 kB)
Collecting numpy~=1.19.2
  Using cached numpy-1.19.5-cp39-cp39-macosx_10_9_x86_64.whl (15.6 MB)
Collecting flatbuffers~=1.12.0
  Using cached flatbuffers-1.12-py2.py3-none-any.whl (15 kB)
Collecting google-pasta~=0.2
  Using cached google_pasta-0.2.0-py3-none-any.whl (57 kB)
Collecting keras-nightly~=2.5.0.dev
  Using cached keras_nightly-2.5.0.dev2021032900-py2.py3-none-any.whl (1.2 MB)
Collecting tensorflow-estimator<2.6.0,>=2.5.0rc0
  Using cached tensorflow_estimator-2.5.0-py2.py3-none-any.whl (462 kB)
Collecting absl-py~=0.10
  Using cached absl_py-0.15.0-py3-none-any.whl (132 kB)
Collecting astunparse~=1.6.3
  Using cached as

In [14]:
# Download the model garden (model garden is an environment that is necessary to train new models from scratch or to continue training existing models)
# The model itself will be downloaded later

# Clone repository only if it does not exist already
os.chdir(paths['2_Tensorflow'])
if os.path.exists(paths['2_Tensorflow']+'/models/research') is False:
    print('Cloning model garden..')
    !git clone https://github.com/tensorflow/models.git
    
else:
    print('Model garden already exists')

paths['research'] = paths['2_Tensorflow']+'/models/research'
os.chdir(paths['home'])

Model garden already exists


In [12]:
# Install protobuf

if os.path.exists(paths['2_Tensorflow']+'/protoc/protoc-21.1-osx-aarch_64.zip') is False:

    # Go to destination directory
    os.chdir(paths['protoc'])
    protoc_url = 'https://github.com/protocolbuffers/protobuf/releases/download/v21.1/protoc-21.1-osx-aarch_64.zip'
    wget.download(protoc_url)

    # Extract all content of downloaded file
    from zipfile import ZipFile

    with ZipFile('protoc-21.1-osx-aarch_64.zip', 'r') as zipObj:
        zipObj.extractall()

    os.environ['Path'] = paths['protoc']+'/bin'
    os.chdir(paths['research'])

    !protoc object_detection/protos/*.proto --python_out=.


else:
    print('Protobuf was already installed...')
    
os.chdir(paths['home'])

Protobuf was already installed...


In [15]:
# Install pycocotools

# Clone repository only if it does not exist already
if os.path.exists(paths['research']+'/cocoapi') is False:
    print('Cloning cocoapi..')
    !git clone https://github.com/cocodataset/cocoapi.git
    
    # Moving cloned file to 'research' folder
    destination = paths['research']
    source = paths['home']+'/cocoapi'
    shutil.move(source, destination)
    
else:
    print('Cocoapi already exists')

os.chdir(paths['home'])

Cocoapi already exists


## 3.2 Install Tensorflow Object Detection API

In [18]:
# Check if API has already been installed
if os.path.exists(paths['research']+'/setup.py') is False:
    print('Installing setup.py...')
    
    # Move to 'research' directory
    os.chdir(paths['research'])

    # Copy setup.py to current working directory
    !cp object_detection/packages/tf2/setup.py .

    # Execute setup.py (this command installs all dependencies needed for tf2 odapi)
    !python -m pip install .

    print('Installation complete..')

else:
    print('Object Detection API has already been installed')

# Move back to home-directory
os.chdir(paths['home'])

Installing setup.py...
Processing /Users/niklas/Virtual_Environment/Version_1/Object_Detection/2_Tensorflow/models/research
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting avro-python3
  Using cached avro_python3-1.10.2-py3-none-any.whl
Collecting apache-beam
  Using cached apache_beam-2.40.0-cp39-cp39-macosx_10_9_x86_64.whl (4.7 MB)
Collecting pillow
  Downloading Pillow-9.2.0-cp39-cp39-macosx_10_10_x86_64.whl (3.1 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting lxml
  Downloading lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl (4.6 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.6/4.6 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting matplotlib
  Using cached matplotlib-3.5.2-cp39-cp39-macosx_10_9_x86_64.whl (7.3 MB)
Collecting Cython
  Using cached

Collecting tqdm
  Using cached tqdm-4.64.0-py2.py3-none-any.whl (78 kB)
Collecting tensorflow-estimator<2.10.0,>=2.9.0rc0
  Using cached tensorflow_estimator-2.9.0-py2.py3-none-any.whl (438 kB)
Collecting libclang>=13.0.0
  Using cached libclang-14.0.1-py2.py3-none-macosx_10_9_x86_64.whl (13.2 MB)
Collecting absl-py>=0.2.2
  Using cached absl_py-1.1.0-py3-none-any.whl (123 kB)
Collecting dm-tree~=0.1.1
  Using cached dm_tree-0.1.7-cp39-cp39-macosx_10_9_x86_64.whl (109 kB)
Collecting portalocker
  Using cached portalocker-2.4.0-py2.py3-none-any.whl (16 kB)
Collecting regex
  Using cached regex-2022.6.2-cp39-cp39-macosx_10_9_x86_64.whl (288 kB)
Collecting colorama
  Using cached colorama-0.4.5-py2.py3-none-any.whl (16 kB)
Collecting tabulate>=0.8.9
  Using cached tabulate-0.8.10-py3-none-any.whl (29 kB)
Collecting scikit-learn>=0.21.3
  Using cached scikit_learn-1.1.1-cp39-cp39-macosx_10_13_x86_64.whl (8.6 MB)
Collecting typeguard>=2.7
  Using cached typeguard-2.13.3-py3-none-any.whl (17

Installing collected packages: text-unidecode, sentencepiece, pytz, py-cpuinfo, libclang, keras, gin-config, docopt, dm-tree, crcmod, uritemplate, typeguard, tqdm, toml, threadpoolctl, tensorflow-io-gcs-filesystem, tensorflow-estimator, tabulate, regex, pyyaml, python-slugify, pyparsing, pymongo, proto-plus, promise, portalocker, pillow, orjson, numpy, lxml, kiwisolver, joblib, importlib_resources, googleapis-common-protos, fonttools, fastavro, etils, dill, Cython, cycler, contextlib2, colorama, cloudpickle, avro-python3, absl-py, tf-slim, tensorflow-model-optimization, tensorflow-metadata, tensorflow_io, tensorflow-hub, scipy, sacrebleu, pydot, pyarrow, pandas, opencv-python-headless, kaggle, httplib2, hdfs, tensorflow-addons, scikit-learn, oauth2client, matplotlib, google-auth-httplib2, google-api-core, apache-beam, tensorflow-datasets, seqeval, pycocotools, lvis, google-api-python-client, tensorflow, tensorflow-text, tf-models-official, object-detection
  Attempting uninstall: tenso

## 3.3 Check if API was installed successfully

In [19]:
# Move to 'research' directory
os.chdir(paths['research'])
import object_detection

# Local machine
if system_id == 0:
    !python {paths['research']+'/object_detection/builders/model_builder_tf2_test.py'}
    

# Google colab    
elif system_id == 1:
    !pip install numpy --upgrade # This had to be added for execution on colab. Problem solved using stackoverflow

    # Open file for testing (unforunately it does not work when using paths[research])
    #testfile_path = '/content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/models/research/object_detection/builders/model_builder_tf2_test.py'
    #!python {testfile_path}
    !python {paths['research']+'/object_detection/builders/model_builder_tf2_test.py'}
else:
    print('No operating system was defined...')

# Move back to home directory
os.chdir(paths['home'])

Running tests under Python 3.9.6: /Users/niklas/Virtual_Environment/Version_1/joshBak2/bin/python
[ RUN      ] ModelBuilderTF2Test.test_create_center_net_deepmac
2022-07-04 07:38:21.602647: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  logging.warn(('Building experimental DeepMAC meta-arch.'
W0704 07:38:22.238353 4620293632 model_builder.py:1102] Building experimental DeepMAC meta-arch. Some features may be omitted.
INFO:tensorflow:time(__main__.ModelBuilderTF2Test.test_create_center_net_deepmac): 0.98s
I0704 07:38:22.559390 4620293632 test_util.py:2458] time(__main__.ModelBuilderTF2Test.test_create_center_net_deepmac): 0.98s
[       OK ] ModelBuilderTF2Test.test_create_center_net_deepmac
[ RUN      ] ModelBuilderTF2Test

I0704 07:38:29.735615 4620293632 efficientnet_model.py:143] round_filter input=40 output=40
I0704 07:38:29.735800 4620293632 efficientnet_model.py:143] round_filter input=80 output=80
I0704 07:38:30.190960 4620293632 efficientnet_model.py:143] round_filter input=80 output=80
I0704 07:38:30.191088 4620293632 efficientnet_model.py:143] round_filter input=112 output=112
I0704 07:38:30.470999 4620293632 efficientnet_model.py:143] round_filter input=112 output=112
I0704 07:38:30.471130 4620293632 efficientnet_model.py:143] round_filter input=192 output=192
I0704 07:38:30.990633 4620293632 efficientnet_model.py:143] round_filter input=192 output=192
I0704 07:38:30.990766 4620293632 efficientnet_model.py:143] round_filter input=320 output=320
I0704 07:38:31.093255 4620293632 efficientnet_model.py:143] round_filter input=1280 output=1280
I0704 07:38:31.162914 4620293632 efficientnet_model.py:453] Building model efficientnet with params ModelConfig(width_coefficient=1.0, depth_coefficient=1.0, 

I0704 07:38:43.104254 4620293632 efficientnet_model.py:453] Building model efficientnet with params ModelConfig(width_coefficient=1.1, depth_coefficient=1.2, resolution=260, dropout_rate=0.3, blocks=(BlockConfig(input_filters=32, output_filters=16, kernel_size=3, num_repeat=1, expand_ratio=1, strides=(1, 1), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=16, output_filters=24, kernel_size=3, num_repeat=2, expand_ratio=6, strides=(2, 2), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=24, output_filters=40, kernel_size=5, num_repeat=2, expand_ratio=6, strides=(2, 2), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=40, output_filters=80, kernel_size=3, num_repeat=3, expand_ratio=6, strides=(2, 2), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=80, output_filters=112, kernel_size=5, num_repeat=

I0704 07:38:50.633527 4620293632 ssd_efficientnet_bifpn_feature_extractor.py:145] EfficientDet EfficientNet backbone version: efficientnet-b5
I0704 07:38:50.634283 4620293632 ssd_efficientnet_bifpn_feature_extractor.py:147] EfficientDet BiFPN num filters: 288
I0704 07:38:50.635155 4620293632 ssd_efficientnet_bifpn_feature_extractor.py:148] EfficientDet BiFPN num iterations: 7
I0704 07:38:50.638129 4620293632 efficientnet_model.py:143] round_filter input=32 output=48
I0704 07:38:50.657358 4620293632 efficientnet_model.py:143] round_filter input=32 output=48
I0704 07:38:50.657554 4620293632 efficientnet_model.py:143] round_filter input=16 output=24
I0704 07:38:50.925000 4620293632 efficientnet_model.py:143] round_filter input=16 output=24
I0704 07:38:50.925135 4620293632 efficientnet_model.py:143] round_filter input=24 output=40
I0704 07:38:51.597238 4620293632 efficientnet_model.py:143] round_filter input=24 output=40
I0704 07:38:51.597455 4620293632 efficientnet_model.py:143] round_fil

I0704 07:39:03.954707 4620293632 efficientnet_model.py:143] round_filter input=24 output=48
I0704 07:39:03.954905 4620293632 efficientnet_model.py:143] round_filter input=40 output=80
I0704 07:39:04.718446 4620293632 efficientnet_model.py:143] round_filter input=40 output=80
I0704 07:39:04.718583 4620293632 efficientnet_model.py:143] round_filter input=80 output=160
I0704 07:39:06.260586 4620293632 efficientnet_model.py:143] round_filter input=80 output=160
I0704 07:39:06.260718 4620293632 efficientnet_model.py:143] round_filter input=112 output=224
I0704 07:39:07.476800 4620293632 efficientnet_model.py:143] round_filter input=112 output=224
I0704 07:39:07.477296 4620293632 efficientnet_model.py:143] round_filter input=192 output=384
I0704 07:39:10.540776 4620293632 efficientnet_model.py:143] round_filter input=192 output=384
I0704 07:39:10.540970 4620293632 efficientnet_model.py:143] round_filter input=320 output=640
I0704 07:39:11.566021 4620293632 efficientnet_model.py:143] round_fi

# 4. Prepare new training job

## 4.1 Install dependencies and import modules

In [7]:
## Install missing modules for randomTrafficSign
!pip install matplotlib
!pip install lxml

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
# Import the image generation script from 0_User_Input/scripts.
# The working directory is switched to the script folder for the duration of the import.
# NOTE(review): this presumably relies on the current directory being on the import path -- confirm.
os.chdir(paths['0_User_Input']+'/scripts')
import randomTrafficSign as ts

# XML helpers from the standard library
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import Element
import xml.etree.ElementTree as etree
import xml.dom.minidom

# NOTE: this rebinds the name `etree` -- from here on it refers to lxml's etree,
# not to xml.etree.ElementTree imported above.
from lxml import etree
# Back to the home directory
os.chdir(paths['home'])

In [9]:
import numpy as np

## 4.2 Partition images for testing and training

**Important**: Image files of objects must follow the naming pattern shown in these examples: "Mug_1.jpg", "Cat_3.jpg". Each image must be located in the folder of its respective label.

In [10]:
# Number of images to be created for each object image
factor_n = 50

# Share of the generated images that is used for testing (the rest is used for training)
test_fraction = 0.15


def _copy_random_items(items, n_copies, destination):
    """Copy randomly chosen (jpg, xml) pairs to ``destination``.

    Each chosen pair is removed from ``items`` (the list is modified in place),
    so a pair is never copied twice and pairs already used for one split cannot
    end up in the other.

    Args:
        items: list of pairs; element 0 is the image path, element 1 the xml path.
        n_copies: number of pairs to copy; capped at ``len(items)``.
        destination: directory the files are copied to.

    Returns:
        The list of random indices that were drawn, in drawing order.
    """
    drawn_indices = []
    # Never draw more pairs than are available (randint(0) would raise)
    for _ in range(min(n_copies, len(items))):
        index = np.random.randint(len(items))
        drawn_indices.append(index)
        pair = items.pop(index)
        shutil.copy(pair[0], destination)
        shutil.copy(pair[1], destination)
    return drawn_indices


# The folder listing does not change inside the loop, so read it once
folders = os.listdir(paths['objects'])

for label in labels:

    # Only consider folders that correspond to current labels, disregard old labels
    if label not in folders:
        continue

    ## Multiply each image of this label by factor_n, create an xml file and save to the preprocessing folder
    all_items = []
    path_object = paths['objects']+'/'+label
    path_save = paths['images']+'/'+label+'/'
    ts.main(img_size, all_items, paths['backgrounds'],path_object, factor_n, 50, 20, save_folder=path_save)
    # 1st argument: image size
    # 2nd argument: list that is filled with the jpg and xml file of every created image for the current label
    # 3rd argument: path to backgrounds folder
    # 4th argument: path to object images folder
    # 5th argument: number of images to be created for each object image
    # 6th argument: upperScale
    # 7th argument: lowerScale
    # save_folder: folder where all images are to be saved

    # Ignore hidden files, such as .ds_store, before the split sizes are computed;
    # otherwise a skipped entry would silently shrink one of the splits.
    all_items = [
        item for item in all_items
        if not (os.path.basename(item[0]).startswith('.') or os.path.basename(item[1]).startswith('.'))
    ]

    # Count number of images for this label
    n_items = len(all_items)

    # Use 15% of the images for testing, 85% for training.
    # The testing count is rounded up to an even number and capped at the number
    # of available images (relevant for very small sets).
    n_testing = min(n_items, 2*(math.ceil(0.5*test_fraction*n_items)))

    n_training = n_items - n_testing
    print('Label: '+str(label)+', total: '+str(n_items)+', testing: '+str(n_testing)+', training: '+str(n_training))

    # Partition images for training and testing in a random order

    # Training
    number_history = _copy_random_items(all_items, n_training, paths['images_training'])
    print('Random numbers: ')
    print(number_history)
    print('Copied '+str(len(number_history))+' images to training folder')

    # Testing (drawn from the pairs that were not used for training)
    number_history = _copy_random_items(all_items, n_testing, paths['images_testing'])
    print('Random numbers: ')
    print(number_history)
    print('Copied '+str(len(number_history))+' images to testing folder')
            

No file called .DS_Store found in array
Working on image: 1(Pen)
Working on image: 2(Pen)
Working on image: 3(Pen)
Working on image: 4(Pen)
Created 200 images for label Pen
Label: Pen, total: 200, testing: 30, training: 170
Random numbers: 
[128, 188, 55, 81, 32, 106, 34, 98, 135, 61, 42, 1, 49, 6, 32, 152, 41, 162, 157, 140, 144, 146, 63, 105, 60, 163, 155, 73, 60, 127, 86, 16, 44, 34, 159, 71, 19, 162, 10, 33, 0, 45, 145, 37, 101, 114, 116, 109, 86, 16, 123, 52, 71, 20, 76, 96, 8, 117, 4, 108, 6, 27, 124, 111, 22, 119, 67, 11, 52, 36, 0, 43, 17, 13, 123, 87, 8, 77, 5, 54, 54, 85, 57, 67, 108, 36, 70, 45, 10, 9, 87, 14, 89, 9, 20, 104, 95, 95, 8, 58, 80, 51, 70, 49, 35, 61, 0, 27, 87, 13, 4, 26, 2, 26, 63, 38, 58, 36, 27, 24, 60, 34, 62, 42, 5, 64, 1, 43, 8, 49, 8, 4, 24, 42, 51, 61, 28, 41, 55, 3, 49, 23, 27, 51, 41, 45, 33, 47, 50, 12, 28, 5, 10, 30, 44, 43, 13, 14, 17, 31, 15, 31, 36, 2, 8, 29, 2, 17, 23, 3]
Copied 170 images to training folder
Random numbers: 
[0, 4, 3, 17, 16, 22

## 5.4 Create labelmap

In [11]:
# Location of the label map inside the annotations folder
files['labelmap'] = paths['annotations']+'/label_map.pbtxt'

# One 'item { ... }' entry per label, carrying its name and id
labelmap_entries = [
    'item { \n' + '\tname:\'%s\'\n' % entry['name'] + '\tid:%s\n' % entry['id'] + '}\n'
    for entry in labelmap
]

# Write (or overwrite) the label map file in one go
with open(files['labelmap'], 'w') as labelmap_file:
    labelmap_file.write(''.join(labelmap_entries))

In [12]:
# Install pandas
!pip install pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## 5.5 Create TFRecord

In [13]:
# Register the locations of the training and testing TFRecord files
# (both live in the annotations folder) in the 'files' dictionary
files.update({
    'tf_train': paths['annotations']+'/train.record',
    'tf_test': paths['annotations']+'/test.record',
})

# TODO: Add line to download TF record file from nicknochnack


In [14]:
# Copy generatetfrecord.py to scripts
source = paths['0_User_Input']+'/scripts/generatetfrecord.py'
shutil.copy(source, paths['scripts'])

# Change directory to 'scripts'
os.chdir(paths['workspace']+'/scripts')

# Create / overwrite TFRecord files for training and testing

# Create train data:
!python generatetfrecord.py -x {paths['images_training']} -l {files['labelmap']} -o {files['tf_train']}

# Create test data:
!python generatetfrecord.py -x {paths['images_testing']} -l {files['labelmap']} -o {files['tf_test']}

# Go back to home directory
os.chdir(paths['home'])

Successfully created the TFRecord file: /content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/annotations/train.record
Successfully created the TFRecord file: /content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/annotations/test.record


## 5.6 Download pre-trained model

In [15]:
# Update the settings for the image import and multiplication script depending on which size of image the model uses!

# Download and unpack the pre-trained model (once) and make sure the folder for
# the custom model exists.
import tarfile

# Folder for this model in training/models
paths['active_model'] = paths['models']+'/'+custom_model_name

# Check if the chosen model has already been downloaded
if os.path.exists(paths['pre_trained_models']+'/'+str(pre_trained_model_name)) is False:

    # Go to destination directory
    os.chdir(paths['pre_trained_models'])

    # Use the file name reported by the download instead of a hard-coded one,
    # so changing the model url in the configuration keeps working.
    archive_name = wget.download(pre_trained_model_url)

    # Extract all content of downloaded file; the context manager closes the
    # archive even if extraction fails.
    with tarfile.open(archive_name) as archive:
        archive.extractall(paths['pre_trained_models'])

    # Delete downloaded tar.gz file to save storage space
    os.remove(archive_name)

    print('Model was successfully downloaded...')


else:
    print(str(pre_trained_model_name)+' was already installed...')

# Create the folder for the custom model if it is missing. This runs in both
# branches and tolerates an existing folder, so re-running the cell is safe.
os.makedirs(paths['active_model'], exist_ok=True)

os.chdir(paths['home'])

Model was successfully downloaded...


## 5.7 Update the config file and pipeline for the new training job

In [20]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [21]:
## Copy or replace pipeline in active model directory
files['pipeline_downloaded'] = paths['pre_trained_models']+'/'+pre_trained_model_name+'/pipeline.config'
paths['active_model'] = paths['models']+'/'+custom_model_name
files['pipeline_active'] = paths['active_model']+'/pipeline.config'
paths['downloaded_model'] = paths['pre_trained_models']+'/'+pre_trained_model_name

# The copy below needs the active model directory to exist
os.makedirs(paths['active_model'], exist_ok=True)

# shutil.copy overwrites an existing file, so one copy covers both cases;
# only the status message differs.
pipeline_existed = os.path.exists(files['pipeline_active'])
shutil.copy(files['pipeline_downloaded'], paths['active_model'])
if pipeline_existed:
    print('Pipeline replaced in active model directory...')
else:
    print('Pipeline copied to active model directory...')

## Configure pipeline

# Dictionary view of the pipeline as returned by the API helper
config = config_util.get_configs_from_pipeline_file(files['pipeline_active'])

# Parse the text-format pipeline file into a protobuf message that can be edited
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(files['pipeline_active'], "r") as f:
    proto_str = f.read()
    text_format.Merge(proto_str, pipeline_config)


pipeline_config.model.ssd.num_classes = len(labels) # Number of labels the model should be trained for
pipeline_config.train_config.batch_size = 4 # Batch size used for training

# Start fine-tuning from checkpoint 0 of the (original) downloaded model
files['checkpoint0'] = paths['downloaded_model']+'/checkpoint/ckpt-0'

pipeline_config.train_config.fine_tune_checkpoint = files['checkpoint0']

pipeline_config.train_config.fine_tune_checkpoint_type = "detection"

# Label map for training
pipeline_config.train_input_reader.label_map_path= files['labelmap']

# TFRecord for training, label map and TFRecord for evaluation
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [files['tf_train']]
pipeline_config.eval_input_reader[0].label_map_path = files['labelmap']
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [files['tf_test']]

config_text = text_format.MessageToString(pipeline_config)

# Write the edited configuration back to the active pipeline file
with tf.io.gfile.GFile(files['pipeline_active'], "wb") as f:
    f.write(config_text)

print('Pipeline successfully configured...')

Pipeline replaced in active model directory...
Pipeline successfully configured...


In [22]:
# Copy the training entry point model_main_tf2.py from the research folder
# (object_detection) into workspace -> training.
# The call is the last expression of the cell, so the path of the copy is displayed.
shutil.copy(paths['research']+'/object_detection/model_main_tf2.py', paths['training'])

'/content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/model_main_tf2.py'

# 6. Start new training job

In [23]:
# Assemble the shell command that starts the training job.
import shlex

files['training_script'] = paths['training']+'/model_main_tf2.py'
model_dir = paths['active_model']

# Number of training steps passed to the training script
num_train_steps = 5000

# Every path is shell-quoted so the command also works when a path contains
# spaces or other special characters (paths without them are left unchanged).
command = "python {} --model_dir={} --pipeline_config_path={} --num_train_steps={}".format(
    shlex.quote(files['training_script']),
    shlex.quote(model_dir),
    shlex.quote(files['pipeline_active']),
    num_train_steps,
)
# first argument: Path to the model_main_tf2.py file
# second argument: Path to the directory of the active model (the directory, not a file inside it)
# third argument: Path to actual pipeline.config in active directory
# fourth argument: Number of training steps
print(command)

python /content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/model_main_tf2.py --model_dir=/content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/models/my_ssd_mobilenet_v2_fpnlite --pipeline_config_path=/content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/models/my_ssd_mobilenet_v2_fpnlite/pipeline.config --num_train_steps=5000


In [24]:
# This command is necessary to fix issue with training on colab
# source: https://stackoverflow.com/questions/70998639/dnn-library-is-not-found-ssd-mobile-net-v2-in-colab#answer-72404540
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
The following packages will be REMOVED:
  libcudnn8-dev
The following held packages will be changed:
  libcudnn8
The following packages will be upgraded:
  libcudnn8
1 upgraded, 0 newly installed, 1 to remove and 47 not upgraded.
Need to get 430 MB of archives.
After this operation, 3,139 MB disk space will be freed.
Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  libcudnn8 8.1.0.77-1+cuda11.2 [430 MB]
Fetched 430 MB in 7s (63.3 MB/s)
(Reading database ... 155639 files and directories currently installed.)
Removing libcudnn8-dev (8.0.5.39-1+cuda11.1) ...
(Reading database ... 155617 files and directories currently installed.)
Preparing to unpack .../libcudnn8_8.1.0.77-1+cuda11.2_amd64.deb ...
Unpacking libcudnn8 (8.1.0.77-1+c

In [25]:
# Run the training command string held in `command` in a shell
!{command}

2022-07-01 08:53:49.986756: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
I0701 08:53:49.993685 140609947961216 mirrored_strategy.py:374] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
INFO:tensorflow:Maybe overwriting train_steps: 5000
I0701 08:53:50.000331 140609947961216 config_util.py:552] Maybe overwriting train_steps: 5000
INFO:tensorflow:Maybe overwriting use_bfloat16: False
I0701 08:53:50.000506 140609947961216 config_util.py:552] Maybe overwriting use_bfloat16: False
Instructions for updating:
rename to distribute_datasets_from_function
W0701 08:53:50.145030 140609947961216 deprecation.py:356] From /usr/local/lib/python3.7/dist-packages/object_detection/model_lib_v2.py:564: StrategyBase.experimental

# Evaluate training

In [26]:
command = "python {} --model_dir={} --pipeline_config_path={} --checkpoint_dir={}".format(files['training_script'], model_dir, files['pipeline_active'], model_dir)
!{command}


Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/absl/app.py", line 312, in run
    _run_main(main, args)
  File "/usr/local/lib/python3.7/dist-packages/absl/app.py", line 258, in _run_main
    sys.exit(main(argv))
  File "/content/drive/MyDrive/Colab_Notebooks/Object_Detection/2_Tensorflow/workspace/training/model_main_tf2.py", line 89, in main
    wait_interval=300, timeout=FLAGS.eval_timeout)
  File "/usr/local/lib/python3.7/dist-packages/object_detection/model_lib_v2.py", line 1136, in eval_continuously
    checkpoint_dir, timeout=timeout, min_interval_secs=wait_interval):
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/checkpoint_utils.py", line 195, in checkpoints_iterator
    checkpoint_dir, checkpoint_path, timeout=timeout)
  File "/usr/local/lib/python3.7/dist-packages/tensorflow/python/training/checkpoint_utils.py", line 143, in wait_for_new_checkpoint
    time.sleep(seconds_to_sleep)
KeyboardInterrupt

During handlin

# Download model

In [34]:
# Make zip file from directory
import zipfile
# Switch to the models directory: the zip file further down is opened with a
# relative file name and is therefore written here.
os.chdir(paths['models'])
    
def zipdir(path, ziph):
    """Add every file below ``path`` to an open zip archive.

    Args:
        path: directory whose files are added (all sub-directories included).
        ziph: zipfile handle, opened for writing.

    The names inside the archive are relative to the parent of ``path``, so they
    start with the name of the directory itself.
    """
    parent_dir = os.path.join(path, '..')
    for current_dir, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            file_path = os.path.join(current_dir, filename)
            archive_name = os.path.relpath(file_path, parent_dir)
            ziph.write(file_path, archive_name)

# Pack the active model directory into '<custom_model_name>.zip' (compressed)
model_archive_name = custom_model_name+'.zip'
with zipfile.ZipFile(model_archive_name, mode='w', compression=zipfile.ZIP_DEFLATED) as model_archive:
    zipdir(paths['active_model'], model_archive)

# Download zip file to local machine manually

