In [1]:
import cv2, time, os, tensorflow as tf, numpy as np
from tensorflow.python.keras.utils.data_utils import get_file

In [2]:
# file containing list of objects
class_file= "coco.names"


In [3]:
# create a list of objects
with open(class_file,'r') as f:
    class_list=f.read().splitlines()
class_list

['__Background__',
 ' person',
 ' bicycle',
 ' car',
 ' motorcycle',
 ' airplane',
 ' bus',
 ' train',
 ' truck',
 ' boat',
 ' traffic light',
 ' fire hydrant',
 ' street sign',
 ' stop sign',
 ' parking meter',
 ' bench',
 ' bird',
 ' cat',
 ' dog',
 ' horse',
 ' sheep',
 ' cow',
 ' elephant',
 ' bear',
 ' zebra',
 ' giraffe',
 ' hat',
 ' backpack',
 ' umbrella',
 ' shoe',
 ' eye glasses',
 ' handbag',
 ' tie',
 ' suitcase',
 ' frisbee',
 ' skis',
 ' snowboard',
 ' sports ball',
 ' kite',
 ' baseball bat',
 ' baseball glove',
 ' skateboard',
 ' surfboard',
 ' tennis racket',
 ' bottle',
 ' plate',
 ' wine glass',
 ' cup',
 ' fork',
 ' knife',
 ' spoon',
 ' bowl',
 ' banana',
 ' apple',
 ' sandwich',
 ' orange',
 ' broccoli',
 ' carrot',
 ' hot dog',
 ' pizza',
 ' donut',
 ' cake',
 ' chair',
 ' couch',
 ' potted plant',
 ' bed',
 ' mirror',
 ' dining table',
 ' window',
 ' desk',
 ' toilet',
 ' door',
 ' tv',
 ' laptop',
 ' mouse',
 ' remote',
 ' keyboard',
 ' cell phone',
 ' microwav

In [4]:
# color list for different classes
color_list=np.random.uniform(low=0,high=255,size=(len(class_list)))
print(len(class_list),len(color_list))                             


92 92


In [5]:

#cache dir where all models willbe downloaded
models_dir= "pretrained_models"

os.makedirs(models_dir, exist_ok=True)


In [6]:
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md

model_URL="http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz"
model_file=os.path.basename(model_URL)
print(model_file)

efficientdet_d0_coco17_tpu-32.tar.gz


In [7]:
#download model file
get_file(fname=model_file,origin=model_URL, cache_dir=models_dir, cache_subdir="checkpoints", extract=True)


Downloading data from http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz


'./pretrained_models\\checkpoints\\efficientdet_d0_coco17_tpu-32.tar.gz'

In [8]:
# load Model
model_name=model_file[:model_file.index('.')]
print(model_name)
tf.keras.backend.clear_session()
model=tf.saved_model.load(os.path.join(models_dir,"checkpoints",model_name,"saved_model"))

print("Model: "+ model_name+" loaded successfully")


efficientdet_d0_coco17_tpu-32





Model: efficientdet_d0_coco17_tpu-32 loaded successfully


In [9]:
 
#read test image
image_path= "test/image1.jpg"
image=cv2.imread(image_path)


In [10]:
# image to tensor conversion
image_np=cv2.cvtColor(image.copy(),cv2.COLOR_BGR2RGB) #converts the color format from BGR (used by OpenCV) to RGB (typically used by TensorFlow and other libraries).
image_tensor=tf.convert_to_tensor(image_np,dtype=tf.uint8) # converts the RGB image (now a NumPy array) into a TensorFlow tensor
image_tensor=image_tensor[tf.newaxis,...] # image_tensor will have a shape of (1, height, width, 3)

In [11]:
image_tensor

<tf.Tensor: shape=(1, 636, 1024, 3), dtype=uint8, numpy=
array([[[[ 53,  39,  26],
         [ 40,  26,  15],
         [ 27,  13,   4],
         ...,
         [233, 221, 199],
         [234, 222, 200],
         [234, 222, 200]],

        [[ 52,  38,  27],
         [ 41,  27,  16],
         [ 31,  17,   8],
         ...,
         [231, 219, 197],
         [231, 219, 197],
         [231, 219, 197]],

        [[ 49,  37,  25],
         [ 43,  29,  20],
         [ 38,  24,  15],
         ...,
         [228, 216, 194],
         [228, 216, 194],
         [228, 216, 194]],

        ...,

        [[ 66,  65,  79],
         [ 64,  63,  77],
         [ 64,  63,  77],
         ...,
         [100, 102, 123],
         [ 92,  96, 121],
         [ 91,  97, 123]],

        [[ 72,  72,  82],
         [ 71,  71,  81],
         [ 68,  68,  78],
         ...,
         [101, 103, 124],
         [ 97, 101, 126],
         [102, 108, 134]],

        [[ 77,  78,  83],
         [ 78,  79,  84],
         [ 75,  7

In [12]:
detections = model(image_tensor) # runs the image_tensor through a trained model to generate predictions

In [13]:
detections

{'detection_boxes': <tf.Tensor: shape=(1, 100, 4), dtype=float32, numpy=
 array([[[2.23913118e-02, 1.48446262e-02, 8.55852187e-01, 3.10046166e-01],
         [1.80860162e-01, 3.90633434e-01, 9.13722336e-01, 9.75541234e-01],
         [6.66696668e-01, 3.17782819e-01, 8.27371836e-01, 4.66989934e-01],
         [0.00000000e+00, 7.65215933e-01, 6.99731350e-01, 1.00000000e+00],
         [0.00000000e+00, 3.04474711e-01, 8.38686943e-01, 7.12805629e-01],
         [3.00226733e-02, 3.21255565e-01, 8.10426772e-01, 7.29539990e-01],
         [5.85378528e-01, 4.13841605e-01, 8.42679560e-01, 7.13512063e-01],
         [0.00000000e+00, 7.59520173e-01, 7.11742282e-01, 1.00000000e+00],
         [1.03309341e-01, 3.77020508e-01, 9.03490543e-01, 9.08252120e-01],
         [1.63228977e-02, 0.00000000e+00, 4.50451136e-01, 3.03562135e-01],
         [6.95664883e-01, 3.47331166e-01, 7.91592240e-01, 4.70526934e-01],
         [5.41779585e-02, 5.78419864e-03, 8.73236120e-01, 3.19961369e-01],
         [6.58288181e-01, 5

In [14]:
detections.keys()

dict_keys(['detection_boxes', 'detection_scores', 'raw_detection_scores', 'detection_anchor_indices', 'raw_detection_boxes', 'detection_classes', 'num_detections', 'detection_multiclass_scores'])

In [15]:
detections['detection_boxes']

<tf.Tensor: shape=(1, 100, 4), dtype=float32, numpy=
array([[[2.23913118e-02, 1.48446262e-02, 8.55852187e-01, 3.10046166e-01],
        [1.80860162e-01, 3.90633434e-01, 9.13722336e-01, 9.75541234e-01],
        [6.66696668e-01, 3.17782819e-01, 8.27371836e-01, 4.66989934e-01],
        [0.00000000e+00, 7.65215933e-01, 6.99731350e-01, 1.00000000e+00],
        [0.00000000e+00, 3.04474711e-01, 8.38686943e-01, 7.12805629e-01],
        [3.00226733e-02, 3.21255565e-01, 8.10426772e-01, 7.29539990e-01],
        [5.85378528e-01, 4.13841605e-01, 8.42679560e-01, 7.13512063e-01],
        [0.00000000e+00, 7.59520173e-01, 7.11742282e-01, 1.00000000e+00],
        [1.03309341e-01, 3.77020508e-01, 9.03490543e-01, 9.08252120e-01],
        [1.63228977e-02, 0.00000000e+00, 4.50451136e-01, 3.03562135e-01],
        [6.95664883e-01, 3.47331166e-01, 7.91592240e-01, 4.70526934e-01],
        [5.41779585e-02, 5.78419864e-03, 8.73236120e-01, 3.19961369e-01],
        [6.58288181e-01, 5.89688480e-01, 8.15932214e-01, 7.

In [16]:
detections['detection_boxes'][0]

<tf.Tensor: shape=(100, 4), dtype=float32, numpy=
array([[2.23913118e-02, 1.48446262e-02, 8.55852187e-01, 3.10046166e-01],
       [1.80860162e-01, 3.90633434e-01, 9.13722336e-01, 9.75541234e-01],
       [6.66696668e-01, 3.17782819e-01, 8.27371836e-01, 4.66989934e-01],
       [0.00000000e+00, 7.65215933e-01, 6.99731350e-01, 1.00000000e+00],
       [0.00000000e+00, 3.04474711e-01, 8.38686943e-01, 7.12805629e-01],
       [3.00226733e-02, 3.21255565e-01, 8.10426772e-01, 7.29539990e-01],
       [5.85378528e-01, 4.13841605e-01, 8.42679560e-01, 7.13512063e-01],
       [0.00000000e+00, 7.59520173e-01, 7.11742282e-01, 1.00000000e+00],
       [1.03309341e-01, 3.77020508e-01, 9.03490543e-01, 9.08252120e-01],
       [1.63228977e-02, 0.00000000e+00, 4.50451136e-01, 3.03562135e-01],
       [6.95664883e-01, 3.47331166e-01, 7.91592240e-01, 4.70526934e-01],
       [5.41779585e-02, 5.78419864e-03, 8.73236120e-01, 3.19961369e-01],
       [6.58288181e-01, 5.89688480e-01, 8.15932214e-01, 7.13399827e-01],
 

In [17]:
type(detections['detection_boxes'][0])

tensorflow.python.framework.ops.EagerTensor

In [18]:
bboxs=detections['detection_boxes'][0].numpy()
bboxs

array([[2.23913118e-02, 1.48446262e-02, 8.55852187e-01, 3.10046166e-01],
       [1.80860162e-01, 3.90633434e-01, 9.13722336e-01, 9.75541234e-01],
       [6.66696668e-01, 3.17782819e-01, 8.27371836e-01, 4.66989934e-01],
       [0.00000000e+00, 7.65215933e-01, 6.99731350e-01, 1.00000000e+00],
       [0.00000000e+00, 3.04474711e-01, 8.38686943e-01, 7.12805629e-01],
       [3.00226733e-02, 3.21255565e-01, 8.10426772e-01, 7.29539990e-01],
       [5.85378528e-01, 4.13841605e-01, 8.42679560e-01, 7.13512063e-01],
       [0.00000000e+00, 7.59520173e-01, 7.11742282e-01, 1.00000000e+00],
       [1.03309341e-01, 3.77020508e-01, 9.03490543e-01, 9.08252120e-01],
       [1.63228977e-02, 0.00000000e+00, 4.50451136e-01, 3.03562135e-01],
       [6.95664883e-01, 3.47331166e-01, 7.91592240e-01, 4.70526934e-01],
       [5.41779585e-02, 5.78419864e-03, 8.73236120e-01, 3.19961369e-01],
       [6.58288181e-01, 5.89688480e-01, 8.15932214e-01, 7.13399827e-01],
       [2.38223281e-02, 6.75581098e-01, 8.64110172e

In [19]:
detections['detection_classes'][0]


<tf.Tensor: shape=(100,), dtype=float32, numpy=
array([18., 18., 51., 63.,  1., 18., 51., 62.,  1., 18., 51.,  1., 51.,
       18., 21., 17., 18., 63., 15.,  1., 20.,  1., 51., 18., 21., 18.,
        1., 88., 19., 79., 64.,  1., 20., 19., 62., 51.,  1., 17., 15.,
        1., 17., 51., 63.,  1., 20., 62.,  3., 63., 33., 67., 23., 17.,
       49., 81.,  1., 88.,  1.,  1., 18., 15., 18., 64., 16., 65., 15.,
       70., 16., 21., 52., 31., 49., 63., 62., 51., 55., 18., 17., 65.,
       50., 33., 86., 18., 15.,  4., 67., 62., 15., 18., 31., 63., 70.,
       15., 67., 51., 61., 63., 52., 19., 79.,  1.], dtype=float32)>

In [20]:
class_indexes=detections['detection_classes'][0].numpy().astype(np.int32)
class_indexes

array([18, 18, 51, 63,  1, 18, 51, 62,  1, 18, 51,  1, 51, 18, 21, 17, 18,
       63, 15,  1, 20,  1, 51, 18, 21, 18,  1, 88, 19, 79, 64,  1, 20, 19,
       62, 51,  1, 17, 15,  1, 17, 51, 63,  1, 20, 62,  3, 63, 33, 67, 23,
       17, 49, 81,  1, 88,  1,  1, 18, 15, 18, 64, 16, 65, 15, 70, 16, 21,
       52, 31, 49, 63, 62, 51, 55, 18, 17, 65, 50, 33, 86, 18, 15,  4, 67,
       62, 15, 18, 31, 63, 70, 15, 67, 51, 61, 63, 52, 19, 79,  1])

In [21]:
class_scores=detections['detection_scores'][0].numpy()
class_scores

array([0.8404553 , 0.6271829 , 0.47305855, 0.38983604, 0.3617795 ,
       0.34211454, 0.22777662, 0.21836625, 0.20259193, 0.19431694,
       0.17641121, 0.17597066, 0.166858  , 0.16553412, 0.16476406,
       0.1600988 , 0.15856722, 0.15298282, 0.15218744, 0.15126096,
       0.14983998, 0.14650044, 0.13032514, 0.12530741, 0.12367757,
       0.12095757, 0.11862903, 0.11556328, 0.11438581, 0.11326766,
       0.10742825, 0.10604662, 0.10541444, 0.10447384, 0.10078327,
       0.09843926, 0.09672134, 0.09579492, 0.09299016, 0.08992459,
       0.08932109, 0.08724906, 0.0869742 , 0.0868568 , 0.08680961,
       0.085035  , 0.08445096, 0.08231018, 0.08166456, 0.0814077 ,
       0.08092276, 0.08030824, 0.08006994, 0.07963908, 0.0792805 ,
       0.07761956, 0.07746154, 0.07727689, 0.07690878, 0.07685439,
       0.07625259, 0.07622874, 0.07588945, 0.07567362, 0.07318679,
       0.07317931, 0.07200006, 0.0715774 , 0.07123972, 0.07016946,
       0.06977452, 0.06974751, 0.06959371, 0.06953295, 0.06898

In [22]:
iH,iW,iC=image.shape

In [23]:
iH,iW,iC

(636, 1024, 3)

In [24]:
# bounding box
#if bbox is found i.e. object is detected
if len(bboxs)!=0:
    for i in range(0,len(bboxs)):
        bbox=tuple(bboxs[i].tolist())
        class_confidence=round(100*class_scores[i])
        class_index=class_indexes[i]
        
        if class_confidence>50.0:            
            print("class_confidence--",class_confidence)
            class_lbl_txt=class_list[class_index]
            class_color=color_list[class_index]

            display_text=str(class_lbl_txt)+" "+str(class_confidence)

            #get values of bbox
            #below values are normalised x,y cordinates, 
            #Normalized x, y coordinates refer to coordinates that are scaled to fit within a fixed range, typically [0,1], rather than using the original pixel values. 
            ymin,xmin,ymax,xmax=bbox
            #converting them to actual x,y values
            xmin,xmax,ymin,ymax=(xmin*iW,xmax*iW,ymin*iH,ymax*iH)
            #converting flaot to int
            xmin,xmax,ymin,ymax=int(xmin),int(xmax),int(ymin),int(ymax)

            cv2.rectangle(image,(xmin,ymin),(xmax,ymax), color=class_color,thickness=2)
            cv2.putText(image,display_text,(xmin,ymin+10),cv2.FONT_HERSHEY_PLAIN,1,class_color,2)

    #cv2.imshow("result",image)
    cv2.imwrite(model_name+".jpg",image)

class_confidence-- 84
class_confidence-- 63


In [25]:
display_text

' dog 63'

In [26]:
# Object detection in Video 
def createBoundingBox(image):
    image_np=cv2.cvtColor(image.copy(),cv2.COLOR_BGR2RGB)
    image_tensor=tf.convert_to_tensor(image_np,dtype=tf.uint8)
    image_tensor=image_tensor[tf.newaxis,...]
    detections=model(image_tensor)
    bboxs=detections['detection_boxes'][0].numpy()
    class_indexes=detections['detection_classes'][0].numpy().astype(np.int32)
    class_scores=detections['detection_scores'][0].numpy()
    iH,iW,iC=image.shape
    if len(bboxs)!=0:
        for i in range(0,len(bboxs)):
            bbox=tuple(bboxs[i].tolist())
            class_confidence=round(100*class_scores[i])
            class_index=class_indexes[i]
            
            if class_confidence>50.0:            
                print("class_confidence--",class_confidence)
                class_lbl_txt=class_list[class_index]
                class_color=color_list[class_index]
    
                display_text=str(class_lbl_txt)+" "+str(class_confidence)
    
                #get values of bbox
                #below values are normalised x,y cordinates
                ymin,xmin,ymax,xmax=bbox
                #converting them to actual x,y values
                xmin,xmax,ymin,ymax=(xmin*iW,xmax*iW,ymin*iH,ymax*iH)
                #converting flaot to int
                xmin,xmax,ymin,ymax=int(xmin),int(xmax),int(ymin),int(ymax)
    
                cv2.rectangle(image,(xmin,ymin),(xmax,ymax), color=class_color,thickness=2)
                cv2.putText(image,display_text,(xmin,ymin+10),cv2.FONT_HERSHEY_PLAIN,1,class_color,2)
    return image


In [27]:
# video detection
videoPath = ".\\Video data\\traffic-mini.mp4"

cap = cv2.VideoCapture(videoPath)
if (cap.isOpened() == False) :
    print("Error opening file")
# Read until video is completed 
success, image = cap.read() 
startTime = 0
while success:
    currentTime = time.time()
    # Calculates the FPS. It measures the time taken to process a single frame
    # currentTime-startTime gives the elapsed time between the current frame and the previous frame
    fps = 1/(currentTime-startTime)
    startTime = currentTime
    boxImage = createBoundingBox(image)
    cv2.putText(boxImage, "FPS:"+ str(int(fps)), (20, 70), cv2.FONT_HERSHEY_PLAIN, 2, (0,255,0),2)
    cv2.imshow("Result", boxImage)
    key= cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break
    (success, image) = cap.read()
cap.release() 
cv2.destroyAllWindows()
    

class_confidence-- 83
class_confidence-- 78
class_confidence-- 66
class_confidence-- 64
class_confidence-- 64
class_confidence-- 63
class_confidence-- 60
class_confidence-- 59
class_confidence-- 82
class_confidence-- 77
class_confidence-- 66
class_confidence-- 66
class_confidence-- 65
class_confidence-- 64
class_confidence-- 63
class_confidence-- 59
class_confidence-- 51
class_confidence-- 82
class_confidence-- 78
class_confidence-- 67
class_confidence-- 65
class_confidence-- 64
class_confidence-- 61
class_confidence-- 60
class_confidence-- 60
class_confidence-- 55
class_confidence-- 83
class_confidence-- 79
class_confidence-- 69
class_confidence-- 67
class_confidence-- 66
class_confidence-- 64
class_confidence-- 63
class_confidence-- 59
class_confidence-- 52
class_confidence-- 83
class_confidence-- 79
class_confidence-- 69
class_confidence-- 69
class_confidence-- 68
class_confidence-- 67
class_confidence-- 63
class_confidence-- 62
class_confidence-- 53
class_confidence-- 83
class_conf