In [1]:
# Imports
import tensorflow as tf
import numpy as np
import cv2

from models.research.object_detection.utils import label_map_util
from models.research.object_detection.utils import visualization_utils as vis_util

In [2]:
# Grabbing basketball_net ssd trained on COCO

category_index = label_map_util.create_category_index_from_labelmap('basketball_training/label_map.pbtxt', 
                                                                    use_display_name=True)

def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the 
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we import the graph_def into a new Graph and return it 
    with tf.Graph().as_default() as graph:
        # The name var will prefix every op/nodes in your graph
        # Since we load everything in a new graph, this is not needed
        tf.import_graph_def(graph_def, name='')
    return graph


graph = load_graph('basketball_training/inceptionv2-8086/frozen_inference_graph.pb')
sess = tf.Session(graph=graph)
# for op in graph.get_operations(): 
#     print(op.name, op.outputs)
    




In [9]:

def showInference(image_np):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = graph.get_tensor_by_name('detection_boxes:0')
    # Each score represent how level of confidence for each of the objects.
    # Score is shown on the result image, together with the class label.
    scores = graph.get_tensor_by_name('detection_scores:0')
    classes = graph.get_tensor_by_name('detection_classes:0')
    num_detections = graph.get_tensor_by_name('num_detections:0')
    
    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
      [boxes, scores, classes, num_detections],
      feed_dict={image_tensor: image_np_expanded})
    
    # Data to send via serial
    box_data = np.squeeze(boxes)[0]
    
    center = None
    if np.prod(box_data):
        ymin, xmin, ymax, xmax = box_data
        #center = (image_np.shape[0]*(xmin + xmax) / 2, image_np.shape[1]*(ymin + ymax) / 2)
        center = [image_np.shape[1]*ymin-10, image_np.shape[1]*ymax+10, image_np.shape[0]*xmin-10, image_np.shape[0]*xmax+10]

    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      np.squeeze(boxes),
      np.squeeze(classes).astype(np.int32),
      np.squeeze(scores),
      category_index,
      use_normalized_coordinates=True,
      line_thickness=8)
    
    return center

In [4]:
import queue, threading

# bufferless VideoCapture
class VideoCapture:
  def __init__(self, name, width, height):
    self.cap = cv2.VideoCapture(name)
    self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    self.q = queue.Queue()
    t = threading.Thread(target=self._reader)
    t.daemon = True
    t.start()

  # read frames as soon as they are available, keeping only most recent one
  def _reader(self):
    while True:
      ret, frame = self.cap.read()
      if not ret:
        break
      if not self.q.empty():
        try:
          self.q.get_nowait()   # discard previous (unprocessed) frame
        except queue.Empty:
          pass
      self.q.put(frame)
        
  def getWidth(self):
    return int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))

  def getHeight(self):
    return int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))

  def isOpened(self):
    return self.cap.isOpened()

  def read(self):
    return self.q.get()

  def release(self):
    self.cap.release()

In [10]:
# Video Capture

cap = VideoCapture(0, 1280, 720)
w = cap.getWidth()
h = cap.getHeight()

print(w,h)

orig_center = [w//2 - 240, w//2 + 240, h//2 - 240, h//2 + 240]
new_center = orig_center[:]

while cap.isOpened():
    image_np = cap.read()
    image_np = image_np[new_center[2]:new_center[3], new_center[0]:new_center[1]]
    
    center = showInference(image_np)
    if center:
        for i in range(len(center)):
            if new_center[i] - center[i] > 0:
                new_center[i] -= int(max(1, int(0.1*(new_center[i] - center[i]))))
            else:
                new_center[i] += int(max(1, int(0.1*(center[i] - new_center[i]))))
        
    else:
        new_center[0] -= 10
        new_center[1] += 10
        new_center[2] -= 10
        new_center[3] += 10
        
    if new_center[0] < 0 or new_center[1] > 1280 or new_center[2] < 0 or new_center[3] > 1280:
        new_center = orig_center[:]
    print(new_center)
    cv2.imshow('object detection', image_np)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
    
cap.release()

1280 1280
[390, 890, 390, 890]
[380, 900, 380, 900]
[370, 910, 370, 910]
[360, 920, 360, 920]
[350, 930, 350, 930]
[340, 940, 340, 940]
[330, 950, 330, 950]
[320, 960, 320, 960]
[310, 970, 310, 970]
[300, 980, 300, 980]
[290, 990, 290, 990]
[280, 1000, 280, 1000]
[270, 1010, 270, 1010]
[260, 1020, 260, 1020]
[250, 1030, 250, 1030]
[240, 1040, 240, 1040]
[230, 1050, 230, 1050]
[220, 1060, 220, 1060]
[210, 1070, 210, 1070]
[200, 1080, 200, 1080]
[190, 1090, 190, 1090]
[180, 1100, 180, 1100]
[170, 1110, 170, 1110]
[160, 1120, 160, 1120]
[150, 1130, 150, 1130]
[140, 1140, 140, 1140]
[130, 1150, 130, 1150]
[120, 1160, 120, 1160]
[110, 1170, 110, 1170]
[100, 1180, 100, 1180]
[90, 1190, 90, 1190]
[80, 1200, 80, 1200]
[70, 1210, 70, 1210]
[60, 1220, 60, 1220]
[50, 1230, 50, 1230]
[40, 1240, 40, 1240]
[30, 1250, 30, 1250]
[20, 1260, 20, 1260]
[10, 1270, 10, 1270]
[0, 1280, 0, 1280]
[400, 880, 400, 880]
[390, 890, 390, 890]
[380, 900, 380, 900]
[370, 910, 370, 910]
[360, 920, 360, 920]
[350, 930

[138, 465, 105, 505]
[128, 475, 95, 515]
[118, 485, 85, 525]
[108, 495, 75, 535]
[98, 505, 65, 545]
[88, 515, 55, 555]
[78, 525, 45, 565]
[68, 535, 35, 575]
[58, 545, 25, 585]
[48, 555, 15, 595]
[38, 565, 5, 605]
[59, 543, 31, 583]
[76, 521, 52, 561]
[89, 499, 70, 540]
[99, 478, 85, 520]
[106, 458, 98, 503]
[112, 439, 109, 486]
[116, 421, 119, 472]
[119, 404, 128, 458]
[121, 387, 136, 446]
[122, 371, 143, 434]
[123, 356, 149, 421]
[124, 342, 154, 408]
[123, 329, 157, 394]
[122, 317, 159, 380]
[123, 306, 160, 366]
[113, 316, 150, 376]
[114, 306, 151, 362]
[104, 316, 141, 372]
[107, 307, 143, 359]
[97, 317, 133, 369]
[102, 309, 136, 357]
[92, 319, 126, 367]
[98, 311, 130, 356]
[88, 321, 120, 366]
[78, 331, 110, 376]
[68, 341, 100, 386]
[58, 351, 90, 396]
[48, 361, 80, 406]
[38, 371, 70, 416]
[28, 381, 60, 426]
[18, 391, 50, 436]
[8, 401, 40, 446]
[400, 880, 400, 880]
[390, 890, 390, 890]
[361, 844, 367, 830]
[332, 804, 351, 782]
[322, 814, 341, 792]
[311, 778, 332, 752]
[301, 788, 322, 7

In [None]:
# Captured Video processing - android

cap = VideoCapture('tests/sanjay.mp4')
w = cap.getWidth()
h = cap.getHeight()

frame_count = 0
while True:
    image_np = cap.read()
    
    #image_np = image_np[h//2 - 150:h//2 + 150, w//2 - 150:w//2 + 150]
    image_np = image_np[h//2 - 300:h//2 + 300, w//2 - 300:w//2 + 300]
    
    if not (frame_count % 1):
        showInference(image_np)
        
    frame_count += 1
    cv2.imshow('object detection', image_np)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
    

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

img = cv2.imread('tests/sports_balls.jpg')
resized_img = cv2.resize(img, (640,640), interpolation = cv2.INTER_AREA) 

image_np = np.asarray(resized_img)
image_bgr = np.flip(image_np, axis=2)

showInference(image_bgr)

display(Image.fromarray(image_bgr))

    


In [None]:
# Traditional CV - Pretty bad
from PIL import Image

img = cv2.imread('tests/rodman2.jpg')

# convert to HSV space
im_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# take only the orange, highly saturated, and bright parts
im_hsv = cv2.inRange(im_hsv, (7,180,180), (11,255,255))

# To show the detected orange parts:
im_orange = img.copy()
im_orange[im_hsv==0] = 0
# cv2.imshow('im_orange',im_orange)

# Perform opening to remove smaller elements
element = np.ones((5,5)).astype(np.uint8)
im_hsv = cv2.erode(im_hsv, element)
im_hsv = cv2.dilate(im_hsv, element)

points = np.dstack(np.where(im_hsv>0)).astype(np.float32)
# fit a bounding circle to the orange points
center, radius = cv2.minEnclosingCircle(points)
# draw this circle
cv2.circle(img, (int(center[1]), int(center[0])), int(radius), (255,0,0), thickness=6)

out = np.vstack([im_orange,img])
cv2.imwrite('tests/out.png',out)

In [None]:
# Image cropping - preserving bounding box and generating new XML

from PIL import Image, ImageDraw
from lxml import etree 
import os
import glob

image_list = []
xmin_list = []
ymin_list = []
ymax_list = []
xmax_list = []
fp1 = 'tests/cropping/*.jpg' #input filepath
fp2 = 'tests/cropping'#bounding box info filepath
fp3 = 'tests/cropping/output' #output folder

#img1387_480.xml

#Drawing the bounding box and saving      
for file in glob.glob(fp1): 
    im=Image.open(file)
    width, height = im.size
    image_name, ext = os.path.splitext(file)
    xml_filename = os.path.join(fp2, os.path.basename(image_name)+'.xml')
    if os.path.exists(xml_filename):
        xmin_list = []
        ymin_list = []
        ymax_list = []
        xmax_list = []
        tree = etree.parse(xml_filename)
        root = tree.getroot()
        xmax = root.findall('.//xmax')
        xmin = root.findall('.//xmin')
        ymax = root.findall('.//ymax')
        ymin = root.findall('.//ymin')
        
        w = root.find('.//width')
        h = root.find('.//height')

        for idx, i in enumerate(xmax):
            xmax_list.append(xmax[idx].text)
            xmin_list.append(xmin[idx].text)
            ymax_list.append(ymax[idx].text)
            ymin_list.append(ymin[idx].text)
        
        # Cropping image
        xMax = max([int(x) for x in xmax_list])
        xMin = min([int(x) for x in xmin_list])
        yMax = max([int(x) for x in ymax_list])
        yMin = min([int(x) for x in ymin_list])
        
        if xMin >= 0 and yMin >= 0 and xMax <= width and yMax <= height: # All bounding boxes can fit in cropped image
            left = max(0, xMin - 150)
            top = max(0, yMin - 150)
            right = min(xMax + 150, width)
            bottom = min(yMax + 150, height)
                        
            im = im.crop((left, top, right, bottom))
            
            w.text = str(im.width)
            h.text = str(im.height)
            
            for idx, i in enumerate(xmax_list):
                newXmin = int(xmin_list[idx]) - left
                newXmax = newXmin + (int(xmax_list[idx]) - int(xmin_list[idx]))
                newYmin = int(ymin_list[idx]) - top
                newYmax = newYmin + (int(ymax_list[idx]) - int(ymin_list[idx]))
                
                xmin[idx].text = str(newXmin)
                xmax[idx].text = str(newXmax)
                ymin[idx].text = str(newYmin)
                ymax[idx].text = str(newYmax)
                
#                 #Drawing the lines
#                 d = ImageDraw.Draw(im)
#                 top_left = (newXmin, newYmin)
#                 top_right = (newXmax, newYmin)
#                 bot_left = (newXmin, newYmax)
#                 bot_right = (newXmax, newYmax)
#                 line_color = (0, 255, 0)
#                 d.line([top_left, top_right, bot_right, bot_left, top_left], fill=line_color, width=2)
        else:
            continue
        
        #display(im)
    else: 
        print(xml_filename + " Doesn't exist")
        
    
    #Saving the file to a new folder
    src_fname, ext = os.path.splitext(file)
    save_xml = os.path.join(fp3, os.path.basename(src_fname) + '.xml')
    save_fname = os.path.join(fp3, os.path.basename(src_fname)+'.JPEG')
    im.save(save_fname)
    tree.write(save_xml)
