In [1]:
# Imports
import tensorflow as tf
import numpy as np
import cv2

# from models.research.object_detection.utils import label_map_util
# from models.research.object_detection.utils import visualization_utils as vis_util

In [2]:
# Grabbing basketball_net ssd trained on COCO

# category_index = label_map_util.create_category_index_from_labelmap('basketball_training/label_map.pbtxt', 
#                                                                     use_display_name=True)

def load_graph(frozen_graph_filename):
    # We load the protobuf file from the disk and parse it to retrieve the 
    # unserialized graph_def
    with tf.gfile.GFile(frozen_graph_filename, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Then, we import the graph_def into a new Graph and return it 
    with tf.Graph().as_default() as graph:
        # The name var will prefix every op/nodes in your graph
        # Since we load everything in a new graph, this is not needed
        tf.import_graph_def(graph_def, name='')
    return graph


graph = load_graph('basketball_training/inceptionv2-8086/frozen_inference_graph.pb')
sess = tf.Session(graph=graph)
# for op in graph.get_operations(): 
#     print(op.name, op.outputs)
    

In [3]:
def drawBoxes(image_np, coordinates):
    
    # ymin, xmin, ymax, xmax
    # h, w, rgb
    ymin = int(coordinates[0]*image_np.shape[0])
    ymax = int(coordinates[2]*image_np.shape[0])
    xmin = int(coordinates[1]*image_np.shape[1])
    xmax = int(coordinates[3]*image_np.shape[1])
    
    colour = np.array([0, 255, 0], dtype = np.uint8)
    
    line_width = 3
    
    image_np[ymin:ymax, xmax-line_width:xmax+line_width] = colour
    image_np[ymin:ymax, xmin-line_width:xmin+line_width] = colour
    image_np[ymin-line_width:ymin+line_width, xmin:xmax] = colour
    image_np[ymax-line_width:ymax+line_width, xmin:xmax] = colour
    
    
def showInference(image_np):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = graph.get_tensor_by_name('image_tensor:0')
    # Each box represents a part of the image where a particular object was detected.
    boxes = graph.get_tensor_by_name('detection_boxes:0')
    # Each score represent how level of confidence for each of the objects.
    # Score is shown on the result image, together with the class label.
    scores = graph.get_tensor_by_name('detection_scores:0')
    classes = graph.get_tensor_by_name('detection_classes:0')
    num_detections = graph.get_tensor_by_name('num_detections:0')
    
    # Actual detection.
    (boxes, scores, classes, num_detections) = sess.run(
      [boxes, scores, classes, num_detections],
      feed_dict={image_tensor: image_np_expanded})
    
    # Data to send via serial
    box_data = np.squeeze(boxes)[0]
    
    center = None
    confidence = None
    if np.prod(box_data):
        ymin, xmin, ymax, xmax = box_data
        
        # Relative Frame Coordinates
        center = [image_np.shape[1]*ymin, image_np.shape[1]*ymax, image_np.shape[0]*xmin, image_np.shape[0]*xmax]

        confidence = np.squeeze(scores)[0]
        drawBoxes(image_np, box_data)

    # Visualization of the results of a detection.
#     vis_util.visualize_boxes_and_labels_on_image_array(
#       image_np,
#       np.squeeze(boxes),
#       np.squeeze(classes).astype(np.int32),
#       np.squeeze(scores),
#       category_index,
#       use_normalized_coordinates=True,
#       line_thickness=8)
    
    return center, confidence

In [4]:
import queue, threading

# bufferless VideoCapture
class VideoCapture:
  def __init__(self, name, width, height):
    self.cap = cv2.VideoCapture(name)
    self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    self.width = width
    self.height = height
    self.q = queue.Queue()
    self.read_lock = threading.Lock()
    t = threading.Thread(target=self._reader)
    t.daemon = True
    t.start()

  # read frames as soon as they are available, keeping only most recent one
  def _reader(self):
    while True:
      ret, frame = self.cap.read()
      if not ret:
        break
      if not self.q.empty():
        try:
          self.q.get_nowait()   # discard previous (unprocessed) frame
        except queue.Empty:
          pass
      self.q.put(frame)
        
  def getWidth(self):
    return 1280

  def getHeight(self):
    return 720

  def isOpened(self):
    return self.cap.isOpened()

  def read(self):
    return self.q.get()

  def release(self):
    self.cap.release()

In [5]:
# Kalman Filtering

# Update prior beliefs as to where ball must have been subject to movement
def update(mean1, var1, mean2, var2):
    new_mean = []
    for i in range(len(mean1)):
        new_mean.append((mean1[i] * var2 + mean2[i] * var1) / (var1 + var2))
    new_var = 1 / ((1 / var1) + (1 / var2))
    
    return new_mean, new_var


In [9]:
# Video Capture

cap = VideoCapture(1, 1280, 720)
w = cap.getWidth()
h = cap.getHeight()

print(w,h)

# Crop region is always in absolute coordinates

orig_center = [h//2 - 300, h//2 + 300, w//2 - 300, w//2 + 300]
crop_region = orig_center[:]
prev_mean = orig_center[:]
prev_var = 0.5

while cap.isOpened():
    image_np = cap.read()
    image_np = image_np[crop_region[0]:crop_region[1], crop_region[2]:crop_region[3]]
    
    # Padding is a function of the current crop size
    curr_mean, curr_var = showInference(image_np)
    
    if curr_mean and curr_var >= 0.6:
        # Update prev_mean 
        #prev_mean, prev_var = update(curr_mean, curr_var, prev_mean, prev_var)
        prev_mean, prev_var = curr_mean, curr_var # Ignore bayesian inference for now
        
        temp = crop_region
        speed_ratio = 0.6
        
        # Shift Right
        crop_region[2] += speed_ratio*(prev_mean[3])
        crop_region[3] += speed_ratio*(prev_mean[3])
        
        # Shift Left
        crop_region[2] -= speed_ratio*(temp[3] - temp[2] - prev_mean[2])
        crop_region[3] -= speed_ratio*(temp[3] - temp[2] - prev_mean[2])
        
        # Shift Up
        crop_region[0] -= speed_ratio*(temp[1] - temp[0] - prev_mean[0])
        crop_region[1] -= speed_ratio*(temp[1] - temp[0] - prev_mean[0])
        
        # Shift Down
        crop_region[0] += speed_ratio*(prev_mean[1])
        crop_region[1] += speed_ratio*(prev_mean[1])
        
        # Zooming In and Out
        ratioY = (prev_mean[1] - prev_mean[0]) / (crop_region[1] - crop_region[0])
        ratioX = (prev_mean[3] - prev_mean[2]) / (crop_region[3] - crop_region[2])
        
        max_ratio = max(ratioY, ratioX)
        threshold = 0.174
        zoom_fact = 500
    
        crop_region[0] -= zoom_fact*(max_ratio - threshold)
        crop_region[1] += zoom_fact*(max_ratio - threshold)
        crop_region[2] -= zoom_fact*(max_ratio - threshold)
        crop_region[3] += zoom_fact*(max_ratio - threshold)
        
        
    else:
        crop_region[0] -= 10
        crop_region[1] += 10
        crop_region[2] -= 10
        crop_region[3] += 10

    # Bounding
    if crop_region[0] < 0:
#             crop_region[1] -= crop_region[0]
        crop_region[1] = min(crop_region[1] - crop_region[0], cap.getHeight())
        crop_region[0] = 0
    if crop_region[1] > cap.getHeight():
#             crop_region[0] -= crop_region[1] - cap.getHeight()
        crop_region[0] = max(crop_region[0] - crop_region[1] - cap.getHeight(), 0)
        crop_region[1] = cap.getHeight()
    if crop_region[2] < 0:
#             crop_region[3] -= crop_region[2]
        crop_region[3] = min(crop_region[3] - crop_region[2], cap.getWidth())
        crop_region[2] = 0
    if crop_region[3] > cap.getWidth():
#             crop_region[2] -= crop_region[3] - cap.getWidth()
        crop_region[2] = max(crop_region[2] - crop_region[3] - cap.getWidth(), 0)
        crop_region[3] = cap.getWidth()

    # Square Check
    crop_width = crop_region[3] - crop_region[2]
    crop_height = crop_region[1] - crop_region[0]

    if crop_width > crop_height: # Wide
        diff = crop_width - crop_height
        crop_region[3] -= diff / 2
        crop_region[2] += diff / 2
    else:
        diff = crop_height - crop_width
        crop_region[0] += diff / 2
        crop_region[1] -= diff / 2

    crop_region[0] = int(crop_region[0])
    crop_region[1] = int(crop_region[1])
    crop_region[2] = int(crop_region[2])
    crop_region[3] = int(crop_region[3])
    cv2.imshow('object detection', cv2.resize(image_np, (480, 480)))
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
    
cap.release()

1280 720


In [7]:
# Captured Video processing - android

cap = cv2.VideoCapture('tests/sanjay.mp4')
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

frame_count = 0
while True:
    _, image_np = cap.read()
    
    image_np = image_np[h//2 - 480:h//2 + 480, w//2 - 480:w//2 + 480]
    #image_np = image_np[h//2 - 300:h//2 + 300, w//2 - 300:w//2 + 300]
    
    if not (frame_count % 1):
        showInference(image_np, [h//2 - 480, h//2 + 480, w//2 - 480,w//2 + 480])
        
    frame_count += 1
    cv2.imshow('object detection', image_np)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
    

TypeError: showInference() takes 1 positional argument but 2 were given

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

img = cv2.imread('tests/sports_balls.jpg')
resized_img = cv2.resize(img, (640,640), interpolation = cv2.INTER_AREA) 

image_np = np.asarray(resized_img)
image_bgr = np.flip(image_np, axis=2)

showInference(image_bgr)

display(Image.fromarray(image_bgr))

    


In [None]:
# Traditional CV - Pretty bad
from PIL import Image

img = cv2.imread('tests/rodman2.jpg')

# convert to HSV space
im_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# take only the orange, highly saturated, and bright parts
im_hsv = cv2.inRange(im_hsv, (7,180,180), (11,255,255))

# To show the detected orange parts:
im_orange = img.copy()
im_orange[im_hsv==0] = 0
# cv2.imshow('im_orange',im_orange)

# Perform opening to remove smaller elements
element = np.ones((5,5)).astype(np.uint8)
im_hsv = cv2.erode(im_hsv, element)
im_hsv = cv2.dilate(im_hsv, element)

points = np.dstack(np.where(im_hsv>0)).astype(np.float32)
# fit a bounding circle to the orange points
center, radius = cv2.minEnclosingCircle(points)
# draw this circle
cv2.circle(img, (int(center[1]), int(center[0])), int(radius), (255,0,0), thickness=6)

out = np.vstack([im_orange,img])
cv2.imwrite('tests/out.png',out)

In [None]:
# Image cropping - preserving bounding box and generating new XML

from PIL import Image, ImageDraw
from lxml import etree 
import os
import glob

image_list = []
xmin_list = []
ymin_list = []
ymax_list = []
xmax_list = []
fp1 = 'tests/cropping/*.jpg' #input filepath
fp2 = 'tests/cropping'#bounding box info filepath
fp3 = 'tests/cropping/output' #output folder

#img1387_480.xml

#Drawing the bounding box and saving      
for file in glob.glob(fp1): 
    im=Image.open(file)
    width, height = im.size
    image_name, ext = os.path.splitext(file)
    xml_filename = os.path.join(fp2, os.path.basename(image_name)+'.xml')
    if os.path.exists(xml_filename):
        xmin_list = []
        ymin_list = []
        ymax_list = []
        xmax_list = []
        tree = etree.parse(xml_filename)
        root = tree.getroot()
        xmax = root.findall('.//xmax')
        xmin = root.findall('.//xmin')
        ymax = root.findall('.//ymax')
        ymin = root.findall('.//ymin')
        
        w = root.find('.//width')
        h = root.find('.//height')

        for idx, i in enumerate(xmax):
            xmax_list.append(xmax[idx].text)
            xmin_list.append(xmin[idx].text)
            ymax_list.append(ymax[idx].text)
            ymin_list.append(ymin[idx].text)
        
        # Cropping image
        xMax = max([int(x) for x in xmax_list])
        xMin = min([int(x) for x in xmin_list])
        yMax = max([int(x) for x in ymax_list])
        yMin = min([int(x) for x in ymin_list])
        
        if xMin >= 0 and yMin >= 0 and xMax <= width and yMax <= height: # All bounding boxes can fit in cropped image
            left = max(0, xMin - 150)
            top = max(0, yMin - 150)
            right = min(xMax + 150, width)
            bottom = min(yMax + 150, height)
                        
            im = im.crop((left, top, right, bottom))
            
            w.text = str(im.width)
            h.text = str(im.height)
            
            for idx, i in enumerate(xmax_list):
                newXmin = int(xmin_list[idx]) - left
                newXmax = newXmin + (int(xmax_list[idx]) - int(xmin_list[idx]))
                newYmin = int(ymin_list[idx]) - top
                newYmax = newYmin + (int(ymax_list[idx]) - int(ymin_list[idx]))
                
                xmin[idx].text = str(newXmin)
                xmax[idx].text = str(newXmax)
                ymin[idx].text = str(newYmin)
                ymax[idx].text = str(newYmax)
                
#                 #Drawing the lines
#                 d = ImageDraw.Draw(im)
#                 top_left = (newXmin, newYmin)
#                 top_right = (newXmax, newYmin)
#                 bot_left = (newXmin, newYmax)
#                 bot_right = (newXmax, newYmax)
#                 line_color = (0, 255, 0)
#                 d.line([top_left, top_right, bot_right, bot_left, top_left], fill=line_color, width=2)
        else:
            continue
        
        #display(im)
    else: 
        print(xml_filename + " Doesn't exist")
        
    
    #Saving the file to a new folder
    src_fname, ext = os.path.splitext(file)
    save_xml = os.path.join(fp3, os.path.basename(src_fname) + '.xml')
    save_fname = os.path.join(fp3, os.path.basename(src_fname)+'.JPEG')
    im.save(save_fname)
    tree.write(save_xml)
