In [259]:
# %pip uninstall opencv-contrib-python opencv-python
# %pip install numpy opencv --user
# %pip install centroid-tracker

Image tracking

In [1]:
# Import necessary packages
import numpy as np
import cv2

# Minimum class score a detection must exceed to be kept; it is also passed
# to the non-maxima suppression call as its score threshold
confidence = 0.5
# Non-maxima suppression threshold: solves the problem of having multiple
# detections for the same object by reducing them to one detection per
# valid object
threshold = 0.3

In [2]:
# Get the list of labels (one class name per line). The context manager
# closes the file handle as soon as the names have been read.
with open('./yolo-coco/coco.names') as labelsFile:
    labels = labelsFile.read().strip().split('\n')

# Get the weights file
weights = './yolo-coco/yolov3.weights'
# Get the config file for the pre-trained model
configFile = './yolo-coco/yolov3.cfg'

# Load the YOLO object trained on the COCO dataset
network = cv2.dnn.readNetFromDarknet(configFile, weights)

# Get the names of the output layers.
# getUnconnectedOutLayers() yields 1-based indices (hence the "- 1"); depending
# on the OpenCV version they arrive as a flat array or as an Nx1 array, so
# flatten first to make the lookup work with both shapes.
layerNames = network.getLayerNames()
outLayerIndices = np.array(network.getUnconnectedOutLayers()).flatten()
layerNamesOut = [layerNames[i - 1] for i in outLayerIndices]

In [4]:
# Get image (run this cell if image tracking is required)
imagePath = './output_video_static_Moment.jpg'
image = cv2.imread(imagePath)
# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so fail here with a clear message rather than at image.shape
if image is None:
    raise FileNotFoundError("Couldn't read image: {}".format(imagePath))

# Get image dimensions (used later to scale the detections back to pixels)
height, width = image.shape[:2]

# Create the network input blob: pixel values scaled by 1/255, image resized
# to 416x416, no mean subtraction, R and B channels swapped, no cropping
blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), (0,0,0),
                             swapRB=True, crop=False)
network.setInput(blob)

# Do a forward pass using the YOLO object detector (outputs
# bounding boxes with respective probabilities)
layerOut = network.forward(layerNamesOut)

In [6]:
# Three parallel lists: position k in each one describes the same detection
# (its box, its confidence and its class ID)
boundingBoxes, confidenceList, classIDs = [], [], []

# Factors that convert a detection's (x centre, y centre, width, height)
# into pixel units of the analysed image
pixelScale = np.array([width, height, width, height])

for layerDetections in layerOut:
    for candidate in layerDetections:
        # Everything from index 5 onwards is the per-class score vector
        classScores = candidate[5:]
        bestClass = np.argmax(classScores)
        bestScore = classScores[bestClass]

        # Skip candidates that do not exceed the configured confidence
        if not bestScore > confidence:
            continue

        # YOLO reports the box centre followed by the box width and height
        centreX, centreY, boxW, boxH = (candidate[0:4] * pixelScale).astype('int')

        # Convert the centre into the top-left corner of the box
        left = int(centreX - boxW / 2)
        top = int(centreY - boxH / 2)

        boundingBoxes.append([left, top, int(boxW), int(boxH)])
        classIDs.append(bestClass)
        confidenceList.append(float(bestScore))

# Non-maxima suppression: keep a single box among overlapping detections
results = cv2.dnn.NMSBoxes(boundingBoxes, confidenceList, confidence, threshold)
print("Results output is: {}".format(results))

Results output is: [38 32  6 35 30 24 29 17 12 10 16 15  5 27  4  2  3  0]


In [17]:
# Load the 2D ("cartoon") football field the detections get projected onto
fieldImagePath = './football_field.jpg'
footballFieldImage = cv2.imread(fieldImagePath)
# cv2.imread returns None when the file cannot be read; fail with a clear
# message instead of a cryptic error inside cv2.resize further down
if footballFieldImage is None:
    raise FileNotFoundError("Couldn't read image: {}".format(fieldImagePath))

# Real field coordinates in the camera image, in the order:
# upper left, upper right, lower right, lower left
src = np.array([[1145, 310], [1915, 353], [1915, 1075], [1, 510]], np.float32)

# Debug aid (disabled): mark the source corners on the camera image
# cv2.circle(image, (1145, 310), 20, (0, 0, 255), -2,)  # Upper left
# cv2.circle(image, (1915, 353), 20, (0, 0, 255), -2,)  # Upper right
# cv2.circle(image, (1915, 1075), 20, (0, 0, 255), -2,)  # Lower right
# cv2.circle(image, (1, 510), 20, (0, 0, 255), -2,)  # Lower left
#
# ims = cv2.resize(image, (960, 540))
# cv2.imshow("Tracked players", ims)
# cv2.waitKey(0)

# Cartoon image field coordinates, listed in the same corner order as src
dst = np.array([[52, 50], [550, 50], [800, 1120], [52, 880]], np.float32)

# Debug aid (disabled): mark the destination corners on the field image
# cv2.circle(footballFieldImage, (52, 50), 15, (0, 0, 255), -2,)  # Upper left
# cv2.circle(footballFieldImage, (550, 50), 15, (0, 0, 255), -2,)  # Upper right
# cv2.circle(footballFieldImage, (800, 1120), 15, (0, 0, 255), -2,)  # Lower right
# cv2.circle(footballFieldImage, (52, 880), 15, (0, 0, 255), -2,)  # Lower left

# Show output image; close the window once a key was pressed so it does not
# linger after the cell has finished
ims = cv2.resize(footballFieldImage, (960, 540))
cv2.imshow("Tracked players", ims)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Get the perspective transformation matrix that maps camera-image points
# (src) onto field-image points (dst)
pmatrix = cv2.getPerspectiveTransform(src, dst)

print(pmatrix) 

[[-5.63117892e+00 -3.43119474e+01  1.65517083e+04]
 [ 3.24112855e+00 -5.95395319e+01  1.42339557e+04]
 [ 4.40221099e-04 -3.78973971e-02  1.00000000e+00]]


In [18]:
# Annotate every kept detection on the camera image and plot its projected
# position on the 2D field image

# One colour per class label; the fixed seed keeps the palette reproducible
np.random.seed(21)
colours = np.random.randint(0, 255, size=(len(labels), 3), dtype='uint8')

# An empty suppression result simply produces no iterations
for keptIdx in results:
    left, top, boxW, boxH = boundingBoxes[keptIdx]
    classIdx = classIDs[keptIdx]

    # OpenCV drawing calls want the colour as plain Python ints
    boxColour = colours[classIdx].tolist()

    # Box outline plus a "<label>: <confidence>" caption just above it
    cv2.rectangle(image, (left, top), (left + boxW, top + boxH), boxColour, 2)
    caption = '{}: {:.3f}'.format(labels[classIdx], confidenceList[keptIdx])
    cv2.putText(image, caption, (left, top - 6), cv2.FONT_HERSHEY_TRIPLEX, 0.5, boxColour, 2)

    # Homogeneous anchor point: the box centre moved down by a quarter of
    # the box height
    anchor = np.array([left + boxW/2, top + boxH/2 + boxH/4, 1])
    projected = np.dot(pmatrix, anchor)

    # Divide by the third component to get back to 2D field coordinates
    fieldX = projected[0] / projected[2]
    fieldY = projected[1] / projected[2]
    cv2.circle(footballFieldImage, (int(fieldX), int(fieldY)), 10, (255, 255, 0), -2)


# Show the annotated camera image first ...
ims = cv2.resize(image, (960, 540))
cv2.imshow("Tracked players", ims)
cv2.waitKey(0)

# ... and then the field image with the projected positions
ims2 = cv2.resize(footballFieldImage, (960, 540))
cv2.imshow("Tracked players", ims2)
cv2.waitKey(0)


-1

Video tracking

In [31]:
# Import necessary packages
import numpy as np
import cv2
import tracker

# Minimum class score a detection must exceed to be kept; it is also passed
# to the non-maxima suppression call as its score threshold
confidence = 0.5
# Non-maxima suppression threshold: solves the problem of having multiple
# detections for the same object by reducing them to one detection per
# valid object
threshold = 0.3
# Initialize centroid tracker which will track a specific player and allow
# us to assign an ID to it
# NOTE(review): maxDisappeared presumably is the number of frames an object
# may go undetected before its ID is dropped - confirm against the tracker module
ctracker = tracker.CentroidTracker(maxDisappeared=20)

In [32]:
# Get the list of labels (one class name per line). The context manager
# closes the file handle as soon as the names have been read.
with open('./yolo-coco/coco.names') as labelsFile:
    labels = labelsFile.read().strip().split('\n')

# Get the weights file
weights = './yolo-coco/yolov3.weights'
# Get the config file for the pre-trained model
configFile = './yolo-coco/yolov3.cfg'

# Load the YOLO object trained on the COCO dataset
network = cv2.dnn.readNetFromDarknet(configFile, weights)

# Get the names of the output layers.
# getUnconnectedOutLayers() yields 1-based indices (hence the "- 1"); depending
# on the OpenCV version they arrive as a flat array or as an Nx1 array, so
# flatten first to make the lookup work with both shapes.
layerNames = network.getLayerNames()
outLayerIndices = np.array(network.getUnconnectedOutLayers()).flatten()
layerNamesOut = [layerNames[i - 1] for i in outLayerIndices]

In [33]:
# Get video (run this cell if video tracking is required)
# Change value depending on short or long vid
shortVid = True 
if shortVid:
    # videoPath = 'videos/madrid_psg_short.mp4'
    # outputVideo = 'output_video_short.mp4'
    videoPath = 'videos/madrid_psg_static.mp4'
    outputVideo = 'output_video_static.mp4'
else:
    videoPath = 'videos/madrid_psg.mp4'
    outputVideo = 'output_video.mp4'

video = cv2.VideoCapture(videoPath)
# VideoCapture does not raise when the source cannot be opened; without this
# check the problem would only surface later, when the frame loop reads nothing
if not video.isOpened():
    raise IOError("Couldn't open video: {}".format(videoPath))

# The writer is created lazily by the frame loop once the first frame is known
writer = None
width, height = None, None
total = 0

# Get total no. of frames in the video
try: 
    total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
except Exception:
    # Best effort only: the count is informational, so keep going with 0.
    # Catching Exception (not a bare except) lets KeyboardInterrupt through.
    print("Couldn't get frame count")

print('No. of frames: {}'.format(total))


No. of frames: 75


In [34]:
# File the bird's-eye-view rendering is written to
outputBEV = 'output_birds_eye_view.mp4'

# Reference points on the real field as seen in the camera frame
src = np.array(
    [
        [1145, 310],
        [1915, 353],
        [1915, 1075],
        [1, 510],
    ],
    dtype=np.float32,
)

# The corresponding points on the cartoon field image, in the same order
dst = np.array(
    [
        [52, 50],
        [550, 50],
        [800, 1120],
        [52, 880],
    ],
    dtype=np.float32,
)

# Perspective transformation matrix taking src points onto dst points
pmatrix = cv2.getPerspectiveTransform(src, dst)

In [35]:
def applyYoloToFrame(frame):
    """Detect objects in ``frame``, track them and draw the results in place.

    The frame is run through the YOLO network, detections above the
    configured confidence are reduced with non-maxima suppression and handed
    to the centroid tracker. Every tracked box is drawn on ``frame`` together
    with its ID, and its projected position is drawn on the field image.

    Relies on these module-level names: ``network``, ``layerNamesOut``,
    ``confidence``, ``threshold``, ``ctracker``, ``pmatrix`` and
    ``footballFieldImage`` (the latter is drawn on in place).

    Args:
        frame: image array to analyse; it is modified in place.

    Returns:
        None.
    """
    # Take the size from the frame itself instead of depending on
    # module-level width/height having been set by the caller
    frameHeight, frameWidth = frame.shape[:2]

    # Create a blob with respective parameters (check docs for more info)
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), (0,0,0), swapRB=True, crop=False)
    network.setInput(blob)

    # Do a forward pass using the YOLO object detector (outputs bounding boxes with respective probabilities)
    layerOut = network.forward(layerNamesOut)

    # Parallel lists: bounding boxes and their confidences, related by index
    boundingBoxes = []
    confidenceList = []

    # Factors that scale a detection's box values to pixels (loop-invariant)
    frameScale = np.array([frameWidth, frameHeight, frameWidth, frameHeight])

    # Iterate over each layer output
    for out in layerOut:
        # Iterate over each detection
        for detection in out:
            # Get the class scores and the confidence of the best class
            scores = detection[5:]
            currConfidence = scores[np.argmax(scores)]

            # Check confidence using the value established previously
            if currConfidence > confidence: 
                # YOLO returns the center coordinates of the bounding box
                # followed by its width and height
                box = detection[0:4] * frameScale
                xCenter, yCenter, boxWidth, boxHeight = box.astype('int')

                # Use center coordinates to calculate top-left box corner coordinate
                x = int(xCenter - (boxWidth / 2))
                y = int(yCenter - (boxHeight / 2))

                # Add values to the lists
                boundingBoxes.append(np.array([x, y, int(boxWidth), int(boxHeight)]))
                confidenceList.append(float(currConfidence))

    # Apply non-maxima suppression. Depending on the OpenCV version the kept
    # indices come back flat, as an Nx1 array, or as an empty tuple; normalise
    # them to a flat integer array so they can index the list directly.
    rects = cv2.dnn.NMSBoxes(boundingBoxes, confidenceList, confidence, threshold)
    keptIndices = np.array(rects, dtype=int).reshape(-1)
    # NOTE(review): element [1] of the tracker result is iterated as an
    # id -> bounding box mapping - confirm against the tracker module
    objects = ctracker.update([boundingBoxes[idx] for idx in keptIndices])

    for (objectId, bbox) in objects[1].items():
        # Get bounding box coordinates
        x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]

        # Draw bounding box with its ID overlayed on the frame
        currentBoxColour = (255, 0, 0) # Blue
        cv2.rectangle(frame, (x, y), (x+w, y+h), currentBoxColour, 2)
        currentBoxText = 'Current ID: {}'.format(objectId)
        cv2.putText(frame, currentBoxText, (x, y - 6), cv2.FONT_HERSHEY_TRIPLEX, 0.5, currentBoxColour, 2)

        # Get projected coordinates on 2d field: multiply the homogeneous
        # point (box center moved down by h/4) with the perspective matrix and
        # divide by the third component
        centerX, centerY = (x + w/2), (y + h/2)
        val = np.dot(pmatrix, np.array([centerX, centerY+h/4, 1]))
        x2d, y2d = val[0]/val[2], val[1]/val[2]
        cv2.circle(footballFieldImage, (int(x2d), int(y2d)), 10, (0, 0, 0), -2)
        cv2.putText(footballFieldImage, str(objectId), (int(x2d), int(y2d) - 14), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (0, 0, 0), 1)


In [36]:
# Load the field image once; every frame draws on its own fresh copy instead
# of re-reading the file from disk for each frame
fieldTemplate = cv2.imread('./football_field.jpg')
if fieldTemplate is None:
    raise FileNotFoundError("Couldn't read image: ./football_field.jpg")

# Write the outputs at the source frame rate; fall back to 30 when the
# container does not report one (get() then returns 0)
fps = video.get(cv2.CAP_PROP_FPS) or 30
# Concatenate four chars to generate a fourcc code
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')

writerBEV = None
num = 0
try:
    # Iterate over each frame
    while True:
        # Get next frame
        pickedFrame, frame = video.read()

        # Check if there are frames left to analyze
        if not pickedFrame:
            break

        # Report progress only for frames that were actually read
        print("Current frame no: {}".format(num))
        num += 1

        # Get a clean football field image for this frame
        footballFieldImage = fieldTemplate.copy()

        # Same procedure as with image tracking
        height, width = frame.shape[:2] 

        # Detect, track and draw on both frame and footballFieldImage
        applyYoloToFrame(frame)

        # Initialize the writers lazily, once the frame sizes are known.
        # Params are: filename, fourcc, fps, frameSize, isColor
        if writer is None:
            writer = cv2.VideoWriter(outputVideo, fourcc, fps, 
                                    (frame.shape[1], frame.shape[0]), True)
        if writerBEV is None:
            writerBEV = cv2.VideoWriter(outputBEV, fourcc, fps, 
                                        (footballFieldImage.shape[1], 
                                         footballFieldImage.shape[0]), True)

        # Write the output
        writer.write(frame)
        writerBEV.write(footballFieldImage)
finally:
    # Release pointers even when a frame failed to process, so the output
    # files get finalized; a writer is still None if no frame was ever read
    for openWriter in (writer, writerBEV):
        if openWriter is not None:
            openWriter.release()
    video.release() 

Current frame no: 0
Current frame no: 1
Current frame no: 2
Current frame no: 3
Current frame no: 4
Current frame no: 5
Current frame no: 6
Current frame no: 7
Current frame no: 8
Current frame no: 9
Current frame no: 10
Current frame no: 11
Current frame no: 12
Current frame no: 13
Current frame no: 14
Current frame no: 15
Current frame no: 16
Current frame no: 17
Current frame no: 18
Current frame no: 19
Current frame no: 20
Current frame no: 21
Current frame no: 22
Current frame no: 23
Current frame no: 24
Current frame no: 25
Current frame no: 26
Current frame no: 27
Current frame no: 28
Current frame no: 29
Current frame no: 30
Current frame no: 31
Current frame no: 32
Current frame no: 33
Current frame no: 34
Current frame no: 35
Current frame no: 36
Current frame no: 37
Current frame no: 38
Current frame no: 39
Current frame no: 40
Current frame no: 41
Current frame no: 42
Current frame no: 43
Current frame no: 44
Current frame no: 45
Current frame no: 46
Current frame no: 47
Cu

Bird's-eye view