In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive; the paths
# used by later cells all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys

# Add the project folder on Drive to the import search path so modules stored
# there can be imported (presumably this is where `models` and `utils` live).
sys.path.append('/content/drive/My Drive/ComputerVisionProject')


In [3]:
from models import *
from utils import *

import os
import time
import datetime
import random
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.autograd import Variable
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image, ImageEnhance


In [4]:
# Locations of the YOLOv3 network definition, its weights and the class-name list.
config_path = '/content/drive/My Drive/ComputerVisionProject/config/yolov3.cfg'
weights_path = '/content/drive/My Drive/ComputerVisionProject/config/yolov3.weights'
class_path = '/content/drive/My Drive/ComputerVisionProject/config/coco.names'
img_size = 416     # side length (pixels) of the square image fed to the network
conf_thres = 0.8   # confidence threshold handed to non_max_suppression
nms_thres = 0.4    # NMS threshold handed to non_max_suppression

# Load model and weights
model = Darknet(config_path, img_size=img_size)
model.load_weights(weights_path)

# Decide once whether a CUDA device is usable. Merely referencing
# torch.cuda.FloatTensor does not raise on a CPU-only machine, so a try/except
# around it cannot be used to select the tensor type.
use_cuda = torch.cuda.is_available()
if use_cuda:
  model.cuda()
else:
  print("No cuda GPU available")
model.eval()
classes = utils.load_classes(class_path)

# Tensor type that input images are cast to; must match where the model lives.
Tensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor




In [5]:
def detect_image(img):
    """Run the loaded detection model on a single PIL image.

    The image is resized so its longer side equals ``img_size`` (aspect ratio
    preserved), padded with grey (128, 128, 128) to a square of exactly
    ``img_size`` x ``img_size``, converted to a tensor and passed through
    ``model`` followed by ``utils.non_max_suppression``.

    Args:
        img: PIL image (its ``size`` attribute is read as (width, height)).

    Returns:
        The first element of the non-max-suppression output, i.e. the
        detections for this image. NOTE(review): callers treat this as possibly
        ``None`` when nothing is detected -- confirm against
        ``utils.non_max_suppression``.
    """
    # scale and pad image
    ratio = min(img_size/img.size[0], img_size/img.size[1])
    imw = round(img.size[0] * ratio)
    imh = round(img.size[1] * ratio)

    # Total padding needed on each axis to make the resized image square.
    pad_w = max(imh - imw, 0)
    pad_h = max(imw - imh, 0)
    # Split the padding between both sides; when the total is odd the extra
    # pixel goes to the right/bottom so the result is exactly square instead
    # of one pixel short. The left/top offset is unchanged by this.
    pad_left = pad_w // 2
    pad_top = pad_h // 2
    pad_right = pad_w - pad_left
    pad_bottom = pad_h - pad_top

    img_transforms = transforms.Compose([
        transforms.Resize((imh, imw)),
        transforms.Pad((pad_left, pad_top, pad_right, pad_bottom),
                       (128, 128, 128)),
        transforms.ToTensor(),
    ])
    # convert image to Tensor
    image_tensor = img_transforms(img).float()
    image_tensor = image_tensor.unsqueeze_(0)  # add a leading batch dimension
    input_img = Variable(image_tensor.type(Tensor))
    # run inference on the model and get detections
    with torch.no_grad():
        detections = model(input_img)
        # 80 is the class count passed to NMS (presumably the number of
        # entries in coco.names -- TODO confirm).
        detections = utils.non_max_suppression(
            detections, 80, conf_thres, nms_thres)
    return detections[0]


In [6]:
import time
def track_ball(video_path):
    """Detect the ball in every frame of a video and tabulate where it was found.

    Each frame is converted from BGR to RGB, cropped to a fixed window
    (rows 142-557, columns 442-857) and passed to ``detect_image``. Every
    detection whose class name is "sports ball" contributes one output row.

    Args:
        video_path: Path of the video, handed to ``cv2.VideoCapture``.

    Returns:
        pandas.DataFrame with columns ``frame`` (0-based index of the frame in
        the video), ``class``, ``bb_left`` and ``bb_top`` (x1/y1 of the
        detection rescaled to pixel coordinates of the cropped frame). Frames
        without a ball detection contribute no rows.
    """
    vid = cv2.VideoCapture(video_path)
    rows = []
    frame_count = 0
    try:
        while True:
            ret, frame = vid.read()
            if not ret:
                break

            # Only take a portion in the middle of the frame
            # Reduces size of data significantly
            # NOTE(review): the fixed window assumes frames at least 858 px
            # wide and 558 px tall -- confirm for other videos.
            cropped = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)[142:558, 442:858]
            detections = detect_image(Image.fromarray(cropped))

            # A frame with no detections is expected to come back as None
            # (NOTE(review): confirm against utils.non_max_suppression).
            # Checking explicitly lets genuine errors surface instead of being
            # silently counted as "nothing detected".
            if detections is not None:
                detections = detections.cpu()

                # Undo the resize-and-pad applied by detect_image so that box
                # coordinates refer to the cropped frame, not the network input.
                crop_h, crop_w = cropped.shape[:2]
                scale = img_size / max(crop_h, crop_w)
                pad_x = max(crop_h - crop_w, 0) * scale
                pad_y = max(crop_w - crop_h, 0) * scale
                unpad_h = img_size - pad_y
                unpad_w = img_size - pad_x

                for x1, y1, _x2, _y2, _conf, _cls_conf, cls_pred in detections:
                    c = classes[int(cls_pred)]
                    # Only record ball detections
                    if c != "sports ball":
                        continue
                    y1 = ((y1 - pad_y // 2) / unpad_h) * crop_h
                    x1 = ((x1 - pad_x // 2) / unpad_w) * crop_w
                    rows.append([frame_count, c, x1.numpy(), y1.numpy()])

            frame_count += 1
    finally:
        # Release the capture handle even if detection raises part-way through.
        vid.release()

    df = pd.DataFrame(rows, columns = ['frame', 'class', 'bb_left', 'bb_top'])
    return df



In [7]:
# This will run our detections on a Gerrit Cole pitch clip (included in repo, but path may need to be changed)
# Detections may not be possible if we're not on a GPU.
tracks_cole_1 = track_ball("/content/drive/My Drive/ComputerVisionProject/FF/gerrit_coleFF1.mp4")


In [8]:
# Print one line per ball detection: the frame index followed by the
# coordinates (bb_left, bb_top) at which the ball was detected.
for row_idx in range(len(tracks_cole_1)):
    detection = tracks_cole_1.iloc[row_idx]
    print(detection['frame'], detection['bb_left'], detection['bb_top'])

188 136.3104 221.62917
189 136.57243 221.39479
190 136.63203 221.77771
191 137.04167 222.4861
192 138.5574 222.70114
193 138.8856 222.93028
194 139.31204 223.30595
197 143.21854 227.12573
198 143.60808 227.89871
199 146.39345 228.61873
200 148.2441 230.7999
201 149.72388 232.6464
202 151.2061 234.16142
203 153.63744 236.43237
204 156.34929 238.53055
205 158.09476 240.6397
206 160.18692 241.79623
325 199.14818 63.99612
326 194.70287 60.231365
327 188.8749 55.29365
328 182.68867 51.669636
354 49.23777 60.494675
355 43.764824 65.27669
356 38.473267 70.63305
