# Ball tracking in a Volleyball environment
> Luca Cazzola, Alessandro Lorenzi

*Signal, Image & Video - MSc in Artificial Intelligence Systems - University of Trento* 

---

Requirements and module loading

In [None]:
#read image
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pylab # this allows you to control figure size
import joblib

# Default size of every matplotlib figure rendered in this notebook.
pylab.rcParams["figure.figsize"] = (15.0, 15.0)

In [None]:
# Pre-trained Random Forest classifier, used later to score candidate boxes.
rf_classifier = joblib.load("models/random_forest_model.joblib")
# Pre-fitted PCA model, used later to project the HOG descriptors.
pca_model = joblib.load("models/pca_model.joblib")

PRE-PROCESSING
---

Our main objective is to <b>detect the volleyball</b>. <br><br>
Since the ball is often moving, we can start by discriminating what's moving and what's not.
<br>
The good thing is that our <b>camera is fixed</b>, so an easy method would be to compute the difference between the current frame and the previous one. However, since the ball generally moves fast and the camera frame rate is not very high, the ball would show up twice in the difference image (at its previous and at its current position), which leads to duplicate detections.<br>
What we propose instead is to use a **background frame** obtained as the median of N random frames of the video. This is much more robust, as long as the camera stays still.
<br> 

In [None]:
def generate_background_median(video_path, N):
    """Estimate the static background of a video as a per-pixel median.

    N frame positions are drawn uniformly at random over the whole video,
    the corresponding frames are read, and the median over the sampled
    frames is returned.

    Args:
        video_path: path of the video file, passed to ``cv2.VideoCapture``.
        N: number of random frames to sample.

    Returns:
        The median frame as a ``numpy.uint8`` array with the same shape as
        a single decoded frame.

    Raises:
        IOError: if the video cannot be opened.
        RuntimeError: if none of the sampled frames could be read.
    """
    cap = cv2.VideoCapture(video_path)

    try:
        if not cap.isOpened():
            raise IOError(f"Cannot open video: {video_path}")

        # Randomly select N frame positions. They are floored to integers
        # because a frame position is an index, not a fractional value.
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        frame_ids = np.floor(frame_count * np.random.uniform(size=N)).astype(int)

        # Store the selected frames, skipping the ones that fail to decode
        # (a failed read yields no image and would break the median below).
        frames = []
        for fid in frame_ids:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(fid))
            ret, frame = cap.read()
            if ret:
                frames.append(frame)
    finally:
        # Release the capture even if an error occurred while sampling.
        cap.release()

    if not frames:
        raise RuntimeError(f"No frame could be read from video: {video_path}")

    # Calculate the median along the time axis
    return np.median(frames, axis=0).astype(dtype=np.uint8)


def get_moving_objects(curr_frame, background):
    """Segment what differs between the current frame and the background.

    Args:
        curr_frame: current frame (the notebook passes a grayscale image).
        background: background frame to compare against; presumably it must
            have the same size and type as ``curr_frame`` for ``cv2.absdiff``.

    Returns:
        A ``(binary_map, diff_map)`` tuple: the cleaned-up binary mask
        (255 where the difference exceeds the threshold, 0 elsewhere) and
        the raw absolute difference image.
    """
    difference = cv2.absdiff(curr_frame, background)

    # Adaptive cut-off: the mean difference of this frame plus a fixed margin.
    cutoff = np.mean(difference) + 10
    mask = cv2.threshold(difference, cutoff, 255, cv2.THRESH_BINARY)[1]

    # Clean the mask: one erosion pass, five dilation passes with the same
    # 3x3 structuring element, then a 5x5 median blur.
    structuring_element = np.ones((3, 3), np.uint8)
    mask = cv2.erode(mask, structuring_element, iterations=1)
    mask = cv2.dilate(mask, structuring_element, iterations=5)
    mask = cv2.medianBlur(mask, 5)

    return mask, difference

FEATURE EXTRACTION
---

Now we want to extract the features of the image. <br><br>
First, we use the **HOG (Histogram of Oriented Gradients)** feature extraction process, which counts the occurrences of gradient orientations in localized portions of the image. The descriptor is therefore focused on the structure or the shape of an object. <br>
Then we apply **PCA (Principal Component Analysis)** to the extracted features.



In [None]:
def extract_HOG_features(image):
    """Compute the HOG descriptor of ``image``.

    The image is handed to the descriptor as-is: no colour conversion is
    performed here (the notebook passes a 64x64 grayscale patch).

    Args:
        image: image patch to describe.

    Returns:
        The HOG feature vector returned by ``cv2.HOGDescriptor.compute``.
    """
    # Positional arguments, per the OpenCV constructor: window size,
    # block size, block stride, cell size, number of bins, derivative aperture.
    descriptor = cv2.HOGDescriptor((64, 64), (8, 8), (4, 4), (4, 4), 16, 1)
    return descriptor.compute(image)


def get_features(image):
    """Return the PCA-projected HOG features of an image patch.

    Args:
        image: image patch; it is resized to 64x64 before extraction.

    Returns:
        The first (and only) row produced by ``pca_model.transform`` for
        the patch's HOG descriptor.
    """
    # Bring the patch to the fixed 64x64 size used by the HOG descriptor.
    patch = cv2.resize(image, (64, 64))

    # Project the HOG descriptor with the pre-loaded PCA model.
    descriptor = extract_HOG_features(patch)
    projected = pca_model.transform([descriptor])

    return projected[0]
    

REGION IDENTIFICATION
---

Now we use the feature extraction methods presented above to obtain the **bounding boxes** for each frame, by extracting the contours. <br>
Once that is done, it's time to search for the ball within the returned boxes, so we apply **classification** with a confidence level that we have set to 0.8 (but it can be changed). <br><br>

At the end, we provide a graphical representation of the boxes extracted (red) and the ones where the ball is detected (green). 

In [None]:
def filter_boxes(x, y, w, h):
    """Tell whether a bounding box has a plausible size and shape.

    Args:
        x: left coordinate of the box (unused by the filter).
        y: top coordinate of the box (unused by the filter).
        w: box width.
        h: box height.

    Returns:
        True when the box area lies in [150, 3000] and its aspect ratio
        ``w / h`` lies in [0.5, 2]; False otherwise.
    """
    # The area check comes first and short-circuits: a zero-height box has
    # area 0 and is rejected before the division, so it cannot raise
    # ZeroDivisionError.
    return (150 <= (w * h) <= 3000) and (0.5 <= (w / h) <= 2)

    
def extract_bboxes(binary_map, diff_map, curr_frame_RGB, probability_threshold=0.8):
    """Detect candidate boxes, classify them and draw them on the frame.

    Contours are extracted from ``binary_map`` and wrapped in bounding
    boxes; the boxes accepted by ``filter_boxes`` are described with
    ``get_features`` (computed on the matching ``diff_map`` region) and
    scored with ``rf_classifier``.

    Args:
        binary_map: binary mask of the moving objects.
        diff_map: difference image the features are extracted from.
        curr_frame_RGB: frame to draw on; it is modified in place.
        probability_threshold: minimum confidence for a box to be drawn
            as a ball. Defaults to 0.8.

    Returns:
        ``curr_frame_RGB`` with one rectangle per accepted box: (0, 255, 0)
        when the ball confidence reaches ``probability_threshold``,
        (255, 0, 0) otherwise.
    """
    # Extract contours and wrap them with bounding boxes
    contours, _ = cv2.findContours(binary_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Compute each bounding rectangle once, then keep the plausible ones.
    candidate_boxes = (cv2.boundingRect(contour) for contour in contours)
    bounding_boxes = [box for box in candidate_boxes if filter_boxes(*box)]

    if not bounding_boxes:
        return curr_frame_RGB

    # Extract features from detected bounding boxes
    features = [get_features(diff_map[y:y + h, x:x + w]) for x, y, w, h in bounding_boxes]

    # Run classification
    # NOTE(review): each prediction row is unpacked as (other, ball), i.e.
    # a two-class model whose second column is the ball class - confirm
    # against the trained classifier.
    predictions = rf_classifier.predict_proba(features)

    for (x, y, w, h), (_, prob_ball) in zip(bounding_boxes, predictions):
        is_ball = prob_ball >= probability_threshold
        color = (0, 255, 0) if is_ball else (255, 0, 0)
        cv2.rectangle(curr_frame_RGB, (x, y), (x + w, y + h), color, 2)

    return curr_frame_RGB

VIDEO TESTING
---

In [11]:
# Open the video file : it's possible to change the test between these two videos
#video_path = 'videos/vid1_cut.mp4'
video_path = 'videos/vid3-cut.mp4'

# Generate background frame
samples_bck_frames = 50
background = generate_background_median(video_path, samples_bck_frames)
grayMedianBackground = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)

# Create a new window for displaying the elaborated frames
window_name = "Detections"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.setWindowProperty(window_name, cv2.WND_PROP_VISIBLE, 10)
cv2.resizeWindow(window_name, 800, 600)

video_capture = cv2.VideoCapture(video_path)

try:
    while True:
        ret, curr_frame = video_capture.read()  # Read the current frame

        # Stop at the end of the video (or on a read failure)
        if not ret:
            break

        # Grayscale copy for motion detection, RGB copy for drawing
        gray_curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        curr_frame_RBG = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2RGB)

        # Get image of moving objects
        binary_map, diff_map = get_moving_objects(gray_curr_frame, grayMedianBackground)

        # Get bounding boxes of moving objects
        detections = extract_bboxes(binary_map, diff_map, curr_frame_RBG)

        # Display the resulting frame: imshow expects BGR, so convert back
        detections = cv2.cvtColor(detections, cv2.COLOR_RGB2BGR)
        cv2.imshow(window_name, detections)

        # Check for 'q' or the 'Esc' key. The keyboard is polled only once
        # per frame: a second waitKey call would never see a key press
        # already consumed by the first one.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q') or key == 27:
            break
finally:
    # Release the video capture object and close the window, even on error
    video_capture.release()
    cv2.destroyAllWindows()
