<a href="https://colab.research.google.com/github/ryanro97/player-detector/blob/master/PlayerDetectorPredictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Player Detector Predictor
---
Predictor for frame-by-frame player detection, using a custom dataset and weights trained by our trainer. Implemented using [PyTorch's Faster R-CNN model with a ResNet-50-FPN Backbone](https://pytorch.org/docs/stable/torchvision/models.html#faster-r-cnn).

<br />

### Directory Hierarchy:
```
PlayerDetector
├── data
    ├── train
        ├── images
            ├── *.jpg
        ├── targets
            ├── classes.txt
            ├── *.txt
├── predict
    ├── video
        ├── *.mp4
├── weights.pt
├── PlayerDetectorTrainer.ipynb
├── PlayerDetectorPredictor.ipynb
```


### Install Dependencies for Google Colab

In [0]:
# Install the packages that the imports cell of this notebook relies on.
!pip3 install numpy opencv-python pillow scikit-video torch torchvision

### Mount Google Drive

In [0]:
# Mount Google Drive at /content/drive; the evaluate call at the end of the
# notebook reads the project folder from under this mount point.
from google.colab import drive
drive.mount('/content/drive')

### Imports

In [0]:
import cv2
import numpy as np
import os
import skvideo.io
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor, ToPILImage
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from PIL import Image

### Custom PyTorch Dataset
Handles frame extraction from the input video, and also provides dataset details, such as prediction directory, labels, video_name, and frame shape.

<br />

#### Errors
```
IOError: Class read error
IndexError: Only one file should be in the video folder
TypeError: Video file is not of .mp4 format
IOError: Frame write error
IOError: Video read error
```

In [0]:
class PlayerPredictorDataset(Dataset):
    """Dataset of the frames extracted from the single video to predict on.

    All paths are resolved from the current working directory at
    construction time:

    * class labels are read, one per line, from
      ``data/train/targets/classes.txt``;
    * the one ``.mp4`` file in ``predict/video`` is decoded with OpenCV;
    * every decoded frame is written as ``frameNNNN.jpg`` into
      ``predict/frames`` (created if missing, emptied otherwise).

    Indexing the dataset returns the corresponding frame as an RGB tensor.

    Raises:
        IOError: 'Class read error' if the labels file yields no lines,
            'Frame write error' if a frame cannot be written, or
            'Video read error' if no frame could be decoded.
        IndexError: if the video folder does not hold exactly one file.
        TypeError: if that file does not have the ``.mp4`` extension.
    """

    def __init__(self):
        cwd = os.getcwd()
        self.predict_dir = os.path.join(cwd, 'predict')

        self.labels = self._read_labels(
            os.path.join(cwd, 'data/train/targets/classes.txt'))

        video_dir = os.path.join(self.predict_dir, 'video')
        video_file = self._find_video(video_dir)
        self.video_name = os.path.splitext(video_file)[0]

        self.frames_dir = os.path.join(self.predict_dir, 'frames')
        self._reset_frames_dir(self.frames_dir)

        # Frames are written through absolute paths, so the process working
        # directory is never changed (and cannot be left changed on error).
        self.images, (self.h, self.w) = self._extract_frames(
            os.path.join(video_dir, video_file), self.frames_dir)
        self.length = len(self.images)

    @staticmethod
    def _read_labels(labels_file):
        """Return the stripped lines of ``labels_file`` as the label list."""
        with open(labels_file) as f:
            labels = [line.strip() for line in f]
        if not labels:
            raise IOError('Class read error')
        return labels

    @staticmethod
    def _find_video(video_dir):
        """Return the name of the only file in ``video_dir`` (must be .mp4)."""
        entries = os.listdir(video_dir)
        if len(entries) != 1:
            index_error = \
                'There should be one and only one file in the video folder'
            raise IndexError(index_error)
        if os.path.splitext(entries[0])[1] != '.mp4':
            raise TypeError('Video file is not of .mp4 format')
        return entries[0]

    @staticmethod
    def _reset_frames_dir(frames_dir):
        """Create ``frames_dir`` if missing, otherwise delete its files."""
        if not os.path.exists(frames_dir):
            # Create the directory at its real location; a bare 'frames'
            # would be created relative to the current working directory.
            os.makedirs(frames_dir)
            return
        for name in os.listdir(frames_dir):
            os.remove(os.path.join(frames_dir, name))

    @staticmethod
    def _extract_frames(video_path, frames_dir):
        """Write every frame of the video as a JPEG into ``frames_dir``.

        Returns:
            A tuple ``(names, (height, width))`` where ``names`` lists the
            written file names in frame order and the shape is taken from
            the last decoded frame.
        """
        vc = cv2.VideoCapture(video_path)
        names = []
        shape = None
        try:
            while True:
                success, image = vc.read()
                if not success:
                    break
                name = 'frame%04d.jpg' % (len(names) + 1)
                if not cv2.imwrite(os.path.join(frames_dir, name), image):
                    raise IOError('Frame write error')
                # Recording names as they are written keeps frame order even
                # past 9999 frames, where sorting the file names would not.
                names.append(name)
                shape = image.shape[:2]
        finally:
            # Always free the decoder, including on a write failure.
            vc.release()
        if shape is None:
            raise IOError('Video read error')
        return names, shape

    def __len__(self):
        """Return the number of extracted frames."""
        return self.length

    def __getitem__(self, idx):
        """Return frame ``idx`` loaded from disk as an RGB tensor."""
        image_path = os.path.join(self.frames_dir, self.images[idx])
        image = ToTensor()(Image.open(image_path).convert("RGB"))

        return image

    def getPredictDir(self):
        """Return the absolute path of the ``predict`` directory."""
        return self.predict_dir

    def getLabels(self):
        """Return the list of class labels read from ``classes.txt``."""
        return self.labels

    def getVideoName(self):
        """Return the video file name without its extension."""
        return self.video_name

    def getShape(self):
        """Return the frame size as ``(height, width)``."""
        return self.h, self.w

### Evaluate Function
Takes in the working directory (the root directory in the directory hierarchy diagram), loads the trained weights, applies predictions to each frame, and then reconstructs a video with the predictions drawn on it.

<br />

#### Parameters
```
working_dir: String representation of the working directory
```

In [0]:
def evaluate(working_dir):
    """Run the detector over every frame and write an annotated video.

    Changes the current working directory to ``working_dir``, builds a
    ``PlayerPredictorDataset`` from it, loads ``weights.pt`` from that
    directory into a Faster R-CNN model, and runs the model on each frame.
    When a frame has at least one detection, the first one returned by the
    model is drawn (box plus "label score%" caption). The frames are then
    written to ``<video name>_predicted.mp4`` in the prediction directory.

    Args:
        working_dir: String representation of the working directory.
    """
    # The dataset and the weights file are resolved from the working
    # directory, so switch to it once up front.
    os.chdir(working_dir)

    dataset = PlayerPredictorDataset()
    data_loader = DataLoader(dataset)
    predict_dir = dataset.getPredictDir()
    labels = dataset.getLabels()
    h, w = dataset.getShape()
    video_name = dataset.getVideoName()

    device = torch.device('cuda') if torch.cuda.is_available() \
             else torch.device('cpu')
    model = fasterrcnn_resnet50_fpn(num_classes=len(labels))
    # map_location lets weights saved on a GPU load on a CPU-only runtime.
    model.load_state_dict(torch.load('weights.pt', map_location=device))
    model.to(device)

    model.eval()
    cpu_device = torch.device("cpu")
    font = cv2.FONT_HERSHEY_PLAIN
    white = (255, 255, 255)
    with torch.no_grad():
        # Allocate directly as uint8; going through the default float64
        # buffer would briefly need eight times the memory.
        video = np.empty((len(data_loader), h, w, 3), dtype=np.uint8)
        for i, images in enumerate(data_loader):
            images = [image.to(device) for image in images]
            outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                       for t in model(images)]
            detections = outputs[0]

            img = np.array(ToPILImage()(images[0].cpu()))

            if len(detections['boxes']) >= 1:
                # OpenCV drawing calls need plain integer pixel coordinates,
                # not 0-dim float tensors.
                x0, y0, x1, y1 = (int(round(c))
                                  for c in detections['boxes'][0].tolist())
                cv2.rectangle(img, (x0, y0), (x1, y1), white, 2)
                score = detections['scores'][0].item() * 100
                label = labels[int(detections['labels'][0]) - 1]
                text = '%s %d%%' % (label, score)
                text_w, text_h = cv2.getTextSize(text, font, 1, 1)[0]
                # Filled background for the caption, just above the box.
                cv2.rectangle(img, (x0 - 1, y0 - 1),
                              (x0 + text_w + 20, y0 - text_h - 20),
                              white, cv2.FILLED)
                cv2.putText(img, text, (x0 + 9, y0 - 9), font, 1, (0, 0, 0))
            video[i] = img

        # Write through an absolute path instead of changing directories.
        output_path = os.path.join(predict_dir, video_name + '_predicted.mp4')
        skvideo.io.vwrite(output_path, video)

### Evaluating the video

In [0]:
# Run the predictor on the PlayerDetector folder inside the mounted Drive.
evaluate('/content/drive/My Drive/PlayerDetector')