In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import patches
import imageio

#from tqdm import tqdm_notebook as tqdm
#from tqdm import tqdm 
from tqdm.notebook import tqdm as tqdm

import cv2
import os
import re

import random
import subprocess

from PIL import Image
from IPython.display import Video, display

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

import ast

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

In [None]:
# Dataset locations, all derived from a single root folder
DATA_PATH = '../input/nfl-impact-detection'
im_path = f'{DATA_PATH}/images'

# Frame-level labels of the training videos
train_label = pd.read_csv(f'{DATA_PATH}/train_labels.csv')

# Image Data Overview

In [None]:
# Load the image label table from the configured dataset folder (DATA_PATH)
# instead of a second hard-coded absolute path, then preview the first rows.
img_labels = pd.read_csv(os.path.join(DATA_PATH, 'image_labels.csv'))
img_labels.head()

In [None]:
# Take the image name stored at row label 50 as the example to inspect
img_name = img_labels.loc[50, 'image']
img_name

In [None]:
# Build the image path from the configured image folder rather than repeating
# an absolute path literal.
img_path = os.path.join(im_path, img_name)

In [None]:
# Load the example image from disk and show it on a large figure
img = imageio.imread(img_path)
fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(img)
plt.show()


**Function for drawing the labelled bounding boxes on an image**

In [None]:
def add_img_boxes(image_name, image_labels, image=None,
                  image_dir='/kaggle/input/nfl-impact-detection/images'):
    """Draw the labelled boxes of one image and display the result.

    The boxes are selected with the function's own arguments (not notebook
    globals) and are drawn on a private copy of the pixels, so calling the
    function repeatedly never accumulates rectangles on a shared array.

    Args:
        image_name: File name of the image; the rows of ``image_labels`` whose
            ``image`` column equals it are drawn.
        image_labels: DataFrame with the columns ``image``, ``left``, ``top``,
            ``width`` and ``height``.
        image: Optional pixel array to draw on. When omitted, the image is
            read from ``image_dir``.
        image_dir: Folder the image is loaded from when ``image`` is None.
    """
    # Label colors for bounding boxes
    HELMET_COLOR = (255, 0, 0)  # Red

    if image is None:
        image = imageio.imread(os.path.join(image_dir, image_name))
    # Work on a copy so the caller's array is left untouched
    canvas = np.array(image)

    boxes = image_labels.loc[image_labels['image'] == image_name]
    for box in boxes.itertuples(index=False):
        # OpenCV expects plain integer pixel coordinates
        top_left = (int(box.left), int(box.top))
        bottom_right = (int(box.left + box.width), int(box.top + box.height))
        cv2.rectangle(canvas, top_left, bottom_right, HELMET_COLOR, thickness=2)

    plt.figure(figsize=(20, 10))
    plt.imshow(canvas)
    plt.show()
    

In [None]:
# Show the example image with its labelled boxes
add_img_boxes(image_name=img_name, image_labels=img_labels)

# Video Data

In [None]:
# Read the video label file from the configured dataset folder (DATA_PATH)
# instead of a hard-coded absolute path, then preview the first rows.
video_labels = pd.read_csv(os.path.join(DATA_PATH, 'train_labels.csv'))
video_labels.head()

In [None]:
# Take the video name stored at row label 100 as the example to inspect
video_name = video_labels.loc[100, 'video']
video_name

In [None]:
# Locate the example video inside the configured dataset folder and embed it.
video_path = os.path.join(DATA_PATH, 'train', video_name)
display(Video(data=video_path, embed=True))

**Develop a function to add bounding boxes to every frame in the video**

In [None]:
def annotate_video(video_path: str, video_labels: pd.DataFrame) -> str:
    """Draw the labelled boxes on every frame of a video and re-encode it.

    Each frame gets a caption ``"<video name>_frame<N>"`` and one rectangle
    plus label text per matching row of ``video_labels``. The result is first
    written with OpenCV to a temporary file and then converted with ffmpeg
    (libx264) so that it plays in more browsers.

    Args:
        video_path: Path of the source video. Its base name selects the rows
            of ``video_labels`` (column ``video``) and names the output file.
        video_labels: Frame-level labels with the columns ``video``,
            ``frame`` (counted from 1), ``label``, ``left``, ``top``,
            ``width``, ``height``, ``impact``, ``confidence`` and
            ``visibility``.

    Returns:
        Path of the annotated video: ``"labeled_<video name>"``, relative to
        the current working directory.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg conversion fails. The
            temporary file is kept in that case.
    """
    VIDEO_CODEC = "MP4V"
    # OpenCV interprets colour triples as (blue, green, red)
    HELMET_COLOR = (155, 0, 0)    # Dark blue in BGR order
    IMPACT_COLOR = (0, 0, 0)  # Black
    video_name = os.path.basename(video_path)
    output_path = "labeled_" + video_name
    tmp_output_path = "tmp_" + output_path

    # Select this video's labels once and index them by frame number, instead
    # of scanning the whole label table again for every frame.
    own_labels = video_labels[video_labels["video"] == video_name]
    boxes_by_frame = {frame_no: rows for frame_no, rows in own_labels.groupby("frame")}

    vidcap = cv2.VideoCapture(video_path)
    output_video = None
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        output_video = cv2.VideoWriter(tmp_output_path, cv2.VideoWriter_fourcc(*VIDEO_CODEC), fps, (width, height))
        frame = 0
        while True:
            it_worked, img = vidcap.read()
            if not it_worked:
                break

            # We need to add 1 to the frame count to match the label frame index that starts at 1
            frame += 1

            # Let's add a frame index to the video so we can track where we are
            img_name = f"{video_name}_frame{frame}"
            cv2.putText(img, img_name, (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, HELMET_COLOR, thickness=2)

            # Now, add the boxes
            frame_boxes = boxes_by_frame.get(frame)
            if frame_boxes is not None:
                for box in frame_boxes.itertuples(index=False):
                    if box.impact == 1 and box.confidence > 1 and box.visibility > 0:
                        color, thickness = IMPACT_COLOR, 2
                    else:
                        color, thickness = HELMET_COLOR, 1
                    # Add a box around the helmet
                    left, top = int(box.left), int(box.top)
                    right, bottom = int(box.left + box.width), int(box.top + box.height)
                    cv2.rectangle(img, (left, top), (right, bottom), color, thickness=thickness)
                    cv2.putText(img, box.label, (left, max(0, top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, thickness=2)
            output_video.write(img)
    finally:
        # Always free the reader and flush the writer, even if a frame fails
        vidcap.release()
        if output_video is not None:
            output_video.release()

    # Not all browsers support the codec, we will re-load the file at tmp_output_path and convert to a codec that is more broadly readable using ffmpeg
    if os.path.exists(output_path):
        os.remove(output_path)
    # check=True surfaces a failed conversion instead of returning a path to a file that was never written
    subprocess.run(["ffmpeg", "-i", tmp_output_path, "-crf", "18", "-preset", "veryfast", "-vcodec", "libx264", output_path], check=True)
    os.remove(tmp_output_path)

    return output_path

In [None]:
# Annotate the example video; reuse video_path instead of rebuilding the same
# path from a second hard-coded literal.
labeled_video = annotate_video(video_path, video_labels)
display(Video(data=labeled_video, embed=True))

# Model: Faster R-CNN

In [None]:
class NFLDataset(object):
    """Detection dataset built from ``<root>/images`` and ``<root>/image_labels.csv``.

    Indexing returns ``(img, target)``: ``img`` is a float32 tensor holding the
    pixel values divided by 255 with the channel axis moved to the front, and
    ``target`` is a dict with ``boxes`` (``[N, 4]`` as x_min, y_min, x_max,
    y_max), ``labels`` (int64 class ids from ``labels_dict``) and ``image_id``
    (the dataset index).
    """

    def __init__(self, root_path):
        """List the image files and load the label table found under ``root_path``."""
        self.root_path = root_path
        # os.listdir returns entries in arbitrary order; sorting makes every
        # index refer to the same image on every run and machine.
        self.images_list = sorted(os.listdir(os.path.join(root_path, 'images')))
        self.images_df = pd.read_csv(os.path.join(root_path, 'image_labels.csv'))
        # Class ids start at 1
        self.labels_dict = {'Helmet': 1,
                           'Helmet-Blurred': 2,
                           'Helmet-Difficult': 3,
                           'Helmet-Sideline': 4,
                           'Helmet-Partial': 5}

    def __getitem__(self, idx):
        """Return the image tensor and target dict of the ``idx``-th listed image.

        Raises:
            KeyError: If a row's label is not a key of ``labels_dict``.
        """
        image_name = self.images_list[idx]
        img_path = os.path.join(self.root_path, 'images', image_name)
        # The context manager closes the file once the pixels are copied out
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img) / 255
        img = np.moveaxis(img, 2, 0) # to [C, H, W]

        # Collect data about boxes and helmet labels from `image_labels.csv`
        img_data_df = self.images_df[self.images_df['image'] == image_name]
        geometry = img_data_df[['left', 'top', 'width', 'height']].to_numpy()
        x_min, y_min = geometry[:, 0], geometry[:, 1]
        # Column-wise stacking always yields shape [N, 4], including [0, 4]
        # for an image without any labelled box.
        bboxes = np.stack([x_min, y_min, x_min + geometry[:, 2], y_min + geometry[:, 3]], axis=1)
        labels = [self.labels_dict[name] for name in img_data_df['label']]

        # Convert data to tensors
        img = torch.as_tensor(img, dtype=torch.float32)
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([idx])

        target = {}
        target['boxes'] = bboxes
        target['labels'] = labels
        target['image_id'] = image_id

        return img, target

    def __len__(self):
        """Number of files listed in the images folder."""
        return len(self.images_list)

In [None]:
# Albumentations pipelines
def get_train_transform():
    """Build the training pipeline: random flip, then conversion to a tensor.

    Boxes are expected in ``pascal_voc`` format and their class ids are passed
    through the ``labels`` field.
    """
    return A.Compose([
        # Pass the probability by keyword. A bare positional 0.5 is not
        # guaranteed to bind to `p`; in several albumentations releases the
        # first positional parameter is `always_apply`, which would flip
        # every sample.
        A.Flip(p=0.5),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation."""
    steps = [ToTensorV2(p=1.0)]
    box_settings = dict(format='pascal_voc', label_fields=['labels'])
    return A.Compose(steps, bbox_params=box_settings)

In [None]:
# Start from a Faster R-CNN (ResNet-50 FPN) pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# User-defined number of output classes for the replacement box head
num_classes = 6
# Input width of the existing classification layer
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Swap the pre-trained predictor for a fresh one sized for num_classes
model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=in_features, num_classes=num_classes)
model

In [None]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A list of ``(image, target)`` samples becomes ``(images, targets)``, so
    items of different sizes are kept side by side instead of being stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)


dataset = NFLDataset(DATA_PATH)

# A dedicated, seeded generator makes the 90/10 train/validation split
# identical on every run without touching the global random state.
split_rng = torch.Generator().manual_seed(42)
indices = torch.randperm(len(dataset), generator=split_rng).tolist()
train_cnt = int(0.9*len(indices))

train_dataset = torch.utils.data.Subset(dataset, indices[:train_cnt])
valid_dataset = torch.utils.data.Subset(dataset, indices[train_cnt:])

# Reshuffle the training samples on every pass so that repeated passes do not
# replay the batches in one fixed order.
train_data_loader = torch.utils.data.DataLoader(
                train_dataset,
                batch_size = 8,
                shuffle = True,
                collate_fn = collate_fn)

# Validation order is kept fixed so results line up between runs.
valid_data_loader = torch.utils.data.DataLoader(
                valid_dataset,
                batch_size = 8,
                shuffle = False,
                collate_fn = collate_fn)

In [None]:
# A bare Subset only displays its object address; show the split sizes instead
len(train_dataset), len(valid_dataset)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Release cached GPU memory and report the allocator statistics. The report
# needs a CUDA device, so it is skipped when the notebook runs on the CPU
# fallback. The summary is a multi-line string, so print it to render the
# line breaks instead of showing an escaped repr.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print(torch.cuda.memory_summary(device=None, abbreviated=False))

In [None]:
# Move the network to the selected device before creating the optimizer
model.to(device)
# Optimise only the parameters that still require gradients
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params=params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# No learning-rate schedule is active; the StepLR alternative stays disabled:
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
lr_scheduler = None



In [None]:
def forward_train(model, data, device):
    """Run one forward pass on a batch and return the sum of its losses.

    Args:
        model: Callable taking ``(images, targets)`` and returning a dict of
            loss values.
        data: Pair ``(images, targets)``; every image and every value of each
            target dict must provide ``.to(device)``.
        device: Device the batch is moved to before calling the model.

    Returns:
        The sum of all values in the dict returned by ``model``.
    """
    images, annotations = data
    images_on_device = [img.to(device) for img in images]
    annotations_on_device = []
    for annotation in annotations:
        annotations_on_device.append({key: value.to(device) for key, value in annotation.items()})

    per_term_losses = model(images_on_device, annotations_on_device)
    return sum(per_term_losses.values())

In [None]:
N_ITERS = 100


progress_bar = tqdm(range(N_ITERS))
tr_it = iter(train_data_loader)
loss_log = []
iterations = []

# Training mode and gradient tracking do not change inside the loop, so they
# are switched on once up front.
model.train()
torch.set_grad_enabled(True)

for i in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        # The loader is exhausted: start another pass over the training data
        tr_it = iter(train_data_loader)
        data = next(tr_it)

    # Device transfer, forward pass and loss summation live in forward_train
    losses = forward_train(model, data, device)

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    batch_loss = losses.item()
    loss_log.append(batch_loss)
    iterations.append(i)
    progress_bar.set_description(f'batch loss: {batch_loss}, average loss: {np.mean(loss_log)}.')
    

In [None]:
# Training loss per iteration, labelled so the figure can be read on its own
fig, ax = plt.subplots()
ax.plot(iterations, loss_log)
ax.set(title='Training loss', xlabel='Iteration', ylabel='Summed batch loss')
plt.show()

In [None]:
# Validation loader: fixed order, batches of 8, tuple-style collation
valid_data_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=8,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
def format_prediction_string(boxes, scores):
    """Serialise detections as ``"score x y width height"`` groups.

    Args:
        boxes: Array with one ``x_min, y_min, x_max, y_max`` row per
            detection; values are truncated to integers.
        scores: One score per box, written with four decimals.

    Returns:
        All groups joined by single spaces; an empty string when there are no
        detections.
    """
    int_boxes = boxes.astype(int)
    parts = [
        f'{score:.4f} {row[0]} {row[1]} {row[2] - row[0]} {row[3] - row[1]}'
        for score, row in zip(scores, int_boxes)
    ]
    return " ".join(parts)

In [None]:
detection_threshold = 0.5
results = []
# Keep the CPU fallback instead of forcing 'cuda', which fails without a GPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.eval()
# Inference needs no autograd graph; disabling it avoids wasted memory
with torch.no_grad():
    # The loader yields (images, targets); each target is the dict built by
    # the dataset, which carries the image id alongside the boxes and labels.
    for images, targets in valid_data_loader:

        images = [image.to(device) for image in images]
        outputs = model(images)

        for output, target in zip(outputs, targets):

            boxes = output['boxes'].cpu().numpy()
            scores = output['scores'].cpu().numpy()

            # Keep only the detections at or above the threshold
            keep = scores >= detection_threshold
            boxes = boxes[keep].astype(np.int32)
            scores = scores[keep]

            results.append({
                # Store the integer id itself, not the whole target dict
                'image_id': int(target['image_id'].item()),
                'PredictionString': format_prediction_string(boxes, scores)
            })

In [None]:
def plot_detected_bboxes(test_img, predictions, n_to_plot=2, score_threshold=0.6):
    """Show the first images side by side with their high-scoring boxes.

    Args:
        test_img: Sequence of image tensors. Each is multiplied by 255 and
            moved from channel-first to channel-last layout for display.
        predictions: One dict per image with ``boxes`` (x_min, y_min, x_max,
            y_max rows) and ``scores`` tensors.
        n_to_plot: Maximum number of images to show.
        score_threshold: Only boxes with a score strictly above this value
            are drawn.

    Returns:
        None. Nothing is drawn when there is no image to show.
    """
    n = min(len(test_img), n_to_plot)
    if n <= 0:
        # plt.subplots cannot create a grid with zero columns
        return

    # squeeze=False keeps `axes` two-dimensional, so indexing also works when
    # only a single image is plotted.
    fig, axes = plt.subplots(1, n, figsize=(20, 8), squeeze=False)

    for i, ax in enumerate(axes[0]):
        # detach() lets this work even when gradient tracking is enabled
        img = np.asarray(test_img[i].detach().cpu().numpy() * 255, dtype=np.int64)
        img = np.moveaxis(img, 0, 2)
        img = Image.fromarray(np.uint8(img)).convert('RGB')
        ax.imshow(img)
        ax.set_axis_off()

        bboxes = predictions[i]['boxes'].detach().cpu().numpy()
        scores = predictions[i]['scores'].detach().cpu().numpy()
        scores_mask = scores > score_threshold
        for bbox in bboxes[scores_mask]:
            patch = patches.Rectangle(
                (bbox[0], bbox[1]),
                bbox[2] - bbox[0], bbox[3] - bbox[1],
                linewidth=2,
                edgecolor='r',
                facecolor='None',
                alpha=0.8)
            ax.add_patch(patch)

    fig.tight_layout()
    return

In [None]:
# Switch gradient tracking off globally and put the model in inference mode
torch.set_grad_enabled(mode=False)
model.eval()

# Iterator over the validation batches used for the visual spot-check
test_it = iter(valid_data_loader)

In [None]:
test_img, test_gt  = next(test_it)
test_img = [image.to(device) for image in test_img]

# Set inference mode and disable autograd locally, so this cell does not rely
# on an earlier cell having changed the model mode or the global grad switch.
model.eval()
with torch.no_grad():
    predictions = model(test_img)

plot_detected_bboxes(test_img, predictions,
                     n_to_plot=2,
                     score_threshold=0.6)

In [None]:
# Gather the collected predictions into a table and preview the first rows
test_df = pd.DataFrame(data=results, columns=['image_id', 'PredictionString'])
test_df.head(n=5)

In [None]:
# Write the prediction table to disk without the index column
test_df.to_csv(path_or_buf='submission.csv', index=False)