# **Exploring the Image dataset**

In [None]:
import cv2 as cv
from IPython.display import Video, display
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

import subprocess

plt.rcParams['figure.dpi'] = 150
from PIL import Image


%matplotlib inline
from IPython.display import Video, display

#block those warnings from pandas about setting values on a slice
import warnings
warnings.filterwarnings('ignore')








In [None]:
# Load the image label table (one row per labelled box) and preview its first rows.
image_train = pd.read_csv("../input/nfl-impact-detection/image_labels.csv")
image_train.head()

In [None]:
# Show the first labelled image.
# cv.imread returns None (instead of raising) when the file cannot be read, and
# it decodes pixels in BGR order, whereas matplotlib expects RGB. Check the
# result and convert the channel order so the colours are displayed correctly.
first_image_file = "../input/nfl-impact-detection/images/" + image_train["image"][0]
im = cv.imread(first_image_file)
if im is None:
    raise FileNotFoundError(f"Could not read image: {first_image_file}")
im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
plt.imshow(im)

In [None]:
# Name of the working image: the `image` value of the row with index label 0.
img_name = image_train['image'][0]
img_name

In [None]:
# Absolute Kaggle path of the selected image.
# NOTE(review): other cells use the relative "../input/..." prefix; confirm both
# prefixes resolve to the same location in the target environment.
img_path = f"/kaggle/input/nfl-impact-detection/images/{img_name}"

In [None]:
# Root folder of the competition data (relative path).
# NOTE(review): not referenced again in the visible cells; DATA_PATH holds the same value.
image_path = '../input/nfl-impact-detection'

In [None]:

# Summarise the image label table: columns, dtypes and non-null counts.
image_train.info()

# Exploring the Video dataset

In [None]:
import seaborn as sns

In [None]:
# Load the video label table and preview its first rows.
video_train = pd.read_csv("../input/nfl-impact-detection/train_labels.csv")
video_train.head()

In [None]:
# Keep only the rows whose `impact` column equals 1.
video_train.query("impact == 1")

In [None]:
# List the files in the training video folder (IPython shell escape).
!ls ../input/nfl-impact-detection/train

In [None]:
# Embed one endzone training clip directly in the notebook output.
display(Video(data="/kaggle/input/nfl-impact-detection/train/58098_001193_Endzone.mp4", embed=True))


In [None]:
# Open the same clip with OpenCV so its properties and individual frames can be read.
video = cv.VideoCapture("/kaggle/input/nfl-impact-detection/train/58098_001193_Endzone.mp4")

In [None]:
# Report the basic properties of the opened clip, one "<label> <value>" line each.
video_properties = (
    ("Width", cv.CAP_PROP_FRAME_WIDTH),
    ("Height", cv.CAP_PROP_FRAME_HEIGHT),
    ("FPS", cv.CAP_PROP_FPS),
    ("Frame Count", cv.CAP_PROP_FRAME_COUNT),
)
for property_label, property_id in video_properties:
    print(property_label, video.get(property_id))

In [None]:
# Read the next frame of the clip and show it.
# video.read() reports failure through `ret` instead of raising, so check it.
ret, frame = video.read()
if not ret:
    raise RuntimeError("Could not read a frame from the video")
# OpenCV frames are BGR; convert to RGB for matplotlib (frame itself is left unchanged).
plt.imshow(cv.cvtColor(frame, cv.COLOR_BGR2RGB))

In [None]:
# Jump to frame position 100 and show that frame.
video.set(cv.CAP_PROP_POS_FRAMES, 100)
# video.read() reports failure through `ret` instead of raising, so check it.
ret, frame = video.read()
if not ret:
    raise RuntimeError("Could not read frame 100 from the video")
# OpenCV frames are BGR; convert to RGB for matplotlib (frame itself is left unchanged).
plt.imshow(cv.cvtColor(frame, cv.COLOR_BGR2RGB))

# Exploring the Tracking dataset

In [None]:
# Load the player tracking table for the training set and preview its first rows.
tracking_train = pd.read_csv("../input/nfl-impact-detection/train_player_tracking.csv")
tracking_train.head()

In [None]:

# (rows, columns) of the tracking table.
tracking_train.shape

# Using a Faster R-CNN model for helmet detection in images

In [None]:
import numpy as np
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import patches
from PIL import Image

import os
from tqdm import tqdm
from IPython.display import clear_output

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:
# Root folder of the competition data; the images folder and image_labels.csv are read from here.
DATA_PATH = '../input/nfl-impact-detection'

In [None]:
def add_bboxes(ax, img, img_df):
    """Draw every labelled box of one image onto a matplotlib axis.

    Args:
        ax: Axis to draw on; one rectangle patch is added per matching row.
        img: Image file name, compared against the ``image`` column.
        img_df: DataFrame providing the ``image``, ``left``, ``top``,
            ``width`` and ``height`` columns.

    Returns:
        None. The axis is modified in place.
    """
    rows_for_image = img_df[img_df['image'] == img]
    for _, row in rows_for_image.iterrows():
        # (left, top) is the anchor corner; width/height give the extent.
        anchor = (row['left'], row['top'])
        rectangle = patches.Rectangle(
            anchor,
            row['width'],
            row['height'],
            linewidth=1,
            edgecolor='r',
            facecolor='None',
            alpha=0.7,
        )
        ax.add_patch(rectangle)

def plot_random_images(root_path, plot_bboxes=True, verbose=True):
    """Show a 4x2 grid of randomly chosen images, endzone left and sideline right.

    Args:
        root_path: Folder containing ``images/`` and ``image_labels.csv``.
        plot_bboxes: When true, overlay the labelled boxes of each image.
        verbose: When true, print how many images there are in total and per view.

    Returns:
        None. The figure is drawn as a side effect.
    """
    images_path = root_path + '/images/'
    img_labels_df = pd.read_csv(root_path + '/image_labels.csv')

    images_list = os.listdir(images_path)
    # The camera view is encoded in the file name.
    endzone_images = [name for name in images_list if 'Endzone' in name]
    sideline_images = [name for name in images_list if 'Sideline' in name]

    if verbose:
        print(f'There are {len(images_list)} images in the `images` folder.')
        print(f'  {len(endzone_images)} - images from endzone.')
        print(f'  {len(sideline_images)} - images from sideline.')

    fig, ax = plt.subplots(4, 2, figsize=(14, 12))
    candidates_per_column = (endzone_images, sideline_images)
    for axes_row in ax:
        for axis, candidates in zip(axes_row, candidates_per_column):
            chosen_name = candidates[np.random.randint(len(candidates))]
            chosen_img = Image.open(images_path + chosen_name)
            axis.imshow(chosen_img)
            axis.set_axis_off()
            if plot_bboxes:
                add_bboxes(axis, chosen_name, img_labels_df)

    ax[0][0].set_title('Endzone images')
    ax[0][1].set_title('Sideline images')
    fig.tight_layout()

In [None]:
# Plot a random sample of images with their labelled boxes and print the image counts.
plot_random_images(DATA_PATH, plot_bboxes=True, verbose=True)

In [None]:
# Histogram of the `label` column of image_labels.csv.
img_labels_df = pd.read_csv(DATA_PATH + '/image_labels.csv')
plt.figure(figsize=(12, 6))
img_labels_df.label.hist()

# Fine-tuning the Faster R-CNN

In [None]:
class HelmetsDataset(object):
    """Map-style detection dataset over the helmet images and their box labels.

    Each item is ``(img, target)`` where ``img`` is a float32 tensor of shape
    ``[3, H, W]`` scaled to ``[0, 1]`` and ``target`` is a dict with:

    * ``boxes``: float32 tensor of shape ``[N, 4]`` as ``[x_min, y_min, x_max, y_max]``
    * ``labels``: int64 tensor of shape ``[N]`` (ids from ``labels_dict``)
    * ``image_id``: tensor holding the dataset index

    Args:
        root_path: Folder containing ``images/`` and ``image_labels.csv``.
    """

    def __init__(self, root_path):
        self.root_path = root_path
        # os.listdir returns entries in arbitrary order; sort so that a given
        # index refers to the same image on every run.
        self.images_list = sorted(os.listdir(os.path.join(root_path, 'images')))
        self.images_df = pd.read_csv(os.path.join(root_path, 'image_labels.csv'))
        self.labels_dict = {'Helmet': 1,
                            'Helmet-Blurred': 2,
                            'Helmet-Difficult': 3,
                            'Helmet-Sideline': 4,
                            'Helmet-Partial': 5}

    def __getitem__(self, idx):
        """Return the image tensor and target dict for dataset index ``idx``.

        Raises:
            KeyError: If a row carries a label missing from ``labels_dict``.
        """
        img_name = self.images_list[idx]
        img_path = os.path.join(self.root_path, 'images', img_name)
        # Close the file promptly and force three channels so grayscale or RGBA
        # files still produce an [H, W, 3] array.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert('RGB')) / 255
        img = np.moveaxis(img, 2, 0)  # to [C, H, W]

        # Collect boxes and helmet labels for this image from `image_labels.csv`.
        img_data_df = self.images_df[self.images_df['image'] == img_name]
        x_min = img_data_df['left'].to_numpy()
        y_min = img_data_df['top'].to_numpy()
        x_max = x_min + img_data_df['width'].to_numpy()
        y_max = y_min + img_data_df['height'].to_numpy()
        # Stacking along axis 1 yields shape (N, 4), including (0, 4) when the
        # image has no labelled boxes.
        bboxes = np.stack([x_min, y_min, x_max, y_max], axis=1)
        labels = [self.labels_dict[label] for label in img_data_df['label']]

        # Convert data to tensors
        img = torch.as_tensor(img, dtype=torch.float32)
        bboxes = torch.as_tensor(bboxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        image_id = torch.tensor([idx])

        target = {
            'boxes': bboxes,
            'labels': labels,
            'image_id': image_id,
        }
        return img, target

    def __len__(self):
        """Number of files found in the ``images`` folder."""
        return len(self.images_list)

In [None]:
def get_model(n_classes=6):
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a fresh box predictor.

    Args:
        n_classes: Number of outputs of the new classification head.

    Returns:
        The torchvision detection model with its box predictor replaced.
    """
    # Start from the torchvision model pretrained on the COCO dataset.
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Reuse the input width of the existing classifier head for the new one.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    new_predictor = FastRCNNPredictor(head_in_features, n_classes)
    detector.roi_heads.box_predictor = new_predictor
    return detector

In [None]:
def forward_train(model, data, device):
    """Run one forward pass on a batch and return the summed loss.

    Args:
        model: Callable taking ``(images, targets)`` and returning a dict of losses.
        data: Pair ``(imgs, targets)``; ``imgs`` is an iterable of tensors and
            ``targets`` an iterable of dicts whose values are tensors.
        device: Device that every image and target tensor is moved to.

    Returns:
        The sum of all values in the loss dict returned by ``model``.
    """
    imgs, targets = data
    imgs_on_device = [image.to(device) for image in imgs]
    targets_on_device = []
    for tgt in targets:
        targets_on_device.append({key: value.to(device) for key, value in tgt.items()})

    loss_dict = model(imgs_on_device, targets_on_device)
    return sum(loss_dict.values())

In [None]:
def collate_fn(batch):
    """Regroup a batch of samples by field instead of stacking them.

    A batch ``[(img0, tgt0), (img1, tgt1), ...]`` becomes
    ``((img0, img1, ...), (tgt0, tgt1, ...))``, so samples with different
    shapes can share a batch.
    """
    fields = zip(*batch)
    return tuple(fields)


In [None]:
BATCH_SIZE = 8  # Small batch size to avoid crashing the kernel
N_TEST = 100    # Number of images held out for testing
SPLIT_SEED = 42  # Fixes the train/test split so it is identical on every run

dataset = HelmetsDataset(DATA_PATH)


# Train and test split: shuffle all indices with a seeded generator, then hold
# out the last N_TEST of them.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
idxs = torch.randperm(len(dataset), generator=split_generator).tolist()
dataset_train = torch.utils.data.Subset(dataset, idxs[:-N_TEST])
dataset_test = torch.utils.data.Subset(dataset, idxs[-N_TEST:])

train_dataloader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=4,
                                               collate_fn=collate_fn)

# No shuffling for evaluation: the order does not affect the results, and a
# fixed order makes the inspected test batches repeatable.
test_dataloader = torch.utils.data.DataLoader(dataset_test,
                                              batch_size=BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=4,
                                              collate_fn=collate_fn)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Device: {device}')
# Move the model to its device before constructing the optimizer, as the
# torch.optim documentation recommends. Ending the cell on an assignment also
# avoids printing the full model repr.
model = get_model().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)


In [None]:
N_ITERS = 100
PLOT_EVERY = 10  # Redraw the loss curve every PLOT_EVERY iterations


progress_bar = tqdm(range(N_ITERS))
tr_it = iter(train_dataloader)
loss_log = []
iterations = []

# Training mode and gradient tracking do not change inside the loop: set them once.
model.train()
torch.set_grad_enabled(True)

for i in progress_bar:
    try:
        data = next(tr_it)
    except StopIteration:
        # The loader is exhausted: start another pass over the training data.
        tr_it = iter(train_dataloader)
        data = next(tr_it)

    losses = forward_train(model, data, device)

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    # Read the scalar loss once and reuse it for logging and the progress bar.
    batch_loss = losses.item()
    loss_log.append(batch_loss)
    iterations.append(i)
    progress_bar.set_description(f'batch loss: {batch_loss}, average loss: {np.mean(loss_log)}.')

    # Redrawing on every iteration slows training down; refresh the curve
    # periodically and once more at the very end.
    if (i + 1) % PLOT_EVERY == 0 or i == N_ITERS - 1:
        clear_output(True)
        plt.plot(iterations, loss_log)
        plt.show()

# Model Evaluation

In [None]:
def plot_detected_bboxes(test_img, predictions, n_to_plot=2, score_threshold=0.5):
    """Plot the first images of a batch with their detected boxes.

    Args:
        test_img: Sequence of image tensors in ``[C, H, W]`` layout with values
            scaled to ``[0, 1]`` (they are multiplied by 255 for display).
        predictions: Sequence of dicts, one per image, with ``boxes``
            (``[x_min, y_min, x_max, y_max]`` rows) and ``scores`` tensors.
        n_to_plot: Maximum number of images to show.
        score_threshold: Only boxes scoring strictly above this value are drawn.

    Returns:
        None. The figure is drawn as a side effect; nothing is drawn when there
        are no images to plot.
    """
    n = min(len(test_img), n_to_plot)
    if n <= 0:
        # plt.subplots cannot create a grid with zero columns.
        return

    # squeeze=False keeps the axes array two-dimensional even when n == 1;
    # otherwise a single Axes object is returned and indexing it fails.
    fig, ax = plt.subplots(1, n, figsize=(16, 8), squeeze=False)
    axes = ax[0]

    for i in range(n):
        img = np.asarray(test_img[i].cpu().numpy() * 255, dtype=np.int64)
        img = np.moveaxis(img, 0, 2)  # back to [H, W, C] for display
        img = Image.fromarray(np.uint8(img)).convert('RGB')
        axes[i].imshow(img)
        axes[i].set_axis_off()

        # detach() makes the conversion work even if gradients were being tracked.
        bboxes = predictions[i]['boxes'].detach().cpu().numpy()
        scores = predictions[i]['scores'].detach().cpu().numpy()
        scores_mask = scores > score_threshold
        for bbox in bboxes[scores_mask]:
            patch = patches.Rectangle(
                (bbox[0], bbox[1]),
                bbox[2] - bbox[0], bbox[3] - bbox[1],
                linewidth=1,
                edgecolor='b',
                facecolor='None',
                alpha=0.8)
            axes[i].add_patch(patch)

    fig.tight_layout()
    return

In [None]:
# Put the model in evaluation mode and turn gradient tracking off.
# Note: set_grad_enabled(False) is a global switch and stays off for later
# cells until it is enabled again.
model.eval()
torch.set_grad_enabled(False)

# Iterator over the held-out batches; the next cell pulls one batch from it.
test_it = iter(test_dataloader)


In [None]:
# Take one batch from the test loader and move its images to the model's device.
# NOTE(review): test_gt (the ground-truth targets) is not used in this cell.
test_img, test_gt  = next(test_it)
test_img = [image.to(device) for image in test_img]

# Calling the model without targets returns the predictions for each image.
predictions = model(test_img)

# Show up to 4 images with the boxes that score above 0.6.
plot_detected_bboxes(test_img, predictions,
                     n_to_plot=4,
                     score_threshold=0.6)

In [None]:
# Import TensorFlow/Keras and print the installed versions.
import tensorflow as tf
from tensorflow import keras
print("Tensorflow Version:", tf.__version__)
print("Keras Version:", keras.__version__)

In [None]:
from tensorflow.keras.optimizers import SGD


# Create a small fully-connected Sequential model: two hidden layers of 100
# ReLU units followed by a 6-unit softmax output.
# It is bound to `keras_model` rather than `model`, so the Faster R-CNN held in
# `model` is not overwritten by this cell.
keras_model = keras.models.Sequential()
keras_model.add(keras.layers.Dense(100, activation="relu"))
keras_model.add(keras.layers.Dense(100, activation="relu"))
keras_model.add(keras.layers.Dense(6, activation="softmax"))

In [None]:
import numpy as np
from scipy.optimize import linear_sum_assignment
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

In [None]:
def iou(bbox1, bbox2):
    """Intersection over union of two axis-aligned boxes.

    Args:
        bbox1: Four values ``(x0, y0, x1, y1)``; each is converted with ``float``.
        bbox2: Second box in the same layout.

    Returns:
        ``0`` when the boxes do not overlap (touching edges count as no
        overlap), otherwise the intersection area divided by the union area.
    """
    ax0, ay0, ax1, ay1 = map(float, bbox1)
    bx0, by0, bx1, by1 = map(float, bbox2)

    # Extent of the overlap rectangle along each axis.
    inter_w = min(ax1, bx1) - max(ax0, bx0)
    inter_h = min(ay1, by1) - max(ay0, by0)
    if inter_w <= 0 or inter_h <= 0:
        return 0

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    inter_area = inter_w * inter_h
    union_area = area_a + area_b - inter_area

    return inter_area / union_area

In [None]:
def precision_calc(gt_boxes, pred_boxes, max_dist=4, iou_threshold=0.35):
    """Match predictions to ground truth one-to-one and count TP / FP / FN.

    Every box is a sequence whose first element is compared between the two
    boxes (presumably the frame number; confirm against the data) and whose
    remaining four elements are passed to ``iou``.

    A ground-truth/prediction pair is a valid match when the first elements
    differ by at most ``max_dist`` and the IoU is not below ``iou_threshold``.
    The assignment is chosen so that the number of valid matches is maximal.

    Args:
        gt_boxes: Sequence of ground-truth boxes.
        pred_boxes: Sequence of predicted boxes.
        max_dist: Largest allowed absolute difference of the first elements.
        iou_threshold: Minimum IoU for a pair to count as a match.

    Returns:
        Tuple ``(tp, fp, fn)`` of plain ints.
    """
    # Cost 0 marks a valid match, cost 1 everything else.
    cost_matrix = np.ones((len(gt_boxes), len(pred_boxes)))
    for i, box1 in enumerate(gt_boxes):
        for j, box2 in enumerate(pred_boxes):
            if abs(box1[0] - box2[0]) > max_dist:
                continue
            if iou(box1[1:], box2[1:]) < iou_threshold:
                continue
            cost_matrix[i, j] = 0

    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    # Only assigned pairs with zero cost are true positives; every other
    # prediction is a false positive and every other ground truth a false negative.
    tp = int(np.count_nonzero(cost_matrix[row_ind, col_ind] == 0))
    fp = len(pred_boxes) - tp
    fn = len(gt_boxes) - tp
    return tp, fp, fn

In [None]:
# Toy example: three ground-truth and three predicted boxes, each given as
# [first element, x0, y0, x1, y1].
gt_boxes = [[0, 50,60, 120, 130], [0, 40, 20, 110, 80], [0, 140, 20, 190, 80]]
pred_boxes = [[0, 55, 30, 130, 110], [0, 60, 90, 135, 140], [0, 70, 120, 155, 190]]

# Draw on an empty 224x224 RGB canvas.
im = np.array(Image.new('RGB', (224, 224)))
fig,ax = plt.subplots(1)
ax.imshow(im)

# Ground truth in green first, then predictions in red.
for box_group, edge_color in ((gt_boxes, 'g'), (pred_boxes, 'r')):
    for box in box_group:
        rect = patches.Rectangle(
            (box[1], box[2]),
            box[3] - box[1],
            box[4] - box[2],
            linewidth=1,
            edgecolor=edge_color,
            facecolor='none',
        )
        ax.add_patch(rect)

In [None]:
# Count true positives, false positives and false negatives for the toy boxes above.
tp, fp, fn = precision_calc(gt_boxes, pred_boxes)
print(f'TP: {tp}, FP: {fp} FN: {fn}')


In [None]:
# Calculate the CV score: accumulate TP/FP/FN over every entry of the saved
# test data, then derive precision, recall and F1 from the totals.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files
# from a trusted source.
testdata = np.load('../input/test-metrics/testdata.npy', allow_pickle=True)
ftp, ffp, ffn = [], [], []
# NOTE(review): `count` is unused, and this loop rebinds the global names
# `data`, `pred_boxes` and `gt_boxes` used by earlier cells.
for count, data in enumerate(testdata):
    pred_boxes = data['data']['preds']
    gt_boxes = data['data']['gt']
    tp, fp, fn = precision_calc(gt_boxes, pred_boxes)
    ftp.append(tp)
    ffp.append(fp)
    ffn.append(fn)

tp = np.sum(ftp)
fp = np.sum(ffp)
fn = np.sum(ffn)
# The 1e-6 terms keep the divisions defined when a denominator would be zero.
precision = tp / (tp + fp + 1e-6)
recall =  tp / (tp + fn +1e-6)
f1_score = 2*(precision*recall)/(precision+recall+1e-6)
print(f'TP: {tp}, FP: {fp}, FN: {fn}, PRECISION: {precision:.4f}, RECALL: {recall:.4f}, F1 SCORE: {f1_score}')

In [None]:

# Load the player tracking table for the test set and print its (rows, columns).
test_data = pd.read_csv("../input/nfl-impact-detection/test_player_tracking.csv")
print(test_data.shape)
# test_data.head()
# Load the sample submission file.
sub = pd.read_csv("../input/nfl-impact-detection/sample_submission.csv")

In [None]:
from torch.utils.data import Dataset,DataLoader

In [None]:

# Run the detector over every batch and collect image ids, boxes and scores.
# NOTE(review): `data_loader`, `make_predictions` and `DETECTION_THRESHOLD` are
# not defined in the visible cells; they must exist before this cell runs.
result_image_ids = []
results_boxes = []
results_scores = []
for images, image_ids in data_loader:
    box_list, score_list = make_predictions(images, score_threshold=DETECTION_THRESHOLD)
    for i, image in enumerate(images):
        boxes = box_list[i]
        scores = score_list[i]
        image_id = image_ids[i]
        # Rescale x by 1280/512 and y by 720/512 (presumably from a 512x512
        # model input to 1280x720 frames; confirm against make_predictions).
        boxes[:, 0] = (boxes[:, 0] * 1280 / 512)
        boxes[:, 1] = (boxes[:, 1] * 720 / 512)
        boxes[:, 2] = (boxes[:, 2] * 1280 / 512)
        boxes[:, 3] = (boxes[:, 3] * 720 / 512)
        # Convert [x_min, y_min, x_max, y_max] to [left, top, width, height].
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        boxes = boxes.astype(np.int32)
        # Clip x-related columns to [0, 1279] and y-related columns to [0, 719].
        boxes[:, 0] = boxes[:, 0].clip(min=0, max=1280-1)
        boxes[:, 2] = boxes[:, 2].clip(min=0, max=1280-1)
        boxes[:, 1] = boxes[:, 1].clip(min=0, max=720-1)
        boxes[:, 3] = boxes[:, 3].clip(min=0, max=720-1)
        # One image id per box, so ids can be lined up with the boxes later.
        result_image_ids += [image_id]*len(boxes)
        results_boxes.append(boxes)
        results_scores.append(scores)

In [None]:
import nflimpact

# Create the submission environment once and keep the environment itself in
# `env`. The original cell bound `env` to the return value of predict() and
# passed the pandas module (imported under the alias `df`) as the submission.
env = nflimpact.make_env()

# predict() expects a pandas DataFrame holding the entire submission file.
# NOTE(review): `sub` is the sample submission loaded earlier and serves as a
# placeholder; replace it with the DataFrame built from the model's predictions.
submission_df = sub
env.predict(submission_df)