# Processing Raw Predictions

We have generated a number of predictions and now we would like to investigate different ways of distilling multiple frame predictions into a single video prediction.

In [1]:
from EasyRetinaFace import EasyRetinaFace
from video_utils import read_frame, plot_detections
from pathlib import Path

In [2]:
import os
import cv2
import sys
import numpy as np 
import pandas as pd 

from pathlib import Path
from video_utils import load_all_metadata
from sklearn.metrics import log_loss

In [3]:
# Load the cached predictions. The file stores a pickled Python object (hence
# allow_pickle=True); .item() unwraps it from the single-element array that
# np.load returns. Unpickling can execute arbitrary code, so only load files
# you produced yourself.
raw_preds = np.load('raw_preds.npy', allow_pickle=True).item()

# Peek at one entry: the array of prediction scores stored under the first key
first_key = list(raw_preds)[0]
raw_preds[first_key]

array([0.9994698 , 0.99946517, 0.99999034, 0.99983704, 0.9987105 ,
       0.99988353, 0.9967527 , 0.9996308 , 0.9996635 , 0.9999956 ,
       0.99808395, 0.9831117 , 0.9999862 ], dtype=float32)

In [4]:
# Metadata table from the project's video_utils helper. Later cells read its
# 'fname', 'directory' and 'label' columns to find each video's folder and label.
all_metadata = load_all_metadata()

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




## Calculate Loss

This competition's [evaluation metric](https://www.kaggle.com/c/deepfake-detection-challenge/overview/evaluation) is log loss which is given as:

$\textrm{LogLoss} = - \frac{1}{n} \sum_{i=1}^n \left[ y_i \log(\hat{y}_i) + (1 - y_i) \log(1 - \hat{y}_i)\right]$

We'll use sklearn's `log_loss()` function.

In [5]:
# Generate sample values from a seeded generator so that Restart & Run All
# reproduces the same loss every time
rng = np.random.RandomState(42)
n_samples = 10
y_true = rng.randint(2, size=n_samples).tolist()  # ints, each 0 or 1
y_hat = rng.rand(n_samples).tolist()              # floats in [0, 1)

# Get loss with sklearn. `labels` is passed explicitly because log_loss raises
# when y_true happens to contain a single class and the labels are not given.
log_loss(y_true, y_hat, labels=[0, 1])

0.6577101277717478

## Simple Averaging

The most obvious and straightforward approach might be to simply average all of our raw predictions.

In [6]:
# Keep track of predictions for each folder
folder_0_avg_preds = []
folder_0_y_true = []

folder_1_avg_preds = []
folder_1_y_true = []

folder_2_avg_preds = []
folder_2_y_true = []

# Dispatch table: directory -> (true labels, averaged predictions) for that folder
folder_lists = {
    '../data/dfdc_train_part_0': (folder_0_y_true, folder_0_avg_preds),
    '../data/dfdc_train_part_1': (folder_1_y_true, folder_1_avg_preds),
    '../data/dfdc_train_part_2': (folder_2_y_true, folder_2_avg_preds),
}

# Build the fname -> {'directory': ..., 'label': ...} lookup once, instead of
# scanning the whole metadata table with a boolean mask for every video.
# keep='first' matches the previous behaviour of taking the first matching row.
meta_by_fname = (
    all_metadata
    .drop_duplicates(subset='fname', keep='first')
    .set_index('fname')[['directory', 'label']]
    .to_dict('index')
)

for path, preds in raw_preds.items():
    if path not in meta_by_fname:
        raise KeyError(f"No metadata row found for video {path!r}")
    row = meta_by_fname[path]

    if row['directory'] not in folder_lists:
        raise ValueError(
            f"Invalid entry: unexpected directory {row['directory']!r} for video {path!r}"
        )
    folder_y_true, folder_avg_preds = folder_lists[row['directory']]

    # Note that we clip values: log loss grows without bound as a wrong
    # prediction approaches 0 or 1, so we cap how confident a video score can be
    avg = np.mean(preds).clip(0.01, 0.99)

    folder_avg_preds.append(avg)
    y = 1 if row['label'] == 'FAKE' else 0
    folder_y_true.append(y)

# labels=[0, 1] keeps log_loss from raising if a folder contains only one class
print("folder 0", log_loss(folder_0_y_true, folder_0_avg_preds, labels=[0, 1]))
print("folder 1", log_loss(folder_1_y_true, folder_1_avg_preds, labels=[0, 1]))
print("folder 2", log_loss(folder_2_y_true, folder_2_avg_preds, labels=[0, 1]))

all_true = folder_0_y_true + folder_1_y_true + folder_2_y_true
all_preds = folder_0_avg_preds + folder_1_avg_preds + folder_2_avg_preds
print("all", log_loss(all_true, all_preds, labels=[0, 1]))

folder 0 0.5327780385038207
folder 1 0.5944749533284013
folder 2 0.6016714495100052
all 0.5844726400986539


## Take Max Value

Sometimes a single frame is enough evidence that we've encountered a deep fake. Let's try taking the max value and see what happens.

In [8]:
# NOTE: the list keeps the name `avg_preds` used elsewhere in the notebook,
# but in this cell it holds each video's *maximum* frame score.
avg_preds = []
y_true = []

# Build the fname -> label lookup once, instead of scanning the whole metadata
# table with a boolean mask for every video. keep='first' matches the previous
# behaviour of taking the first matching row.
label_by_fname = (
    all_metadata
    .drop_duplicates(subset='fname', keep='first')
    .set_index('fname')['label']
    .to_dict()
)

for path, preds in raw_preds.items():
    if path not in label_by_fname:
        raise KeyError(f"No metadata row found for video {path!r}")

    # Note that we clip values
    max_pred = np.max(preds).clip(0.01, 0.99)
    avg_preds.append(max_pred)

    y = 1 if label_by_fname[path] == 'FAKE' else 0
    y_true.append(y)

# labels=[0, 1] keeps log_loss from raising if only one class is present
log_loss(y_true, avg_preds, labels=[0, 1])

1.6531126914403507

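Much worse! The max of a video's frame scores is never smaller than their mean, so every video's score is pushed toward FAKE; a single high-scoring frame in a real video is then enough to incur a large log-loss penalty.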