## Inference with Single Model

In [1]:
# Install the extra packages from wheel files stored under ../input (local files, not package-index names).
! pip install ../input/mmcvwhl/addict-2.2.1-py3-none-any.whl
! pip install ../input/mmcvwhl/mmcv-0.4.0-cp36-cp36m-linux_x86_64.whl
! pip install ../input/facenet-pytorch/facenet_pytorch-2.2.9-py3-none-any.whl
! pip install ../input/efficientnet-pytorch/efficientnet_pytorch-0.6.3-cp36-none-any.whl

Processing /kaggle/input/mmcvwhl/addict-2.2.1-py3-none-any.whl
Installing collected packages: addict
Successfully installed addict-2.2.1
Processing /kaggle/input/mmcvwhl/mmcv-0.4.0-cp36-cp36m-linux_x86_64.whl
Installing collected packages: mmcv
Successfully installed mmcv-0.4.0
Processing /kaggle/input/facenet-pytorch/facenet_pytorch-2.2.9-py3-none-any.whl
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.2.9
Processing /kaggle/input/efficientnet-pytorch/efficientnet_pytorch-0.6.3-cp36-none-any.whl
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.6.3


In [2]:
import cv2, mmcv
from base64 import b64encode
from IPython.display import display, HTML
from fastai.core import *
from fastai.vision import *
from sklearn.cluster import KMeans

sys.path.append('../input/realfakekode')
from kgl_deepfake.face_detection import *
from kgl_deepfake.EasyBlazeFace import EasyBlazeFace
from kgl_deepfake.EasyRetinaFace import EasyRetinaFace
from kgl_deepfake.EasyMTCNN import EasyMTCNN
#from kgl_deepfake.efficientnet import EfficientNet
from efficientnet_pytorch import EfficientNet

import holoviews as hv

In [3]:
# Select bokeh as the holoviews plotting backend (used by hv_faces below).
hv.extension('bokeh')

In [4]:
# Run on the GPU when CUDA is available, otherwise on the CPU; the bare name echoes the choice as cell output.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

## FAKE/REAL classifier

In [5]:
# Load our model and weights
# The network is an EfficientNet-b1 built by name with a 2-class output.
# The checkpoint is first mapped to CPU, its 'model' entry holds the state dict,
# and the model is then moved to `device`.
model = EfficientNet.from_name('efficientnet-b1', override_params={'num_classes': 2})
state_dict = torch.load('../input/trained-models/cutmix_30ep.pth', map_location='cpu')
model.load_state_dict(state_dict['model'])
model = model.to(device)

## Face detector

In [6]:
# Instantiate the three face detectors from weight files under ../input/realfakekode.
# RetinaFace is told to run on CPU unless `device` is CUDA.
easyBlazeFace = EasyBlazeFace(weights='../input/realfakekode/face_detection/blazeface.pth', 
                              anchors='../input/realfakekode/face_detection/anchors.npy')
easyRetinaFace = EasyRetinaFace(path='../input/realfakekode/face_detection/Pytorch_Retinaface/weights/Resnet50_Final.pth', 
                                cpu=False if device.type=='cuda' else True)
easyMTCNN = EasyMTCNN(path_pnet='../input/realfakekode/facenet_pytorch/data/pnet.pt',
                      path_rnet='../input/realfakekode/facenet_pytorch/data/rnet.pt',
                      path_onet='../input/realfakekode/facenet_pytorch/data/onet.pt')

Loading pretrained model from ../input/realfakekode/face_detection/Pytorch_Retinaface/weights/Resnet50_Final.pth
remove prefix 'module.'
Missing keys:0
Unused checkpoint keys:0
Used keys:456


## Data

In [7]:
def get_top_loss(vant, path_raw_pred):
    '''
    Sort previously inferred videos by their loss values, and return the top losses.

    Parameters
    ----------
    vant : DataFrame with `fname` and `label` columns; a video counts as a positive
        (target 1) when its label is the string 'FAKE'. If a file name occurs more
        than once, its first row is used.
    path_raw_pred : path of a `.npy` file holding a pickled dict that maps each video
        file name to its array of raw probabilities.

    Returns
    -------
    DataFrame with columns `fname`, `median_prob`, `label`, `target`, `bce`, sorted by
    `bce` in descending order with a fresh 0..n-1 index.

    Raises
    ------
    IndexError : if a predicted video has no row in `vant`.
    '''
    # NOTE: allow_pickle=True unpickles arbitrary objects -- only load files you trust.
    raw_pred = np.load(path_raw_pred, allow_pickle=True).item()
    vns = list(raw_pred.keys())
    median_prob = [np.median(raw_pred[vn]) for vn in vns]
    dfvids = pd.DataFrame({'fname': vns, 'median_prob': median_prob})

    # One indexed lookup table instead of scanning `vant` once per video.
    label_of = vant.drop_duplicates('fname').set_index('fname')['label']
    unknown = dfvids.fname[~dfvids.fname.isin(label_of.index)]
    if len(unknown):
        raise IndexError(f'no label in `vant` for: {list(unknown)}')
    dfvids['label'] = dfvids.fname.map(label_of)
    dfvids['target'] = (dfvids.label == 'FAKE').astype('int64')

    # Binary cross-entropy of the median probability against the target.
    # The log is clamped at -100, as torch.nn.BCELoss does, so a saturated
    # probability (exactly 0 or 1) yields a loss of 100 instead of inf.
    prob = dfvids.median_prob.values.astype(np.float64)
    prob_of_truth = np.where(dfvids.target.values == 1, prob, 1 - prob)
    with np.errstate(divide='ignore'):
        dfvids['bce'] = -np.maximum(np.log(prob_of_truth), -100.)

    dfvids_toploss = dfvids.sort_values('bce', ascending=False).reset_index(drop=True)
    return dfvids_toploss

In [8]:
def show_video(fname):
    '''
    Display the video file `fname` as an inline HTML5 player in the notebook output.

    The whole file is read into memory and embedded in the page as a base64
    `data:` URL declared as video/mp4, so the output grows with the file size.
    '''
    # Context manager so the file handle is closed as soon as the bytes are read.
    with open(fname, 'rb') as f:
        vid1 = f.read()
    data_url = "data:video/mp4;base64," + b64encode(vid1).decode()
    display(HTML("""
    <video width=600 controls>
          <source src="%s" type="video/mp4">
    </video>
    """ % data_url))

In [9]:
# list of videos with 2 people
# twopeeps = ['pcoxcmtroa.mp4', 'sbzhqdbslb.mp4', 'lclrhuuwnj.mp4']

In [10]:
# Directory the video file names are resolved against (see predict_vids / show_video).
# The commented-out lines are the alternative paths under deepfake-detection-challenge.
# SOURCE_TEST = Path('../input/deepfake-detection-challenge/test_videos/')
# submission = pd.read_csv('../input/deepfake-detection-challenge/sample_submission.csv')
SOURCE_TEST = Path('../input/cropped-faces/cropped_faces/valid_videos/')

In [11]:
# Annotation table; get_top_loss reads its `fname` and `label` columns.
vant = pd.read_csv('../input/cropped-faces/cropped_faces/valid.csv')

In [12]:
# Rank the videos by the loss of the stored raw predictions (worst first) and preview the top three.
dfvids_toploss = get_top_loss(vant, '../input/valid-videos-raw-preds/raw_preds_cutmix30.npy')
dfvids_toploss.head(3)

Unnamed: 0,fname,median_prob,label,target,bce
0,ugzpmmyogi.mp4,0.001879,FAKE,1,6.276995
1,ehrtdalgon.mp4,0.002394,FAKE,1,6.034939
2,czupmkgroe.mp4,0.002684,FAKE,1,5.920355


In [13]:
# Play the video with the highest loss (first row of the ranked table).
show_video(SOURCE_TEST/dfvids_toploss.fname[0])

## Definitions

In [14]:
def vid2frames(vid, iframes):
    '''
    Pick the frames of `vid` whose positions are listed in `iframes`, converted from BGR to RGB.

    Parameters
    ----------
    vid : iterable yielding the frames in order; each frame is passed to
        `cv2.cvtColor(..., cv2.COLOR_BGR2RGB)`.
    iframes : collection of 0-based frame positions. Duplicates collapse to a
        single frame; positions past the end of `vid` are ignored.

    Returns
    -------
    (indices, frames) : tuple of the selected positions in increasing order, and
    the list of matching RGB frames.

    Raises
    ------
    ValueError : if no frame is selected (empty `iframes`, or every position is
        past the end of `vid`).
    '''
    # Set membership is O(1) per frame; tolist() yields plain Python scalars.
    wanted = set(np.asarray(iframes).ravel().tolist())
    if not wanted:
        raise ValueError('iframes is empty: no frame to extract')
    last = max(wanted)

    picked, frames = [], []
    for i, frame in enumerate(vid):
        if i in wanted:
            picked.append(i)
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # Nothing past the largest requested position is needed: stop decoding.
        if i >= last: break
    if not frames:
        raise ValueError('none of the requested frame indices exist in the video')
    return tuple(picked), frames

from facenet_pytorch.models.utils.detect_face import extract_face
def extract_faces_from_frame(frame, det, image_size=256, margin=20):
    '''
    Crop every detected face out of one frame (adapted from facenet_pytorch).

    `det` carries one detection per row, with the box in its first four columns;
    a 1-D `det` is treated as "nothing detected". Each box is cropped with
    `extract_face(image, box, image_size, margin)`.

    Returns the crops stacked along a new leading dimension, or an empty tensor
    when there is nothing to crop.
    '''
    no_faces = torch.Tensor([])
    if len(det.shape) == 1: return no_faces
    image = PIL.Image.fromarray(frame)
    crops = [extract_face(image, bbox, image_size, margin) for bbox in det[:, :4]]
    return torch.stack(crops) if crops else no_faces

def get_faces_vid(frames, detector):
    '''
    Detect faces on all `frames` with `detector.detect_on_multiple_frames`, crop them
    frame by frame (margin 20) and return all crops concatenated into one tensor.
    '''
    detections = detector.detect_on_multiple_frames(frames)
    crops_per_frame = [extract_faces_from_frame(frame, det, margin=20)
                       for frame, det in zip(frames, detections)]
    return torch.cat(crops_per_frame)

# Normalisation constants taken from `imagenet_stats` (first entry = mean, second = std);
# predict_faces_vid broadcasts them over dimension 1 of the face batch.
mean_norm = tensor(imagenet_stats[0])
std_norm = tensor(imagenet_stats[1])

def predict_faces_vid(faces_vid):
    '''
    Run the classifier on a batch of face crops and return its softmax output as a
    numpy array (one row per face).

    The crops are divided by 255 and normalised with `mean_norm` / `std_norm`
    (broadcast over dimension 1) before being moved to `device`. The input tensor
    is left unmodified.
    '''
    mean = mean_norm[None, :, None, None]
    std = std_norm[None, :, None, None]
    batch = ((faces_vid / 255 - mean) / std).to(device)
    model.eval()  # inference mode for the module (set on every call)
    with torch.no_grad():
        scores = torch.softmax(model(batch), dim=-1)
    return scores.detach().cpu().numpy()

def fix_prob(prob, fk_ccenter=.5, fk_ffrac=.15):
    '''
    Make the per-face probabilities of one video agree with each other.

    The values in `prob` are split into two KMeans clusters. The video is called
    fake when the higher cluster centre exceeds `fk_ccenter` AND that cluster holds
    more than a fraction `fk_ffrac` of the samples. Members of the "losing" cluster
    are then overwritten with the centre of the "winning" one: the low cluster is
    lifted to the high centre for a fake video, the high cluster is dropped to the
    low centre otherwise.

    Returns `(vid_is_fake, fixed)` where `fixed` is a modified copy of `prob`.
    '''
    clusterer = KMeans(n_clusters=2)
    clusterer.fit(prob[..., None])
    centers = clusterer.cluster_centers_.reshape(-1)
    assigned = clusterer.labels_
    fractions = np.array([(assigned == k).mean() for k in range(len(centers))])

    hi, lo = centers.argmax(), centers.argmin()
    vid_is_fake = bool(centers[hi] > fk_ccenter and fractions[hi] > fk_ffrac)

    fixed = prob.copy()
    if vid_is_fake:
        fixed[assigned == lo] = centers[hi]
    else:
        fixed[assigned == hi] = centers[lo]
    return vid_is_fake, fixed

def agg_probs(probs):
    '''Collapse the per-face probabilities of a video into a single score: their median.'''
    # Alternative aggregation that was tried: probs.mean()
    return np.median(probs)

def predict_vids(vns, detector=easyMTCNN, nface_min=10, n_frames=32):
    '''
    Predict one probability per video.

    Parameters
    ----------
    vns : iterable of video file names, resolved against `SOURCE_TEST`.
    detector : face detector tried first. When it yields fewer than `nface_min`
        faces, `easyRetinaFace` is tried on the same frames.
    nface_min : minimum number of face crops needed to trust a prediction.
    n_frames : number of frame positions drawn at random per video (default 32, the
        previously hard-coded value). Positions are drawn with replacement, so
        fewer distinct frames may actually be read.

    Returns
    -------
    numpy array with one value per entry of `vns`: the aggregated (see `fix_prob`,
    `agg_probs`) column 0 of the classifier's softmax output -- presumably the FAKE
    class, TODO confirm against the training label order. The value is .5 when too
    few faces were found or when processing the video raised an exception
    (best-effort: the path and error are printed and the loop continues).
    '''
    probs_vid = []
    pb = progress_bar(vns)
    for vn in pb:
        path = SOURCE_TEST/vn
        try:
            vid = mmcv.VideoReader(str(path))
            iframes = np.sort(np.random.randint(low=0, high=len(vid), size=n_frames))
            iframes, frames = vid2frames(vid, iframes)
            faces = get_faces_vid(frames, detector)
            # Fall back to RetinaFace -- unless it already was the primary detector,
            # in which case a second pass over the same frames would be redundant.
            if len(faces) < nface_min and detector is not easyRetinaFace:
                faces = get_faces_vid(frames, easyRetinaFace)
            if len(faces) < nface_min: prob = .5
            else:
                prob = predict_faces_vid(faces)[:,0]
                _, prob = fix_prob(prob)
                prob = agg_probs(prob)
            pb.comment = f'Aggregated probability: {prob}'
            probs_vid.append(prob)
        except Exception as e:
            # Deliberate best-effort: report the failing video and keep going.
            print(path); print(e)
            probs_vid.append(.5)
    return np.array(probs_vid)

def hv_faces(faces, probs=None):
    '''
    Lay out face crops as a grid of holoviews RGB images, 8 per row.

    Each entry of `faces` is permuted with (1, 2, 0) and cast to uint8 before
    plotting. When `probs` is given it must have one entry per face; each value is
    overlaid on its image as text with four decimals.
    '''
    if probs is not None: assert len(faces)==len(probs)
    panels = []
    for idx in range(len(faces)):
        pixels = faces[idx].permute(1, 2, 0).type(torch.uint8).numpy()
        panel = hv.RGB(pixels)
        if probs is not None:
            panel *= hv.Text(.35, .45, f'{probs[idx]:.4f}')
        panels.append(panel)

    rgb_opts = hv.opts.RGB(xaxis=None, yaxis=None, width=150, height=150)
    text_opts = hv.opts.Text(color='cyan', text_font_size='7.5pt')
    return hv.Layout(panels).cols(8).opts(rgb_opts, text_opts)

## Inference on all videos

In [15]:
# Re-run inference on every video of the ranked table, in the table's order:
# MTCNN is tried first and at least 20 face crops are required per video.
vns = dfvids_toploss.fname
detector = easyMTCNN
nface_min = 20

probs_vid = predict_vids(vns, detector, nface_min=nface_min)

## Evaluation

In [16]:
# Attach the new probabilities (same row order as `vns`) and their per-video binary cross-entropy.
# The log is not clamped here: a probability of exactly 0 or 1 on the wrong side gives inf.
dfvids_toploss['fixed_median_prob'] = probs_vid
dfvids_toploss['fixed_bce'] = dfvids_toploss.apply(lambda o: -np.log(o.fixed_median_prob) if o.target==1 
                                                   else -np.log(1-o.fixed_median_prob), axis=1)

In [17]:
# Show the stored (`median_prob`, `bce`) and the new (`fixed_median_prob`, `fixed_bce`) columns side by side.
dfvids_toploss

Unnamed: 0,fname,median_prob,label,target,bce,fixed_median_prob,fixed_bce
0,ugzpmmyogi.mp4,0.001879,FAKE,1,6.276995,0.515915,0.661813
1,ehrtdalgon.mp4,0.002394,FAKE,1,6.034939,0.085303,2.461545
2,czupmkgroe.mp4,0.002684,FAKE,1,5.920355,0.008276,4.794347
3,gnnbtnjqto.mp4,0.002743,FAKE,1,5.898823,0.856266,0.155174
4,lvomvkrszu.mp4,0.003035,FAKE,1,5.797603,0.059657,2.819141
...,...,...,...,...,...,...,...
1143,tnecbhfzba.mp4,0.999765,FAKE,1,0.000235,0.998718,0.001283
1144,zltwghgilz.mp4,0.999778,FAKE,1,0.000222,0.994429,0.005587
1145,gjxfowkaeo.mp4,0.999813,FAKE,1,0.000187,0.945971,0.055544
1146,umpscxhfsk.mp4,0.999847,FAKE,1,0.000153,0.999667,0.000334


In [18]:
# Mean per-video loss of the stored predictions vs. the new ones.
print(f'''
Using KMeans.
BCELoss before: {dfvids_toploss.bce.mean()}
BCELoss after: {dfvids_toploss.fixed_bce.mean()}
''')


Using KMeans.
BCELoss before: 0.4159091078168364
BCELoss after: 0.4056842243000317



In [19]:
# pd.Series(np.array(probs_vid)).hist();

In [20]:
# labels = vant.set_index('fname').loc[vns].label
# targets = labels.apply(lambda o: 1 if o=='FAKE' else 0).values
# loss_func = nn.BCELoss()
# with torch.no_grad(): loss = loss_func(tensor(probs_vid), tensor(targets).float())

## Write submission-format .csv

In [21]:
# df_vids = pd.DataFrame({'filename':vns, 'label':probs_vid})
# df_vids.to_csv('submission.csv', index=False)