# Get a single face from a video

In [1]:
#default_exp nb_01b

In [1]:
#export
from fastai.vision import *
from kgl_deepfake.nb_00 import *
from kgl_deepfake.nb_01 import *
from IPython.display import HTML
import cv2
import pandas as pd
from facenet_pytorch import MTCNN

In [2]:
torch.multiprocessing.set_start_method('spawn')

### Data

In [3]:
SOURCE = Path('../data/train_sample_videos/')

In [4]:
f = get_files(SOURCE, extensions=['.json'])[0]
annots = pd.read_json(f).T
annots.reset_index(inplace=True)
annots.rename({'index':'fname'}, axis=1, inplace=True)
annots.head()

Unnamed: 0,fname,label,split,original
0,aagfhgtpmv.mp4,FAKE,train,vudstovrck.mp4
1,aapnvogymq.mp4,FAKE,train,jdubbvfswz.mp4
2,abarnvbtwb.mp4,REAL,train,
3,abofeumbvv.mp4,FAKE,train,atvmxvwyns.mp4
4,abqwwspghj.mp4,FAKE,train,qzimuostzz.mp4


In [5]:
fn = SOURCE/annots.fname[0]
fn

PosixPath('../data/train_sample_videos/aagfhgtpmv.mp4')

### Get the first detected face from a video

In [31]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
detector = MTCNN(device=device, post_process=False, select_largest=False)

In [9]:
#export
def get_first_face(detector, fn, resize=.5, equalize=False):
    '''
    Returns the first detected face from a video
    '''
    assert Path(fn).exists()
    v_cap = cv2.VideoCapture(str(fn))
    v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    iframe, face = None, None
    for i in range(v_len):
        _ = v_cap.grab()
        success, frame = v_cap.retrieve()
        if not success: continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2YCrCb)
        if equalize:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            frame[:,:,0] = clahe.apply(frame[:,:,0])
        frame = cv2.cvtColor(frame, cv2.COLOR_YCrCb2RGB)
        frame = PIL.Image.fromarray(frame)
        if resize is not None: frame = frame.resize([int(d * resize) for d in frame.size])
        face = detector(frame)
        if face is not None:
            iframe = i
            break
    v_cap.release()
    return iframe, face

### Videos in which not a single face is detected by MTCNN

In [13]:
#export
def get_has_face(fnames, detector):
    if isinstance(fnames, (str, Path)): fnames = [fnames]
    res = []
    for i in progress_bar(range(len(fnames))):
        iframe, face = get_first_face(detector, fnames[i], equalize=True)
        res.append(True if iframe is not None else False)
    return res

In [14]:
fnames = [SOURCE/o for o in annots.fname]
hasface = get_has_face(fnames, detector)

In [15]:
len(hasface), len([o for o in hasface if o == False])

(400, 0)

In [16]:
annots_noface = annots[~np.array(hasface)]

In [17]:
annots_noface.shape

(0, 4)

In [18]:
fnames_noface = [SOURCE/o for o in annots_noface.fname]
labels = [f'{o.fname} {o.label}' for i, o in annots_noface.iterrows()]

In [19]:
HTML(html_vids(fnames_noface, titles=labels))

Save annotations for videos with no detected faces for further investigation.

In [20]:
annots_noface.to_csv('annots_noface.csv', index=False)

In [22]:
annots_noface = pd.read_csv('annots_noface.csv')

### `ItemList` that returns the first face from a video

In [49]:
SegmentationLabelList??

In [66]:
#export
class VideoFaceList(ImageList):
    def __init__(self, *args, detector=None, device=None, resize=.5, equalize=False, **kwargs):
        print(detector.select_largest)
        print(device)
        print(resize)
        print(equalize)
        print()
        if device is None: device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        if detector == None: detector = MTCNN(device=device, post_process=False)
        self.detector = detector
        self.resize, self.equalize = resize, equalize
        super().__init__(*args, **kwargs)
    
    def get_face(self, fn:Path):
        iframe, face = get_first_face(self.detector, fn, self.resize, self.equalize)
        if iframe is None or face is None: raise Exception(f'No faces detected in {fn}')
        return iframe, face
    
    def open(self, fn:Path):
        iframe, face = self.get_face(fn)
        return Image(face / 255)

In [60]:
class VideoFaceList(ImageList):
    def __init__(self, *args, **kwargs):
        print(kwargs['detector'].select_largest)
        print(kwargs['resize'])
        print(kwargs['equalize'])

Let's test this, excluding those videos with no face detected.

In [24]:
annots[np.array(hasface)].shape

(400, 4)

In [25]:
annots_hasface = annots[np.array(hasface)]

In [55]:
ImageList.

Object `ImageList._split` not found.


In [69]:
src = (VideoFaceList
       .from_df(df=annots_hasface, path=SOURCE, cols='fname', resize=1,
                detector=detector, equalize=True))

False
None
1
True



In [71]:
src.copy

AttributeError: 'VideoFaceList' object has no attribute 'kwargs'

In [67]:
src = (VideoFaceList
       .from_df(df=annots_hasface, path=SOURCE, cols='fname', resize=1,
                detector=detector, equalize=True)
       .split_by_rand_pct())

False
None
1
True



AttributeError: 'NoneType' object has no attribute 'select_largest'

In [68]:
%debug

> [0;32m<ipython-input-66-132210fa8fb9>[0m(4)[0;36m__init__[0;34m()[0m
[0;32m      2 [0;31m[0;32mclass[0m [0mVideoFaceList[0m[0;34m([0m[0mImageList[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m    [0;32mdef[0m [0m__init__[0m[0;34m([0m[0mself[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0mdetector[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mdevice[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mresize[0m[0;34m=[0m[0;36m.5[0m[0;34m,[0m [0mequalize[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 4 [0;31m        [0mprint[0m[0;34m([0m[0mdetector[0m[0;34m.[0m[0mselect_largest[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m        [0mprint[0m[0;34m([0m[0mdevice[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      6 [0;31m        [0mprint[0m[0;34m([0m[0mresize[0m[0;34m)[0m[0;34m

In [33]:
src.train.detector.select_largest

True

In [18]:
%%time
src

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 5.48 µs


ItemLists;

Train: VideoFaceList (320 items)
Image (3, 160, 160),Image (3, 160, 160),Image (3, 160, 160),Image (3, 160, 160),Image (3, 160, 160)
Path: ../data/train_sample_videos;

Valid: VideoFaceList (80 items)
Image (3, 160, 160),Image (3, 160, 160),Image (3, 160, 160),Image (3, 160, 160),Image (3, 160, 160)
Path: ../data/train_sample_videos;

Test: None

In [35]:
%%time
data = src.label_from_df('label').databunch(bs=32, device=device, num_workers=4)

CPU times: user 41.3 s, sys: 648 ms, total: 42 s
Wall time: 9.29 s


In [36]:
%%time
xb, yb = next(iter(data.train_dl))

KeyboardInterrupt: 

In [31]:
xb.shape, yb.shape

(torch.Size([32, 3, 160, 160]), torch.Size([32]))

In [32]:
yb

tensor([0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 1, 1, 0, 0, 0], device='cuda:0')

In [33]:
%%time 
xb, yb = next(iter(data.valid_dl))

CPU times: user 17.7 s, sys: 336 ms, total: 18.1 s
Wall time: 3.77 s


In [34]:
xb.shape, yb.shape

(torch.Size([32, 3, 160, 160]), torch.Size([32]))

# -fin

In [1]:
from nbdev.export import *
notebook2script()

Converted 00_lookatdata.ipynb.
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
Converted 01_face_recog.ipynb.
Converted 01a_face_extraction.ipynb.
Converted 01b_get_one_face.ipynb.
This cell doesn't have an export destination and was ignored:
e
Converted 01c_noface_videos.ipynb.
Converted 01d_most_likely_face.ipynb.
Converted 01e_margin.ipynb.
Converted 02_fix_luminosity.ipynb.
Converted 02a_create_faceimage_dataset.ipynb.
Converted 02bis_Create_Dataset-Copy1.ipynb.
Converted 02bis_Create_Dataset.ipynb.
Converted 03_mesonet.ipynb.
Converted 04_Baseline_Classification-Copy1.ipynb.
Converted 04_Baseline_Classification.ipynb.
Converted 04_Classification.ipynb.
Converted 04a_classification_videolist.ipynb.
Converted 05_Class_Imbalance.ipynb.
Converted 06_Focal_Loss.ipynb.
This cell doe