# Look at some samples

In [208]:
%reload_ext autoreload
%autoreload 2

In [209]:
#default_exp nb_00

In [210]:
#hide
from nbdev.showdoc import *

In [211]:
#export
from fastai.vision import *
import pandas as pd

In [212]:
SOURCE = Path('../data/dfdc_train_part_11/')

### Videos

In [213]:
fnames = get_files(SOURCE, extensions=['.mp4'])
fnames[:3], len(fnames)

([PosixPath('../data/dfdc_train_part_11/xecgqkezbe.mp4'),
  PosixPath('../data/dfdc_train_part_11/tuitlooreh.mp4'),
  PosixPath('../data/dfdc_train_part_11/hswgjnccsw.mp4')],
 2118)

In [214]:
#export
from IPython.display import display, Video, HTML

In [215]:
#export
def html_vid(fname, **kwargs): return display(Video(fname, **kwargs))

In [216]:
#export
def html_vid(fname):
    "Return HTML for video."
    return f'''
    <video width="300" height="250" controls>
    <source src="{fname}" type="video/mp4">
    </video>
    '''

In [217]:
HTML(html_vid(fnames[9]))

In [218]:
#export
def html_titled_vid(fname, title):
    "Return HTML for titled video."
    return f'<div><p>{title}</p><br>{html_vid(fname)}</div>'

In [219]:
#export
def html_vids(fnames, titles=None, ncols=3):
    "Return HTML for table of (titled) videos."
    n = len(fnames)
    if titles is None: titles = n * ['']
    assert len(titles) == n
    rs = []
    for i in range(0, n, ncols):
        fs, ts = fnames[i:i+ncols], titles[i:i+ncols]
        xs = (html_titled_vid(f, t) for f,t in zip(fs, ts))
        xs = (f'<td>{x}</td>' for x in xs)
        r = f"<tr>{''.join(xs)}</tr>"
        rs.append(r)
    return f"<table>{''.join(rs)}</table>"

In [220]:
HTML(html_vids(fnames[10:15], ['fake', 'real', 'fake', 'carrot', 'muffin'], ncols=3))

0,1,2
fake,real,fake
carrot,muffin,


### Annotations

In [221]:
f = get_files(SOURCE, extensions=['.json'])[0]
annots = pd.read_json(f).T

Note that FAKE videos have the additional annotation field *original*, which indicates the original, real video from which the fake was generated, perhaps.  For real videos, this field is `NaN`.

In [223]:
annots[annots.label=='REAL'].head()

Unnamed: 0,label,split,original
ztfjilznzu.mp4,REAL,train,
xfrfdspnox.mp4,REAL,train,
qifhccqwpi.mp4,REAL,train,
rmpwnnxmye.mp4,REAL,train,
tkofwngclr.mp4,REAL,train,


In [224]:
annots[annots.label=='FAKE'].head()

Unnamed: 0,label,split,original
qgqsgtekwl.mp4,FAKE,train,ztutsnlhtr.mp4
bahpguunin.mp4,FAKE,train,frczmdfzza.mp4
qekttcqtpm.mp4,FAKE,train,zesnphumru.mp4
zxuqoykuqj.mp4,FAKE,train,dovdrtvmbx.mp4
sqqgqnolwf.mp4,FAKE,train,oimivjeigb.mp4


This checks that all the fake videos' original video are also videos in this dataset.

In [271]:
assert set(annots.original[annots.original.notna()].unique())  - set(annots.index.unique()) == set()

Display a few samples with their label.

In [225]:
sr, sf = annots[annots.label=='REAL'].sample(8), annots[annots.label=='FAKE'].sample(8)
s = sr.append(sf)
s.head()
s['fname'] = pd.Series(s.index).apply(lambda o: SOURCE/o).values
s = s.sample(s.shape[0])

In [228]:
HTML(html_vids(s.fname, s.label))

0,1,2
REAL,REAL,FAKE
FAKE,REAL,FAKE
REAL,REAL,FAKE
REAL,FAKE,REAL
REAL,FAKE,FAKE
FAKE,,


Display some fake videos next to their original video.

In [278]:
a = np.array([4, 5, 6])
b = np.array([14, 15, 16])
np.stack([a, b], axis=1).reshape(-1)

array([ 4, 14,  5, 15,  6, 16])

In [284]:
n = 5
s = annots[annots.label == 'FAKE'].sample(n)
rs, fs = s.index.values, s.original.values
vids = np.stack([rs, fs], axis=1).reshape(-1)
vids = [SOURCE/o for o in vids]
HTML(html_vids(vids, ncols=2))

0,1
,
,
,
,
,


# -fin

In [198]:
from nbdev.export import *

In [204]:
notebook2script()

Converted 00_lookatdata.ipynb.
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
This cell doesn't have an export destination and was ignored:
e
Converted 01_face_recog.ipynb.
