# Deepfake submission

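This notebook is intended to be a submission kernel for the competition.  To use it, you need to add the `facenet-pytorch-vggface2` and `realfake` datasets to the kernel: the cells below read the face-detector package, the helper module `kernel_module.py` and the trained model weights from them.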

In [1]:
# Install facenet-pytorch from the wheel shipped in the attached dataset.
!pip install /kaggle/input/facenet-pytorch-vggface2/facenet_pytorch-2.0.0-py3-none-any.whl
from facenet_pytorch.models.inception_resnet_v1 import get_torch_home
# Base directory under which the checkpoints are copied below.
torch_home = get_torch_home()
# Copy model checkpoints to torch cache so they are loaded automatically by the package
!mkdir -p $torch_home/checkpoints/
!cp /kaggle/input/facenet-pytorch-vggface2/20180402-114759-vggface2-logits.pth $torch_home/checkpoints/vggface2_DG3kwML46X.pt
!cp /kaggle/input/facenet-pytorch-vggface2/20180402-114759-vggface2-features.pth $torch_home/checkpoints/vggface2_G5aNV2VSMn.pt
# Copy the helper module from the `realfake` dataset into the working directory before importing it.
! cp ../input/realfake/kernel_module.py ../working/.
# NOTE(review): star import -- presumably this is where Path, pd, np, torch, MTCNN,
# get_files, VideoFaceList, MesoNet, etc. used below come from; confirm against kernel_module.py.
from kernel_module import *

Processing /kaggle/input/facenet-pytorch-vggface2/facenet_pytorch-2.0.0-py3-none-any.whl
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.0.0


### Data

In [2]:
# Directory holding the sample training videos and their JSON metadata.
SOURCE = Path('../input/deepfake-detection-challenge/train_sample_videos/')

In [3]:
# Read the first JSON file found under SOURCE. After transposing, the former
# index is turned into a regular column named `fname`.
f = get_files(SOURCE, extensions=['.json'])[0]
annots = (
    pd.read_json(f)
    .T
    .reset_index()
    .rename(columns={'index': 'fname'})
)
annots.head()

Unnamed: 0,fname,label,split,original
0,aagfhgtpmv.mp4,FAKE,train,vudstovrck.mp4
1,aapnvogymq.mp4,FAKE,train,jdubbvfswz.mp4
2,abarnvbtwb.mp4,REAL,train,
3,abofeumbvv.mp4,FAKE,train,atvmxvwyns.mp4
4,abqwwspghj.mp4,FAKE,train,qzimuostzz.mp4


#### Get face detector

In [4]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

In [5]:
# Face detector placed on the selected device, with its `post_process` option switched off.
detector = MTCNN(device=device, post_process=False)

#### Remove videos in which no faces are detected.  

In [6]:
# Full path of every video listed in the annotations.
fnames = [SOURCE / video_name for video_name in annots['fname']]

In [7]:
# One entry per file in `fnames`; used below as a row mask over `annots`.
# NOTE(review): presumably a boolean "face detected" flag per video -- confirm in kernel_module.py.
hasface = get_has_face(fnames, detector)

In [8]:
# Keep only the annotation rows selected by the `hasface` mask.
hasface_mask = np.array(hasface)
annots_hasface = annots[hasface_mask]

#### Create `DataBunch`

In [9]:
# Build the item list from the face-containing videos and hold out a random
# subset for validation. A fixed seed keeps the train/validation split the same
# on every Restart & Run All; the value 42 is arbitrary.
src = (VideoFaceList
       .from_df(df=annots_hasface, path=SOURCE, cols='fname', detector=detector)
       .split_by_rand_pct(seed=42))

In [10]:
# Batch size and image size passed to the data pipeline below.
bs = 32
sz = 128

In [11]:
# Label items from the `label` column, attach the default transforms at size `sz`,
# then assemble the batches and normalize with the ImageNet statistics.
labelled = src.label_from_df('label')
transformed = labelled.transform(get_transforms(), size=sz)
data = transformed.databunch(bs=bs, num_workers=0).normalize(imagenet_stats)

CPU times: user 4.6 s, sys: 72 ms, total: 4.67 s
Wall time: 3.79 s


### Model

In [13]:
# Instantiate the network with its default constructor arguments; weights are loaded further down via the Learner.
model = MesoNet()

### Learner

In [18]:
# Wrap data and model in a Learner. `path` points at the `realfake` dataset and
# `model_dir` is empty, so saved weights are looked up directly in that folder.
learn = Learner(
    data,
    model,
    metrics=accuracy,
    path='../input/realfake/',
    model_dir='',
)

In [19]:
# Load the saved `mesonet_stage1` weights; the trailing `;` suppresses the cell output.
learn.load('mesonet_stage1');

### Inference

In [20]:
# Directory holding the competition's test videos.
SOURCE_TEST = Path('../input/deepfake-detection-challenge/test_videos/')

In [21]:
# Collect every .mp4 under the test directory.
# NOTE: this rebinds `fnames`, which earlier held the training video paths.
fnames = get_files(SOURCE_TEST, extensions=['.mp4'])
fnames[:3]

[PosixPath('../input/deepfake-detection-challenge/test_videos/iorbtaarte.mp4'),
 PosixPath('../input/deepfake-detection-challenge/test_videos/vnlzxqwthl.mp4'),
 PosixPath('../input/deepfake-detection-challenge/test_videos/gqnaxievjx.mp4')]

Again, because we can't deal with videos which have no detected face, we need to ignore them for now.

In [22]:
# Same face check as for the training videos, now over the test files; one entry per path in `fnames`.
hasface_tst = get_has_face(fnames, detector)

In [23]:
# Keep only the test videos whose face flag is truthy, then show how many remain.
fnames_tst_hasface = [
    video_path
    for video_path, face_found in zip(fnames, hasface_tst)
    if face_found
]
len(fnames_tst_hasface)

396

In [24]:
# Build the inference item list from the face-containing test videos, sorted by path.
vlist = VideoFaceList(sorted(fnames_tst_hasface), detector=detector)

In [26]:
# Sanity check: the item count and the list length should agree.
len(vlist.items), len(vlist)

(396, 396)

Note that there are 400 test videos, but we will only be able to write an entry in the submission file for 396 of these.

In [None]:
! head  ../data/sample_submission.csv

In [29]:
# Run the trained learner over the test item list; the result is a DataFrame with `filename` and `label` columns.
# NOTE(review): videos with no detected face are not in `vlist`, so they get no row here -- confirm
# whether the submission needs a default prediction for them.
df = infer_on_videolist(learn, vlist)

In [30]:
# Preview the first predictions.
df.head()

Unnamed: 0,filename,label
0,aassnaulhq.mp4,0
1,aayfryxljh.mp4,0
2,acazlolrpz.mp4,0
3,adohdulfwb.mp4,0
4,ahjnxtiamx.mp4,1


In [31]:
# Write the predictions to the submission file in the working directory, without the index column.
df.to_csv('submission.csv', index=False)

In [32]:
# Show the first lines of the written file to check the header and row format.
! head submission.csv

filename,label
aassnaulhq.mp4,0
aayfryxljh.mp4,0
acazlolrpz.mp4,0
adohdulfwb.mp4,0
ahjnxtiamx.mp4,1
ajiyrjfyzp.mp4,0
aktnlyqpah.mp4,0
alrtntfxtd.mp4,0
aomqqjipcp.mp4,1


# - fin