In [1]:
import sys
import os
import subprocess
import torch
import numpy as np
import fileinput
import matplotlib.pyplot as plt
import torch.distributed as dist
import models
from dfd_utils.FaceForensicsDataset import FaceForensicsDataset
from dfd_utils.utils import plot_images, get_embeddings, plot_embeddings_2D, plot_embeddings_3D
from torch.utils.data import Subset
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm 

from sklearn.manifold import TSNE
from matplotlib import cm

from torchvision import transforms, utils, datasets
import cv2
from facenet_pytorch import MTCNN
import random

%load_ext autoreload
%reload_ext autoreload
%autoreload 2

In [None]:
!pip install tabulate
!pip install configargparse
!pip install filelock
!pip install strconv

In [47]:
CONFIG_FILE_PATH = "/srv/DeepFakeDetection/andrew_atonov_simclr_pytorch/simclr-pytorch/configs/generic_config.yaml"
#cifar_train_epochs1000_bs1024.yaml
#ff_train.yaml

In [20]:
# Modify params in the file
batch_size = 32
eval_only = True

## Train Base Model

In [16]:
cmd = f'python train.py --config {CONFIG_FILE_PATH}'
!{cmd}

args: Namespace(ckpt='', config_file='./configs/cifar_train_epochs1000_bs1024.yaml', deepfakes=False, dist='ddp', dist_address='127.0.0.1:1234', eval_freq=4800, eval_only=False, iters=48000, log_freq=48, lr=4.0, lr_schedule='warmup-anneal', name='reproduce-cifar10', node_rank=0, opt='lars', problem='sim-clr', save_freq=4800, seed=-1, tmp=False, verbose=True, warmup=0.01, weight_decay=1e-06, workers=2, world_size=1)
arch: ResNet50
aug: true
batch_size: 128
ckpt: ''
color_dist_s: 0.5
config_file: ./configs/cifar_train_epochs1000_bs1024.yaml
data: cifar
deepfakes: false
dist: ddp
dist_address: 127.0.0.1:1234
eval_freq: 4800
eval_only: false
iters: 48000
log_freq: 48
lr: 4.0
lr_schedule: warmup-anneal
multiplier: 2
name: reproduce-cifar10
node_rank: 0
opt: lars
problem: sim-clr
root: /srv/DeepFakeDetection/andrew_atonov_simclr_pytorch/simclr-pytorch/logs/exman-train.py/runs/000143
save_freq: 4800
scale_lower: 0.08
seed: -1
sync_bn: true
temperature: 0.5
tmp: false
verbose: true
warmup: 0.0

## Eval Base Model

In [17]:
# Eval base modelb
cmd = f'python train.py --config {CONFIG_FILE_PATH} --eval_only True'
!{cmd}

args: Namespace(ckpt='', config_file='./configs/cifar_train_epochs1000_bs1024.yaml', deepfakes=False, dist='ddp', dist_address='127.0.0.1:1234', eval_freq=4800, eval_only=True, iters=48000, log_freq=48, lr=4.0, lr_schedule='warmup-anneal', name='reproduce-cifar10', node_rank=0, opt='lars', problem='sim-clr', save_freq=4800, seed=-1, tmp=False, verbose=True, warmup=0.01, weight_decay=1e-06, workers=2, world_size=1)
arch: ResNet50
aug: true
batch_size: 128
ckpt: ''
color_dist_s: 0.5
config_file: ./configs/cifar_train_epochs1000_bs1024.yaml
data: cifar
deepfakes: false
dist: ddp
dist_address: 127.0.0.1:1234
eval_freq: 4800
eval_only: true
iters: 48000
log_freq: 48
lr: 4.0
lr_schedule: warmup-anneal
multiplier: 2
name: reproduce-cifar10
node_rank: 0
opt: lars
problem: sim-clr
root: /srv/DeepFakeDetection/andrew_atonov_simclr_pytorch/simclr-pytorch/logs/exman-train.py/runs/000144
save_freq: 4800
scale_lower: 0.08
seed: -1
sync_bn: true
temperature: 0.5
tmp: false
verbose: true
warmup: 0.01


In [None]:
# Train classificaiton Head

In [None]:
# Load checkpoint

## Plot Embeddings

### Load Model

In [2]:
plt.rcParams['figure.dpi']=300
device = torch.device('cuda')
dist.init_process_group(
            backend='nccl',
            init_method='tcp://%s' % 'localhost:8882',
            world_size=1,
            rank=0,
        )

In [3]:
MODELS_FOLDER = '/media/shirbar/My Passport/trained_models'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

chkpt_path = f'{MODELS_FOLDER}/simclr_original_augs_pretrained_resnet.pth.tar'
print('loading checkpoint:', chkpt_path, ' ...')
chkpt = torch.load(chkpt_path,
                    map_location=device)
print('checkpoint loaded!')

print('loading model...')
model = models.ssl.SimCLR.load(chkpt, device=device)
model.eval()
print('model loaded!')

loading checkpoint: /media/shirbar/My Passport/trained_models/simclr_original_augs_pretrained_resnet.pth.tar  ...
checkpoint loaded!
loading model...
device is: cuda
ddp
** Removing original FC layer **
** Using avgpool **


Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/leva1/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

** Loading pretrained ResNet50 weights **
ssl.__init__:device is: cuda
hparams.gpu : 0
model loaded!


### Load FaceForensics Dataset

In [11]:
def load_ff_ds_test(wanted_ds_size,
                    batch_size,
                    load_deepfakes=False,
                    load_face2face=False,
                    load_neural_textures=False,
                    masking_transforms=False,
                    transform=None):
    TEST_DATASET_PATH = '/media/shirbar/My Passport/FaceForensics/split_ds/test'
    ff_ds_test  = FaceForensicsDataset(TEST_DATASET_PATH,
                                    transform=None,
                                    load_deepfakes=load_deepfakes,
                                    load_face2face=load_face2face,
                                    load_neural_textures=load_neural_textures,
                                  masking_transforms=masking_transforms)

    ff_ds_test_len = len(ff_ds_test)
    wanted_ds_size = wanted_ds_size if ff_ds_test_len > wanted_ds_size else ff_ds_test_len
    random_indices = random.sample(range(0,ff_ds_test_len), wanted_ds_size)
    test_subset = Subset(ff_ds_test, random_indices)

    test_loader = DataLoader(test_subset, batch_size=batch_size, shuffle = True)

    print('test size:', test_subset.__len__())
    return test_loader

## Deepfakes

In [12]:
test_loader = load_ff_ds_test(2000, 16, load_deepfakes=True)

loading real images...
real images loaded!
loading deepfakes...
deepfakes loaded!
real imgs len: 73768
masks len: 73768
fakes imgs len: 73770
fakes masks len: 73770
final imgs len: 147538
final masks len: 147538
asserting order
assertion passed!
test size: 2000


In [None]:
embeds, targets = get_embeddings(model, test_loader, device)

 53%|█████▎    | 66/125 [2:23:08<1:30:08, 91.66s/it] 

### Plot example images

In [None]:
images, _, targets = next(iter(test_loader))
plot_images(images, targets.tolist())

### 2D Embeddings - Deepfakes + Real

In [None]:
plot_embeddings_2D(embeds, targets)

### 3D Embeddings - Deepfakes + Real

In [None]:
%matplotlib notebook
plot_embeddings_3D(embeds, targets)

## Face2Face

In [None]:
test_loader = load_ff_ds_test(2000, 16, load_face2face=True)

In [None]:
embeds, targets = get_embeddings(model, test_loader, device)

### Plot example images

In [None]:
images, _, targets = next(iter(test_loader))
plot_images(images, targets.tolist())

### 2D Embeddings - Face2Face + Real

In [None]:
%matplotlib inline
plot_embeddings_2D(embeds, targets)

### 3D Embeddings - Face2Face + Real

In [None]:
%matplotlib notebook
plot_embeddings_3D(embeds, targets)

## NeuralTextures

In [None]:
test_loader = load_ff_ds_test(2000, 16, load_neural_textures=True)

In [None]:
embeds, targets = get_embeddings(model, test_loader, device)

### Plot example images

In [None]:
images, _, targets = next(iter(test_loader))
plot_images(images, targets.tolist())

### 2D Embeddings - NeuralTextures + Real

In [None]:
plot_embeddings_2D(embeds, targets)

### 3D Embeddings - NeuralTextures + Real

In [None]:
%matplotlib notebook
plot_embeddings_3D(embeds, targets)

In [47]:
plt.rcParams['figure.dpi']=300
device = torch.device('cuda')
dist.init_process_group(
            backend='nccl',
            init_method='tcp://%s' % 'localhost:8882',
            world_size=1,
            rank=0,
        )

RuntimeError: trying to initialize the default process group twice!

In [196]:
device = torch.device('cuda')

chkpt = torch.load(
    '/media/shirbar/My Passport/trained_models/simclr_original_augs_pretrained_resnet.pth.tar',
    map_location=device)
model = models.ssl.SimCLR.load(ckpt, device=device)
model.eval()

device is: cuda
ddp
** Removing original FC layer **
** Using avgpool **
ssl.__init__:device is: cuda
hparams.gpu : 0


SimCLR(
  (model): DistributedDataParallel(
    (module): EncodeProject(
      (convnet): ResNet50(
        (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (layer1): Sequential(
          (0): Bottleneck(
            (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn1): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): SyncBatchNorm(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn3): SyncBatchNorm(256, eps=1e-05, moment