In [None]:
# from tensorflow import keras
# from tensorflow.keras import applications
# # from keras.backend.tensorflow_backend import set_session
# import tensorflow as tf
# config = tf.compat.v1.ConfigProto()
# config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
# config.log_device_placement = True  # to log device placement (on which device the operation ran)
# sess = tf.compat.v1.Session(config=config)
# # set_session(sess)  # set this TensorFlow session as the default session for Keras

# from tensorflow.keras.layers import Dense
# from tensorflow.keras.models import Model 
# from tensorflow.keras import optimizers
# from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping

In [2]:
from facenet_pytorch import MTCNN
from PIL import Image
import matplotlib.pyplot as plt
import torch
from imutils.video import FileVideoStream
from custom_utils import return_all_video_paths
import cv2
import time
import glob
import os
import json
import random
import numpy as np
import imutils
import threading
from IPython.display import display
from os.path import join
from tqdm.notebook import tqdm
import traceback

device = 'cuda' if torch.cuda.is_available() else 'cpu'

ModuleNotFoundError: No module named 'facenet_pytorch'

In [None]:
from __future__ import print_function, division

import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import copy

## Prepare data

In [None]:
class FastMTCNN(object):
    """Fast MTCNN implementation."""
    
    def __init__(self, resize=1, *args, **kwargs):
        """Constructor for FastMTCNN class.
        
        Arguments:
            stride (int): The detection stride. Faces will be detected every `stride` frames
                and remembered for `stride-1` frames.
        
        Keyword arguments:
            resize (float): Fractional frame scaling. [default: {1}]
            *args: Arguments to pass to the MTCNN constructor. See help(MTCNN).
            **kwargs: Keyword arguments to pass to the MTCNN constructor. See help(MTCNN).
        """
        self.resize = resize
        self.mtcnn = MTCNN(*args, **kwargs)
        
    def __call__(self, frames, double_frames=False):
        """Detect faces in frames using strided MTCNN."""
        frames_original = frames.copy()
        if self.resize != 1:
            frames = [f.resize([int(d * self.resize) for d in f.size]) for f in frames]
                      
        boxes, probs = self.mtcnn.detect(frames)
        
#         print(boxes)
#         print(probs)
#         return (boxes, probs)
        
        faces = []
        faces_lag = []
        frame_boxes = None
        idx = 0
        original_length = int(len(frames_original)/2) if double_frames else len(frames_original) 
        for i in range(original_length):
            frame_boxes = boxes[i]
            if frame_boxes is None:
                pass
            elif len(frame_boxes) > 1:
                return []
            else:
                frame = frames_original[idx]
                for box in frame_boxes:
                    faces.append(np.array(frame.crop(box/self.resize)))
                    if double_frames:
                        frame = frames_original[idx+1]
                        faces_lag.append(np.array(frame.crop(box/self.resize)))
                if double_frames:
                    idx = idx + 1
                idx = idx + 1
        faces.extend(faces_lag)
        
        return faces

In [None]:
def return_all_video_paths_v2(train_path = '../deepfake_train_full'):
    train_folders = [os.path.join(train_path, x) for x in os.listdir(train_path) if x[-1] != 'p']
    train_video_folders = [os.path.join(x, os.listdir(x)[0]) for x in train_folders]

    all_videos = {}
    for train_video_folder in train_video_folders:
        with open(join(train_video_folder, 'metadata.json')) as json_file:
            metadata = json.load(json_file)
        all_videos[train_video_folder] = []
        for key in metadata:
            ann = metadata[key]
            if ann['label'] == 'FAKE':
                all_videos[train_video_folder].append({
                    'FAKE': join(train_video_folder, key),
                    'REAL': join(train_video_folder, ann['original'])}
                )

    return all_videos

In [None]:
class GetFaces:
    
    def __init__(self, fast_mtcnn):
        self.fast_mtcnn = fast_mtcnn
        
    @staticmethod
    def open_video(path):
        v_cap = FileVideoStream(path).start()
        v_len = int(v_cap.stream.get(cv2.CAP_PROP_FRAME_COUNT))
        
        return (v_cap, v_len)
    
    @staticmethod
    def get_frames_from_video(v_cap, v_len, double_frames=False, nr_of_frames=48, offset=5):
        frames = []
        for j in range(v_len):
            frame = v_cap.read()
            if frame is not None:
                if j%offset == 0 or (((j+int(offset/2))%offset == 0) and double_frames):
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame = Image.fromarray(frame)
                    frames.append(frame)
            else:
                break
            if len(frames) >= nr_of_frames:
                if double_frames and len(frames) >= nr_of_frames*2:
                    break
                elif not double_frames:
                    break
                
        return frames
    
    def detect_faces(self, frames, double_frames=False):
        return self.fast_mtcnn(frames, double_frames)
    
    def resize_faces(self, faces, augment):
        args = {}
        new_shape = (224,224)
        resized_faces = []
        for face in faces:
            img = face
            if np.argmax(img.shape[:2]) == 0:
                args['height'] = new_shape[0]
            else:
                args['width'] = new_shape[1]
            try:
                resized_img = imutils.resize(img, **args)
                if augment:
                    resized_img = self.augment_img(resized_img)
                
                if np.argmax(img.shape[:2]) == 0:
                    diff = new_shape[1] - resized_img.shape[1]
                    resized_img = np.pad(resized_img, ((0,0), (0,diff), (0,0)), 'constant', constant_values=0)
                else:
                    diff = new_shape[0] - resized_img.shape[0]
                    resized_img = np.pad(resized_img, ((0,diff), (0,0), (0,0)), 'constant', constant_values=0)
                resized_faces.append(resized_img)
    
            except Exception as e:
                print(e)
                
        return resized_faces
    
    def create_batch(self, resized_fake_faces, resized_real_faces, \
                     batch_size, video_annotation):
#         batch_fake = []
#         batch_real = []
        batch = []
#         for i in range(m*2):
        for i in range(batch_size):
            s = int((batch_size/2)*i)
            e = int((batch_size/2)*(i+1))
            if s >= len(resized_real_faces) or s >= len(resized_fake_faces):
                break
            real = resized_real_faces[s:e]
            fake = resized_fake_faces[s:e]
            if len(real) > 0 and len(fake) > 0:
                X = np.array(fake + real) / 255
                Y = np.concatenate((np.ones(len(resized_fake_faces[s:e])), np.zeros(len(resized_real_faces[s:e]))))
                if len(X) > 0 and len(Y) > 0:
#                     if augment:
#                         temp = []
#                         for x in X:
#                             temp.append(self.augment_img(x))
#                         X = np.array(temp)
                    X = X.reshape((X.shape[0],3,224,224))
                    Y = np.expand_dims(np.array(Y), axis=1)
                    batch.append((X, Y))
                else:
                    continue
    #                 print('---')
    #                 print('0 faces from:', video_annotation)
                
        return batch
    
    def augment_img(self, img):
        if random.random() > 0.5:
            img = np.flip(img, axis=1)
            
        return img
    
    def process_functions(self, video_key, nr_of_frames=48, frame_offset=5, double_frames=False):
        video, video_len = self.open_video(video_key)
        frames = self.get_frames_from_video(video, video_len, nr_of_frames=nr_of_frames, \
                                            offset=frame_offset, double_frames=double_frames)
        faces = self.detect_faces(frames, double_frames)
        resized_faces = self.resize_faces(faces, augment=False)
        
        return resized_faces
    
    def get_resized_faces(self, video_annotation, double_frames=False, batch_size=16, nr_of_frames=48):
        start = time.time()
        
        resized_fake_faces = self.process_functions(video_annotation['FAKE'], nr_of_frames, double_frames=double_frames)
        resized_real_faces = self.process_functions(video_annotation['REAL'], nr_of_frames, double_frames=double_frames)
        
        batches = self.create_batch(resized_fake_faces, resized_real_faces, \
                     batch_size, video_annotation)
    
        return batches
    
    def insert_data_to_list(self, shared_list, all_videos_annotations, batch_size=16, max_len_shared_list=100):
        keys = all_videos_annotations.keys()
        while True:
            ones = np.ones(batch_size)
            zeros = np.zeros(batch_size)
            
            # Random folder and video each iteration
            key = random.choice(list(keys))
            video_annotation = random.choice(all_videos_annotations[key])
            
#             for key in self.all_videos_annotations:
#                 for video_annotation in self.all_videos_annotations[key]:
#             print(video_annotation)
            try:
                batches = self.get_resized_faces(video_annotation, double_frames=True)
                for X, Y in batches:
                    shared_list.append((X, Y))
                random.shuffle(shared_list)
            except Exception as e:
#                         continue
                print('-----')
                print(e)
                print(traceback.format_exc())
                print(video_annotation)
            # Check if list already full
            while len(shared_list) > max_len_shared_list:
                time.sleep(0.01)

In [None]:
all_videos_annotations = return_all_video_paths_v2()

In [None]:
fast_mtcnn = FastMTCNN(
    resize=0.25,
    margin=14,
    factor=0.6,
    keep_all=True,
    device=device,
    thresholds=[0.95,0.95,0.95]
)

# fast_mtcnn = FastMTCNN(
#     resize=0.25,
#     margin=0,
#     select_largest=False,
# #     factor=0.6,
# #     keep_all=False,
#     device=device
# )

In [None]:
get_faces = GetFaces(fast_mtcnn)

In [None]:
train_list = []
get_faces = GetFaces(fast_mtcnn)
val_split = int(len(all_videos_annotations)*0.9)
train_all_videos_annotations = {key: all_videos_annotations[key] for key in list(all_videos_annotations.keys())[:val_split]}
val_all_videos_annotations = {key: all_videos_annotations[key] for key in list(all_videos_annotations.keys())[val_split:]}
p_train = threading.Thread(target=get_faces.insert_data_to_list, \
            args=(train_list, train_all_videos_annotations,))
p_train.start()


In [None]:
train_list

In [None]:
len(train_list)

In [None]:
def generator(shared_list):
    while True:
        if len(shared_list) > 50:
            yield shared_list.pop(0)
        else:
            time.sleep(0.01)

In [None]:
gen_train = generator(train_list)

In [None]:
i = 0
for x, y in gen_train:
    break

In [None]:
for img in x:
    plt.imshow(img.reshape((224,224,3)))
    plt.show()

In [None]:
frame = x[3].reshape((224,224,3))

In [None]:
img = np.flip([frame], axis=2)
plt.imshow(img[0])

In [None]:
plt.imshow(frame)

## Train model

In [None]:
model_ft = models.resnet101(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
# model_ft.fc = nn.Sigmoid()
# model_ft.add_module('last_sigmoid', nn.Sigmoid())
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, 1), nn.Sigmoid())
# model_ft = nn.Sequential(model_ft, nn.Sigmoid())

model_ft = model_ft.to(device)

criterion = nn.BCEWithLogitsLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0002, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1) 
lr_sch = lr_scheduler.ReduceLROnPlateau(optimizer_ft, factor=0.3, patience=5)

In [None]:
model_ft

In [None]:
def train_model(model, criterion, optimizer, scheduler, generator, 
                num_epochs=25, steps_per_epoch=500):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = 9999

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data.
        step = 0
        for inputs, labels in generator:
            inputs = torch.from_numpy(inputs).float().to(device)
            labels = torch.from_numpy(labels).float().to(device)

            step = step + 1
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
#                 loss = criterion(outputs) - labels

                loss.backward()
                optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            print("Step: {} - Loss {}".format(step, running_loss / step), end="\r")
#             print(running_loss / step)
            running_corrects += torch.sum(preds == labels.data)
            if step > steps_per_epoch:
                break
        scheduler.step(running_loss)

        epoch_loss = running_loss / steps_per_epoch
        epoch_acc = running_corrects / steps_per_epoch

        print('Loss: {:.4f} Acc: {:.4f}'.format(
            epoch_loss, epoch_acc))

        # deep copy the model
        if epoch_loss < best_loss:
#             model.save_state_dict('mytraining.pt')
            torch.save(model, 'mytraining_full_3.pth')
            best_loss = epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
model_ft = train_model(model_ft, criterion, optimizer_ft, lr_sch,
                       gen_train,
                       num_epochs=200,
                       steps_per_epoch=2000)

In [23]:
model_ft = torch.load('mytraining_full_2.pth')
model_ft2 = train_model(model_ft, criterion, optimizer_ft, lr_sch,
                       gen_train,
                       num_epochs=200,
                       steps_per_epoch=2000)

Epoch 0/199
----------
Loss: 9.6770 Acc: 127.0000343595277

Epoch 1/199
----------
Loss: 9.7428 Acc: 126.0000755617904

Epoch 2/199
----------
Loss: 9.9018 Acc: 126.0000246129376

Epoch 3/199
----------
Loss: 9.8084 Acc: 127.0000616726974

Epoch 4/199
----------
Loss: 9.9175 Acc: 127.0000045777875

Epoch 5/199
----------
Loss: 9.7198 Acc: 126.0000140479506

Epoch 6/199
----------
----- 1052 - Loss 9.844530181853036
need at least one array to stack
Traceback (most recent call last):
  File "<ipython-input-6-c9b2f71155d1>", line 136, in insert_data_to_list
    batches = self.get_resized_faces(video_annotation, double_frames=True)
  File "<ipython-input-6-c9b2f71155d1>", line 114, in get_resized_faces
    resized_fake_faces = self.process_functions(video_annotation['FAKE'], nr_of_frames, double_frames=double_frames)
  File "<ipython-input-6-c9b2f71155d1>", line 106, in process_functions
    faces = self.detect_faces(frames, double_frames)
  File "<ipython-input-6-c9b2f71155d1>", line 34, 

KeyboardInterrupt: 

In [23]:
model_ft = torch.load('mytraining_full_2.pth')
model_ft2 = train_model(model_ft, criterion, optimizer_ft, lr_sch,
                       gen_train,
                       num_epochs=200,
                       steps_per_epoch=2000)

Epoch 0/199
----------
Loss: 9.6559 Acc: 126.0000327526551

Epoch 1/199
----------
Loss: 9.7508 Acc: 127.0000320994147

Epoch 2/199
----------
Loss: 9.6673 Acc: 126.0000108060417

Epoch 3/199
----------
Loss: 9.7608 Acc: 126.0000061871283

Epoch 4/199
----------
Loss: 9.7372 Acc: 126.0000163245012

Epoch 5/199
----------
Loss: 9.7060 Acc: 127.0000143164387

Epoch 6/199
----------
Loss: 9.8739 Acc: 126.0000670197437

Epoch 7/199
----------
Loss: 10.0110 Acc: 127.0000856854983

Epoch 8/199
----------
Loss: 9.6635 Acc: 126.0000943465534

Epoch 9/199
----------
----- 1111 - Loss 9.656533383014072
list index out of range
Traceback (most recent call last):
  File "<ipython-input-6-c9b2f71155d1>", line 139, in insert_data_to_list
    random.shuffle(shared_list)
  File "C:\Users\Anti\Anaconda3\envs\torch\lib\random.py", line 277, in shuffle
    x[i], x[j] = x[j], x[i]
IndexError: list index out of range

{'FAKE': '../deepfake_train_full\\dfdc_train_part_13\\dfdc_train_part_13\\sherosbvvz.mp4',

KeyboardInterrupt: 

In [None]:
model_ft = train_model(model_ft, criterion, optimizer_ft, lr_sch,
                       gen_train,
                       num_epochs=200,
                       steps_per_epoch=2000)

Epoch 0/199
----------
Loss: 10.5068 Acc: 127.0000904371126

Epoch 1/199
----------
Loss: 10.0317 Acc: 127.0000906314531

Epoch 2/199
----------
----- 1832 - Loss 9.774019267849265
need at least one array to stack
Traceback (most recent call last):
  File "<ipython-input-6-c9b2f71155d1>", line 136, in insert_data_to_list
    batches = self.get_resized_faces(video_annotation, double_frames=True)
  File "<ipython-input-6-c9b2f71155d1>", line 114, in get_resized_faces
    resized_fake_faces = self.process_functions(video_annotation['FAKE'], nr_of_frames, double_frames=double_frames)
  File "<ipython-input-6-c9b2f71155d1>", line 106, in process_functions
    faces = self.detect_faces(frames, double_frames)
  File "<ipython-input-6-c9b2f71155d1>", line 34, in detect_faces
    return self.fast_mtcnn(frames, double_frames)
  File "<ipython-input-4-140578ad711d>", line 25, in __call__
    boxes, probs = self.mtcnn.detect(frames)
  File "C:\Users\Anti\Anaconda3\envs\torch\lib\site-packages\face

In [None]:
i = 0
for x, y in gen_train:
    if i > 4:
        break
    i = i + 1
    idx = 0
    print(y[idx])
    plt.imshow(x[idx].reshape((224,224,3)))
    plt.show()
    
    idx = 9
    print(y[idx])
    plt.imshow(x[idx].reshape((224,224,3)))
    plt.show()

In [None]:
idx = 1
print(y[idx])
plt.imshow(x[idx].reshape((224,224,3)))

## Predict results

In [20]:
all_videos_annotations['../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0'][0]

{'FAKE': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\owxbbpjpch.mp4',
 'REAL': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\wynotylpnm.mp4'}

In [23]:
model_ft = models.resnet101(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, 1), nn.Sigmoid())

model_ft = model_ft.to(device)


In [22]:
model_ft.load_state_dict(torch.load('mytraining.pt'))
model_ft.eval()

RuntimeError: Error(s) in loading state_dict for ResNet:
	Missing key(s) in state_dict: "fc.0.weight", "fc.0.bias". 
	Unexpected key(s) in state_dict: "fc.weight", "fc.bias". 

In [24]:
# resized_faces = get_faces.process_functions('../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\owxbbpjpch.mp4', \
#                                             nr_of_frames=10,\
#                                             frame_offset=20)
resized_faces = get_faces.process_functions('../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\wynotylpnm.mp4', \
                                            nr_of_frames=10,\
                                            frame_offset=20)
X = np.array(resized_faces)
X = X.reshape((X.shape[0],3,224,224))

In [25]:
with torch.set_grad_enabled(True):
    inputs = torch.from_numpy(X).float().to(device)
    outputs = model_ft(inputs)

In [26]:
outputs

tensor([[0.5907],
        [0.5313],
        [0.6162],
        [0.5726],
        [0.6020],
        [0.5330],
        [0.6194],
        [0.5655],
        [0.5579],
        [0.5624]], device='cuda:0', grad_fn=<SigmoidBackward>)

In [None]:
outputs

## Old

In [None]:
for x, y in train_gen:
    break

In [None]:
idx = 1
print(y[idx])
plt.imshow(x[idx])

In [None]:
idx = 9
print(y[idx])
plt.imshow(x[idx])

In [None]:
def generator(shared_list, batch_size):
    while True:
        if len(shared_list) > batch_size:
            X = []
            Y = []
            random.shuffle(shared_list)
            for i in range(batch_size):
                x, y = shared_list.pop()

                X.append(x/255)
                Y.append(y)
            yield (np.array(X).reshape((batch_size,3,224,224)), \
                   np.expand_dims(np.array(Y), axis=1))
        else:
            time.sleep(0.01)

In [None]:
batch = get_faces.get_resized_faces({'FAKE': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\ohaqlzfnuv.mp4', 'REAL': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\sttnfyptum.mp4'})

In [None]:
len(batch[0][1])

In [None]:
len(batch[1][1])

In [None]:
boxes, probs = get_faces.get_resized_faces({'FAKE': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\owxbbpjpch.mp4', 'REAL': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\wynotylpnm.mp4'})

In [None]:
boxes

In [None]:
probs

In [None]:
for i in range(len(boxes)):
    if boxes[i] is None:
        print('None', i)
    elif len(boxes[i]) == 2:
#         print(len(boxes[i]))
        print(i)

In [None]:
video_annotation ={'FAKE': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\ohaqlzfnuv.mp4', 
                   'REAL': '../deepfake_train_full\\dfdc_train_part_00\\dfdc_train_part_0\\sttnfyptum.mp4'}

In [None]:
l = []
batch_fake, batch_real = get_faces.get_resized_faces(video_annotation)

In [None]:
len(batch_fake)

In [None]:
len(batch_real)

In [None]:
len(batch_fake[0])

In [None]:
len(batch_real[0])

In [None]:
np.concatenate((np.ones(3), np.zeros(5)))

In [None]:
probs[8]

In [None]:
help(MTCNN)

In [None]:
probs[17]

In [None]:
np.array(train_list).shape

In [None]:
train_list = []
batch_fake, batch_real = get_faces.insert_data_to_list(train_list)

In [None]:
train_list

In [None]:
np.array(train_list[0]).shape

In [None]:
plt.imshow(train_list[0][0])

In [None]:
plt.imshow(train_list[0][6])

In [None]:
i = 2
split_batch = 8
n = 1
len(batch_fake[i][split_batch*n:split_batch*(n+1)] + batch_real[i][split_batch*n:split_batch*(n+1)])

In [None]:
def generator(shared_list, batch_size):
    while True:
        if len(shared_list) > batch_size:
            X = []
            Y = []
            random.shuffle(shared_list)
            for i in range(batch_size):
                x, y = shared_list.pop()

                X.append(x/255)
                Y.append(y)
            yield (np.array(X).reshape((batch_size,3,224,224)), \
                   np.expand_dims(np.array(Y), axis=1))
        else:
            time.sleep(0.01)

In [None]:
gen_train = generator(train_list, 16)

In [None]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [None]:
model_ft = models.resnet101(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model_ft.fc = nn.Linear(num_ftrs, 1)

model_ft = model_ft.to(device)

criterion = nn.BCEWithLogitsLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
def train_model(model, criterion, optimizer, scheduler, generator, 
                num_epochs=25, steps_per_epoch=500):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = 9999

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0

        # Iterate over data.
        step = 0
        for inputs, labels in generator:
            inputs = torch.from_numpy(inputs).float().to(device)
            labels = torch.from_numpy(labels).float().to(device)

            step = step + 1
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(True):
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
#                 loss = criterion(outputs) - labels

                loss.backward()
                optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            print("Step: {} - Loss {}".format(step, running_loss / step), end="\r")
#             print(running_loss / step)
            running_corrects += torch.sum(preds == labels.data)
            if step > steps_per_epoch:
                break
        scheduler.step()

        epoch_loss = running_loss / steps_per_epoch
        epoch_acc = running_corrects / steps_per_epoch

        print('Loss: {:.4f} Acc: {:.4f}'.format(
            epoch_loss, epoch_acc))

        # deep copy the model
        if epoch_loss < best_loss:
#             model.save_state_dict('mytraining.pt')
            torch.save(model.state_dict(), 'mytraining.pt')
            best_loss = epoch_loss
            best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       gen_train,
                       num_epochs=50,
                       steps_per_epoch=500)

In [None]:
for X, Y in gen_train:
    break

In [None]:
X.shape

In [None]:
X.shape

In [None]:
np.expand_dims(Y, axis=1).shape

In [None]:
for x in range(10):
    print("Progress {}".format(x / 10), end="\r")
    time.sleep(1)

In [None]:
x / 10

In [None]:
model = applications.mobilenet_v2.MobileNetV2(include_top=True, weights='imagenet', input_shape=(224, 224, 3))
x = model.output
predictions = Dense(1, activation="sigmoid")(x)
model_final = Model(inputs = model.input, outputs = predictions)
model_final.compile(loss = "mean_squared_logarithmic_error", \
                    optimizer = optimizers.SGD(lr=0.0001, momentum=0.9), metrics=["accuracy"])

In [None]:
checkpoint = ModelCheckpoint("MobileNetV2_retrain.h5", monitor='accuracy', verbose=1, \
                             save_best_only=True, save_weights_only=False, mode='auto', save_freq=1)
reduce_lr = ReduceLROnPlateau(monitor='accuracy', factor=0.1, patience=7, verbose=0, \
                              mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

In [None]:
model_final.fit_generator(
  gen_train,
  steps_per_epoch = 500,
  epochs = 1000,
#   validation_data = gen_val,
#   validation_steps = 5,
  callbacks = [reduce_lr, checkpoint])

In [None]:
model_final.fit_generator(
  gen_train,
  steps_per_epoch = 500,
  epochs = 1000,
#   validation_data = gen_val,
#   validation_steps = 5,
  callbacks = [reduce_lr, checkpoint])

In [None]:
model_final.fit_generator(
  gen_train,
  steps_per_epoch = 500,
  epochs = 1000,
#   validation_data = gen_val,
#   validation_steps = 5,
  callbacks = [reduce_lr, checkpoint])

In [None]:
model_final.fit_generator(
  gen_train,
  steps_per_epoch = 500,
  epochs = 1000,
#   validation_data = gen_val,
#   validation_steps = 5,
  callbacks = [reduce_lr, checkpoint])

In [None]:
from tensorflow.python.platform import build_info as tf_build_info
print(tf_build_info.cuda_version_number)
# 9.0 in v1.10.0
print(tf_build_info.cudnn_version_number)

In [None]:
for X, Y in gen_train:
    break

In [None]:
X.shape

In [None]:
np.array(Y)

In [None]:
model_final.fit(X,np.array(Y))

In [None]:
import ctypes
ctypes.WinDLL("cudnn64_7.dll")

In [None]:
tf.test.is_built_with_cuda()

In [None]:
tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None)

## Extract audio from video

### Speech recogniziton. SLOW

In [None]:
import speech_recognition as sr

In [None]:
command2mp3 = 'ffmpeg -i ../deepfake_train_full\\dfdc_train_part_46\\dfdc_train_part_46\\aqsgzoxyok.mp4 \
    ../deepfake_train_full_audio\\aqsgzoxyok.wav'

In [None]:
os.system(command2mp3)

In [None]:
r = sr.Recognizer()

In [None]:
with sr.AudioFile('../deepfake_train_full_audio\\aqsgzoxyok.wav') as source:
    audio = r.record(source)

In [None]:
print("Sphinx thinks you said " + r.recognize_sphinx(audio))

In [None]:
audio

In [None]:
audio = sr.AudioFile('../deepfake_train_full_audio\\aqsgzoxyok.mp3')

In [None]:
audio = r.record(source, duration=100)

In [None]:
print(r.recognize_google(audio))

### Plot signal

In [None]:
command2mp3 = 'ffmpeg -i ../deepfake_train_full\\dfdc_train_part_26\\dfdc_train_part_26\\gpdtoamvkz.mp4 \
    ../deepfake_train_full_audio\\gpdtoamvkz.wav'
os.system(command2mp3)

command2mp3 = 'ffmpeg -i ../deepfake_train_full\\dfdc_train_part_26\\dfdc_train_part_26\\yojgjueqta.mp4 \
    ../deepfake_train_full_audio\\yojgjueqta.wav'
os.system(command2mp3)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import wave
import sys
import math


spf = wave.open('../deepfake_train_full_audio\\gpdtoamvkz.wav')

# Extract Raw Audio from Wav File
signal = spf.readframes(-1)
signal = np.fromstring(signal, "Int16")


# If Stereo
if spf.getnchannels() == 2:
    print("Just mono files")
    sys.exit(0)

plt.figure(1)
plt.title("Signal Wave...")
plt.plot(signal)
for i in range(int(len(signal)/48000)):
    plt.axvline(x=48000*(i+1), color='red')
plt.show()

spf = wave.open('../deepfake_train_full_audio\\yojgjueqta.wav')

# Extract Raw Audio from Wav File
signal = spf.readframes(-1)
signal = np.fromstring(signal, "Int16")

plt.figure(1)
plt.title("Signal Wave...")
plt.plot(signal)
for i in range(int(len(signal)/48000)):
    plt.axvline(x=48000*(i+1), color='red')
plt.show()