# Basic Setup

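Import code from either Google Colab or a local drive.
Select the environment by setting `is_on_colab` in the first cell; the two setup cells that follow are each guarded by this flag, so only the matching one takes effect.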

In [0]:
# SET HERE whether the notebook is executed on Google Colab (True) or locally (False).
# The two setup cells below are guarded by this flag; the drive root path and the
# log directory further down are derived from it as well.
is_on_colab = True

In [2]:
if is_on_colab:
    # Google Colab setup: mount Google Drive (this prompts for an authorization code)
    from google.colab import drive
    drive.mount('/content/drive')

    # Switch the working directory to the project folder on the mounted drive.
    # Presumably this is what lets the project imports (networks.*, training.*) and the
    # relative "runs" log directory resolve on Colab -- TODO confirm.
    import os
    os.chdir("/content/drive/My Drive/adl4cv")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# LOCAL EXECUTION ONLY (the guard makes this cell a no-op on Google Colab)
# Puts the parent directory of the notebook on sys.path so that sibling directories can be imported.
# see: https://stackoverflow.com/questions/34478398/import-local-function-from-a-module-housed-in-another-directory-with-relative-im

if not is_on_colab:
    import os
    import sys

    # absolute path of the directory one level above the current working directory
    module_path = os.path.abspath('..')

    # append only once, so re-running the cell does not grow sys.path
    if module_path not in sys.path:
        sys.path.append(module_path)

In [0]:
# Imports for this notebook

from networks.temporal_encoder_network import TemporalEncoder
from training.solver import Solver
from training.sequence_dataloader import FaceForensicsVideosDataset, ToTensor
from torch.utils import data
from torch.utils.data.sampler import SubsetRandomSampler
import torch

In [5]:
# Detect whether CUDA is available and select the device accordingly.

cuda = torch.cuda.is_available()

# first GPU if CUDA is available, CPU otherwise
if cuda:
    device = "cuda:0"
else:
    device = "cpu"

print("Training is on GPU with CUDA: {}".format(cuda))
print("Device: {}".format(device))


Training is on GPU with CUDA: True
Device: cuda:0


# Load Data and Model

*   Load FaceForensics sequences: Choose a list of corresponding file-paths.
*   Load the model for this notebook.
*   Enable or disable usage of optical flow / warp as input.

In [6]:
# Dataset choice: 100 videos dataset (False) or 1000 videos dataset (True).
# Two different folders were constructed, one per dataset size, so this flag is needed
# for the file-paths to be resolved correctly later.
is_large_dataset = True

# Optical flow choice:
#   True:  warps will be calculated in the dataloader and model/solver will get them as an input
#   False: warps will not be calculated and model/solver do not get them as an input
opticalFlowEnabled = False

# Report the chosen configuration
print("Will use 100 videos dataset: {}. Will use 1000 videos dataset: {}".format(not is_large_dataset, is_large_dataset))
print("Will use Optical Flow / Warp as input for network: {}".format(opticalFlowEnabled))


Will use 100 videos dataset: False. Will use 1000 videos dataset: True
Will use Optical Flow / Warp as input for network: False


In [0]:
# Resolve the dataset locations on the drive and build the train / validation datasets

# root_dir: drive root without the FaceForensics suffix (needed later, e.g. for model saving)
if is_on_colab:
    root_dir = "/content/drive/My Drive/"
else:
    root_dir = "F:/Google Drive/"

# The two datasets differ in root folder and sequence folder; only the large one
# has separate /train and /val sub-folders.
if is_large_dataset:
    dataset_root = "FaceForensics_large"
    sequence = "sequences_299x299_10seq@10frames_skip_5_uniform"
    train_suffix = "/train"
    val_suffix = "/val"
else:
    dataset_root = "FaceForensics_Sequences"
    sequence = "sequences_299x299_5seq@10frames_skip_5_uniform"
    train_suffix = ""
    val_suffix = ""

# root_path: drive root including the dataset folder
root_path = root_dir + dataset_root

# Locations relative to root_path: the original videos plus four manipulation methods
original_location = "/original_sequences/youtube/c40/" + sequence
deepfake_location = "/manipulated_sequences/Deepfakes/c40/" + sequence
face2face_location = "/manipulated_sequences/Face2Face/c40/" + sequence
faceswap_location = "/manipulated_sequences/FaceSwap/c40/" + sequence
neuraltextures_location = "/manipulated_sequences/NeuralTextures/c40/" + sequence

locations = [
    original_location,
    deepfake_location,
    face2face_location,
    faceswap_location,
    neuraltextures_location,
]

train_loc = [root_path + location + train_suffix for location in locations]
val_loc = [root_path + location + val_suffix for location in locations]

# Keep the resolved directories for logging, one entry per directory
train_dict = {"train-dataset-" + str(i): directory for i, directory in enumerate(train_loc)}
val_dict = {"val-dataset-" + str(i): directory for i, directory in enumerate(val_loc)}

# One original directory faces (len - 1) fake directories: scale the fake loss by
# 1 / (number of fake directories), e.g. 0.5 when two fake variants are used.
fake_weight_factor = 1.0 / (len(train_loc) - 1)

data_dict = {"fake_weight_factor": fake_weight_factor}
data_dict.update(train_dict)
data_dict.update(val_dict)


def _load_sequences(directories):
    """Build a FaceForensicsVideosDataset over `directories` (10 frames, one sequence per video, no caching)."""
    return FaceForensicsVideosDataset(directories,
                                      transform=ToTensor(),
                                      num_frames=10,
                                      max_number_sequences_per_video=1,
                                      calculateOpticalFlow=opticalFlowEnabled,
                                      verbose=False,
                                      caching=False)


train_dataset = _load_sequences(train_loc)
val_dataset = _load_sequences(val_loc)

print("Loaded following data: {}".format(data_dict))

Loading directory 1/5: /content/drive/My Drive/FaceForensics_large/original_sequences/youtube/c40/sequences_299x299_10seq@10frames_skip_5_uniform/train


HBox(children=(IntProgress(value=0, max=720), HTML(value='')))

Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.
Reached maximum number of sequences per video (1), will skip the rest.


In [0]:
# Setup pytorch dataloaders
# from: https://stackoverflow.com/questions/50544730/how-do-i-split-a-custom-dataset-into-training-and-test-datasets

dataset_args = {
    "batch_size": 16,
    "validation_percentage": 0.2,
    "sequence": sequence,
}
dataset_args.update(data_dict)

# NOTE: the caching in the dataset was observed to work only with num_workers=0.
# Presumably each worker process operates on its own copy of the dataset -- TODO confirm.
# The datasets used here are created with caching=False, so workers are used.
num_workers = 4

if is_large_dataset:
    # separate /train and /val folders exist --> use each dataset completely, in shuffled order
    train_loader_options = {"shuffle": True}
    val_loader_options = {"shuffle": True}
else:
    # one folder holds train+val --> split it ourselves at runtime into index lists
    validation_share = dataset_args["validation_percentage"]
    train_indices, val_indices = train_dataset.get_train_val_lists(1 - validation_share, validation_share)

    # samplers that draw only from the respective index list
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    train_loader_options = {"sampler": train_sampler}
    val_loader_options = {"sampler": val_sampler}

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=dataset_args["batch_size"],
                                           num_workers=num_workers,
                                           **train_loader_options)

validation_loader = torch.utils.data.DataLoader(val_dataset,
                                                batch_size=dataset_args["batch_size"],
                                                num_workers=num_workers,
                                                **val_loader_options)

# len() of a DataLoader is its number of batches, not its number of samples
dataset_args["train_len"] = len(train_loader)
dataset_args["val_len"] = len(validation_loader)

print("Dataset parameters: {}".format(dataset_args))

In [0]:
# Load Temporal Encoder II Model

# Every key below is a keyword argument of TemporalEncoder, so the dict can be unpacked directly.
model_args = {
    "model_choice": "xception",
    "num_out_classes": 2,
    "dropout": 0.5,
    "num_input_images": 10,
    "feature_dimension": 128,
    "temporal_encoder_depth": 5,
    "delta_t": 2,
    "useOpticalFlow": opticalFlowEnabled,
}

model = TemporalEncoder(**model_args)

# Added only after construction: records the class name for logging, it is not a constructor argument.
model_args["model"] = type(model).__name__

print("Model configuration: {}".format(model_args))

# List the parameters of the feature extractor that will receive gradients
print("Only the following layers of feature extractor component (pretrained!) require gradient backpropagation (param.requires_grad)")
for name, param in model.feature_extractor.named_parameters():
    if not param.requires_grad:
        continue
    print("param: {} requires_grad: {}".format(name, param.requires_grad))

# Training Visualization

Start Tensorboard for visualization of the upcoming training / validation / test steps.

In [0]:
# Start tensorboard
%load_ext tensorboard
%tensorboard --logdir runs

# Training

Start training process.

In [0]:
# Create unique ID for this training process for saving to disk.

from datetime import datetime
import uuid

now = datetime.now()  # current date and time

# Named run_id rather than id, so the builtin id() is not shadowed for the rest of the session.
run_id = str(uuid.uuid1())

# human-readable timestamp + uuid, used for the log directory and the saved model file name
id_suffix = now.strftime("%Y-%b-%d_%H-%M-%S") + "_" + run_id

log_dir = "runs/TemporalEncoderII/" + id_suffix

# for local execution the log directory is placed one level above the working directory
if not is_on_colab:
    log_dir = "../" + log_dir

print("log_dir:", log_dir)

In [0]:
# Configure solver

# Model and dataset configuration merged into one dict that is handed to the solver
extra_args = dict(model_args)
extra_args.update(dataset_args)

# Per-class weights of the cross-entropy loss: index 0 gets the fake weight factor, index 1 gets 1.0
# NOTE(review): presumably class 0 = fake and class 1 = original -- confirm against the dataset labels.
weights = [dataset_args["fake_weight_factor"], 1.0]
class_weights = torch.FloatTensor(weights).to(device)

print("Using following weighting scheme in cross-entropy-loss: {}\n".format(class_weights))

# weight_decay is the l2 regularization parameter, see: https://pytorch.org/docs/stable/optim.html
adam_args = {
    "lr": 4e-4,
    "betas": (0.9, 0.999),
    "eps": 1e-8,
    "weight_decay": 0.01,
}

weighted_cross_entropy = torch.nn.CrossEntropyLoss(weight=class_weights)

solver = Solver(optim=torch.optim.Adam,
                optim_args=adam_args,
                loss_func=weighted_cross_entropy,
                extra_args=extra_args,
                log_dir=log_dir)

In [0]:
# Start training

# The string below is only an in-cell note; it is neither displayed nor used.
# It applies when a dataset is created with caching enabled (the train / validation
# datasets of this notebook are created with caching=False).
'''
During the first epoch, all required images (and warps) are loaded into RAM, if the option caching was chosen previously.
This might take a while, but after the first epoch, it will be faster.
The loading is done during the first epoch and not when loading the dataloader,
because it would take ~1 hour (in google colab) just to load all data before we can see any 
training output. Instead, we can start right away and load just-in-time.

Note that the first epoch is only slow when a brand-new data_loader is used.
Using the same data_loader for multiple trainings keeps the images loaded.

Warning: Only use this if the RAM is big enough to store all images...
'''

# Train for 10 epochs; the validation loader is passed alongside the train loader.
# log_nth is presumably the logging interval in iterations -- see training.solver.Solver to confirm.
solver.train(model, train_loader, validation_loader, num_epochs=10, log_nth=50)

# Test

Test with the same fake methods that were used for training, and with one specific (different) fake method.
Each test loads its data and then starts the testing.

Visualizations can be seen in Tensorboard above.

In [0]:
# Load test data for all fake types that were trained on according to "locations" variable defined above

# The two datasets keep their test data in different places:
#   large dataset: <root_path><location>/test
#   small dataset: <root_path>/FaceForensics_Testset<location>
if is_large_dataset:
    testset_dir_prefix, testset_dir_suffix = "", "/test"
else:
    testset_dir_prefix, testset_dir_suffix = "/FaceForensics_Testset", ""

same_test_data_location = [root_path + testset_dir_prefix + location + testset_dir_suffix
                           for location in locations]

# caching is only enabled for the small dataset
same_test_dataset = FaceForensicsVideosDataset(
    same_test_data_location,
    transform=ToTensor(),
    num_frames=10,
    max_number_sequences_per_video=1,
    calculateOpticalFlow=opticalFlowEnabled,
    verbose=False,
    caching=not is_large_dataset,
)

# Sample every index of the test set, in random order
same_test_indices = range(len(same_test_dataset))
same_test_sampler = SubsetRandomSampler(same_test_indices)

same_test_loader = torch.utils.data.DataLoader(
    same_test_dataset,
    batch_size=dataset_args["batch_size"],
    sampler=same_test_sampler,
    num_workers=4,
)

print("Length of same fake test set: {}".format(len(same_test_dataset)))
print("Loaded test set: {}".format(same_test_data_location))

In [0]:
# Start testing on the test data of the fake methods that were used for training.
# test_prefix presumably labels the results of this test run (e.g. in TensorBoard) -- confirm in training.solver.Solver.

solver.test(model, same_test_loader, test_prefix="Same_Fake_Method", log_nth=10)

In [0]:
# Load test data for one specific (different) fake type

fake_type = "Deepfakes" # NeuralTextures, FaceSwap, Face2Face, Deepfakes, Pristine

# Two different folders were constructed, one with 100 videos and one with 1000 videos,
# and their directory structure is different.
if is_large_dataset:
    dif_test_data_location = [root_path + "/original_sequences/youtube/c40/" + sequence + "/test",
                              root_path + "/manipulated_sequences/" + fake_type + "/c40/" + sequence + "/test"]
else:
    # NOTE(review): this branch lists the manipulated directory first, while the branch above
    # and the training "locations" list the original directory first -- confirm whether
    # FaceForensicsVideosDataset depends on the directory order.
    dif_test_data_location = [root_path + "/FaceForensics_Testset/manipulated_sequences/" + fake_type + "/c40/" + sequence,
                              root_path + "/FaceForensics_Testset/original_sequences/youtube/c40/" + sequence,
                              ]

# caching is only enabled for the small dataset
dif_test_dataset = FaceForensicsVideosDataset(dif_test_data_location,
                                              transform=ToTensor(),
                                              num_frames=10,
                                              max_number_sequences_per_video=1,
                                              calculateOpticalFlow=opticalFlowEnabled,
                                              verbose=False,
                                              caching=not is_large_dataset)

# The batch size comes from the shared dataset configuration (as for the same-method test loader)
# instead of a hard-coded 16, so tuning it in one place affects every loader.
dif_test_loader = torch.utils.data.DataLoader(dif_test_dataset,
                                              batch_size=dataset_args["batch_size"],
                                              shuffle=True,
                                              num_workers=4)

print("Length of dif fake test set: {}".format(len(dif_test_dataset)))

In [0]:
# Start testing on the single fake method selected via fake_type.
# The fake type is appended to test_prefix, so runs for different fake types get different prefixes.

solver.test(model, dif_test_loader, test_prefix="Different_Fake_Method_" + fake_type, log_nth=1)

# Save the model

Save network with its weights to disk.

See torch.save function: https://pytorch.org/docs/stable/notes/serialization.html#recommend-saving-models 

Load again with `the_model = TheModelClass(*args, **kwargs)` followed by `the_model.load_state_dict(torch.load(PATH))`.

In [0]:
def save_model(modelname, model):
    """Save the weights (state_dict) of `model` to the project's model folder.

    The file is written to <root_dir>/adl4cv/saved_results/models/<modelname>.pt,
    where root_dir is the notebook-level drive root. The folder is created if it
    does not exist yet, so the first save on a fresh drive does not fail.

    Args:
        modelname: file name without the ".pt" extension.
        model: object providing state_dict(), e.g. a torch.nn.Module.
    """
    import os  # local import: the setup cells import os only inside their guards

    filepath = root_dir + "/adl4cv/saved_results/models/" + modelname + ".pt"

    # torch.save does not create missing parent directories
    os.makedirs(os.path.dirname(filepath), exist_ok=True)

    torch.save(model.state_dict(), filepath)

In [0]:
# Save the model under a file name that carries the unique id suffix of this training run.
save_model("temporal_encoder_2_" + id_suffix, model)

In [0]:
# LOAD MODEL AGAIN for verification purposes
# Should print: <All keys matched successfully>
# To load the checkpoint of an earlier run instead, replace the file name below
# ("temporal_encoder_2_" + id_suffix + ".pt") with the file name of that run.
filepath = root_dir + "/adl4cv/saved_results/models/" + "temporal_encoder_2_" + id_suffix + ".pt"

# map_location remaps the stored tensors onto the device selected for this session, so a
# checkpoint written from a GPU run can also be loaded on a CPU-only runtime.
model.load_state_dict(torch.load(filepath, map_location=device))