label smoothing

In [None]:
import os, sys, random
import numpy as np
import pandas as pd
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from tqdm.notebook import tqdm

%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
gpu

In [None]:
# Notebook-wide settings; everything a reader might want to tune lives here.
image_size = 224  # not referenced by the visible cells; presumably the face-crop side length (TODO confirm)
batch_size = 64  # batch size passed to every DataLoader
cv = 1  # number of folds inspected by val_intersections()
LABELS = ['REAL', 'FAKE']  # list position is the numeric target: REAL -> 0, FAKE -> 1
lr = 0.01  # learning rate given to Adam and re-applied through set_lr() before training
wd = 0.  # weight_decay given to Adam
patience = 5  # `patience` argument of ReduceLROnPlateau
factor = 0.7  # `factor` argument of ReduceLROnPlateau

# Data

In [None]:
# Folder holding the face crops; the metadata table sits in the same folder.
crops_dir = "../input/deepfake98493faces/outputs/"

# Derive the CSV location from crops_dir instead of repeating the folder literal.
df = pd.read_csv(os.path.join(crops_dir, "metadata.csv"))
df.head()

In [None]:
# Total number of rows in the metadata table.
len(df)

How many faces of each class do we have?

Look at a random face image:

In [None]:
# Pick one row's image at random and display it. The channel axis is reversed
# because cv2.imread yields BGR while matplotlib expects RGB.
img_path = os.path.join(crops_dir, np.random.choice(df.name_path.values))
plt.imshow(cv2.imread(img_path)[..., ::-1])

## Balance

In [None]:
# Separate the metadata rows by class so the two groups can be balanced afterwards.
real_df = df[df["label"] == "REAL"]
fake_df = df[df["label"] == "FAKE"]
# Report the class sizes from the frames just built rather than filtering df again.
print('Number of real is {}'.format(len(real_df)), 'Number of fake is {}'.format(len(fake_df)))

In [None]:
# Undersample the larger class so both classes contribute the same number of rows.
# Using the minority size keeps this working whichever class is smaller; sampling
# more rows than a frame holds (without replacement) would raise a ValueError.
n_per_class = min(len(real_df), len(fake_df))
real_df_2 = real_df.sample(n=n_per_class)
fake_df_2 = fake_df.sample(n=n_per_class)

balance_df = pd.concat([real_df_2, fake_df_2])
print(len(balance_df))

## Shuffle

In [None]:
from sklearn.utils import shuffle
# Randomly permute the rows of the balanced frame before it is split.
shuffle_df = shuffle(balance_df)

## Split
60% train, 20% val, 20% test<br>

In [None]:
from sklearn.model_selection import train_test_split

# 60% train / 20% validation / 20% test:
# hold out 40% of the rows first, then cut that hold-out in half.
train_df, holdout_df = train_test_split(shuffle_df, test_size=0.4)
val_df, test_df = train_test_split(holdout_df, test_size=0.5)

print(len(train_df), len(val_df), len(test_df))
print(round(len(train_df)/len(shuffle_df), 2),
      round(len(val_df)/len(shuffle_df), 2),
      round(len(test_df)/len(shuffle_df), 2)
     )
# Every row must end up in exactly one of the three splits.
assert len(train_df) + len(val_df) + len(test_df) == len(shuffle_df)


In [None]:
# len_cv = len(shuffle_df)//cv
# dfs = []

# for i in tqdm(range(cv)):  
#     val_df = shuffle_df[i*len_cv : (i+1)*len_cv]
#     train_df = shuffle_df.loc[~shuffle_df.index.isin(val_df.index)]
#     print(len(train_df), len(val_df))
    
#     assert len(val_df) + len(train_df) == len(shuffle_df)
    
#     now_df = {
#         'cv': i,
#         'train_df': train_df,
#         'val_df': val_df
#     }
#     dfs.append(now_df)
    

Confirm that the validation sets of different folds have no intersection (no sample may appear in more than one validation set).

In [None]:
def val_intersections(folds=None):
    """Report pairs of folds whose validation sets share rows.

    Args:
        folds: sequence of dicts, each holding a ``'val_df'`` DataFrame.
            When omitted, the notebook-level ``dfs`` list is used if it
            exists; with no folds available there is nothing to check.

    Returns:
        list of ``(i, j)`` index pairs (both orders are reported) whose
        validation indices overlap. Each pair is also printed as
        ``error i j``.
    """
    if folds is None:
        # The cell that builds `dfs` may not have been run; treat that as "no folds".
        folds = globals().get('dfs') or []

    # Build each fold's index set once instead of once per comparison.
    val_indices = [set(fold['val_df'].index.values) for fold in folds]

    clashes = []
    for i, left in enumerate(val_indices):
        for j, right in enumerate(val_indices):
            if i == j:
                continue
            if left & right:
                print('error', i, j)
                clashes.append((i, j))
    return clashes

val_intersections()

# Augmentation

In [None]:
from torchvision.transforms import Normalize

class Unnormalize:
    """Inverse of a per-channel Normalize for 3-channel image tensors.

    Multiplies by ``std``, adds ``mean`` (each reshaped to ``(3, 1, 1)`` so it
    broadcasts over the spatial axes) and clamps the result to ``[0, 1]``.
    """

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        def per_channel(values):
            # Match the input's dtype/device so the arithmetic needs no implicit casts.
            return torch.as_tensor(values, dtype=tensor.dtype, device=tensor.device).view(3, 1, 1)

        restored = tensor * per_channel(self.std) + per_channel(self.mean)
        return restored.clamp(0., 1.)


# Per-channel statistics shared by the Normalize / Unnormalize pair below.
# NOTE(review): these look like the usual ImageNet mean/std — confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize = Normalize(mean, std)
unnormalize = Unnormalize(mean, std)

In [None]:
def random_hflip(img, p=0.5):
    """Mirror ``img`` left-to-right with probability ``p``; otherwise return it untouched."""
    if random.random() >= p:
        return img
    # flipCode 1 flips around the vertical axis (horizontal mirror).
    return cv2.flip(img, 1)

In [None]:
import albumentations
from albumentations.augmentations.transforms import ShiftScaleRotate, HorizontalFlip, Normalize, RandomBrightnessContrast, MotionBlur, Blur, GaussNoise, JpegCompression
# Training pipeline: geometric, photometric, blur, noise and JPEG augmentations,
# each with its own probability p, followed by Normalize() as the final step.
# `Normalize` here is the albumentations class imported just above, which
# replaces the torchvision name imported earlier in the notebook.
train_transform = albumentations.Compose([
                                          ShiftScaleRotate(p=0.3, scale_limit=0.25, border_mode=1, rotate_limit=25),
                                          HorizontalFlip(p=0.2),
                                          RandomBrightnessContrast(p=0.3, brightness_limit=0.25, contrast_limit=0.5),
                                          MotionBlur(p=.2),
                                          GaussNoise(p=.2),
                                          JpegCompression(p=.2, quality_lower=50),
                                          Normalize()
])
# Validation pipeline: normalization only, no random augmentation.
val_transform = albumentations.Compose([Normalize()])

# GridMask

In [None]:
# from albumentations.core.transforms_interface import DualTransform
# from albumentations.augmentations import functional as F1

# ## The author details are there along with the function
# class GridMask(DualTransform):
#     """GridMask augmentation for image classification and object detection.
    
#     Author: Qishen Ha
#     Email: haqishen@gmail.com
#     2020/01/29

#     Args:
#         num_grid (int): number of grid in a row or column.
#         fill_value (int, float, lisf of int, list of float): value for dropped pixels.
#         rotate ((int, int) or int): range from which a random angle is picked. If rotate is a single int
#             an angle is picked from (-rotate, rotate). Default: (-90, 90)
#         mode (int):
#             0 - cropout a quarter of the square of each grid (left top)
#             1 - reserve a quarter of the square of each grid (left top)
#             2 - cropout 2 quarter of the square of each grid (left top & right bottom)

#     Targets:
#         image, mask

#     Image types:
#         uint8, float32

#     Reference:
#     |  https://arxiv.org/abs/2001.04086
#     |  https://github.com/akuxcw/GridMask
#     """

#     def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
#         super(GridMask, self).__init__(always_apply, p)
#         if isinstance(num_grid, int):
#             num_grid = (num_grid, num_grid)
#         if isinstance(rotate, int):
#             rotate = (-rotate, rotate)
#         self.num_grid = num_grid
#         self.fill_value = fill_value
#         self.rotate = rotate
#         self.mode = mode
#         self.masks = None
#         self.rand_h_max = []
#         self.rand_w_max = []

#     def init_masks(self, height, width):
#         if self.masks is None:
#             self.masks = []
#             n_masks = self.num_grid[1] - self.num_grid[0] + 1
#             for n, n_g in enumerate(range(self.num_grid[0], self.num_grid[1] + 1, 1)):
#                 grid_h = height / n_g
#                 grid_w = width / n_g
#                 this_mask = np.ones((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w))).astype(np.uint8)
#                 for i in range(n_g + 1):
#                     for j in range(n_g + 1):
#                         this_mask[
#                              int(i * grid_h) : int(i * grid_h + grid_h / 2),
#                              int(j * grid_w) : int(j * grid_w + grid_w / 2)
#                         ] = self.fill_value
#                         if self.mode == 2:
#                             this_mask[
#                                  int(i * grid_h + grid_h / 2) : int(i * grid_h + grid_h),
#                                  int(j * grid_w + grid_w / 2) : int(j * grid_w + grid_w)
#                             ] = self.fill_value
                
#                 if self.mode == 1:
#                     this_mask = 1 - this_mask

#                 self.masks.append(this_mask)
#                 self.rand_h_max.append(grid_h)
#                 self.rand_w_max.append(grid_w)

#     def apply(self, image, mask, rand_h, rand_w, angle, **params):
#         h, w = image.shape[:2]
#         mask = F1.rotate(mask, angle) if self.rotate[1] > 0 else mask
#         mask = mask[:,:,np.newaxis] if image.ndim == 3 else mask
#         image *= mask[rand_h:rand_h+h, rand_w:rand_w+w].astype(image.dtype)
#         return image

#     def get_params_dependent_on_targets(self, params):
#         img = params['image']
#         height, width = img.shape[:2]
#         self.init_masks(height, width)

#         mid = np.random.randint(len(self.masks))
#         mask = self.masks[mid]
#         rand_h = np.random.randint(self.rand_h_max[mid])
#         rand_w = np.random.randint(self.rand_w_max[mid])
#         angle = np.random.randint(self.rotate[0], self.rotate[1]) if self.rotate[1] > 0 else 0

#         return {'mask': mask, 'rand_h': rand_h, 'rand_w': rand_w, 'angle': angle}

#     @property
#     def targets_as_params(self):
#         return ['image']

#     def get_transform_init_args_names(self):
#         return ('num_grid', 'fill_value', 'rotate', 'mode')
    
# train_transform = albumentations.Compose([
#                                           ShiftScaleRotate(p=0.3, scale_limit=0.25, border_mode=1),
#                                           HorizontalFlip(p=0.2),
#                                           albumentations.RandomCrop(150,150),
#                                           albumentations.OneOf([
#                                             GridMask(num_grid=(1,3),rotate=15),
#                                             GridMask(num_grid=(2,4), mode=0),
#                                             GridMask(num_grid=3, mode=2),
#                                           ], p=0.5),
#                                           Normalize()
# ])
# val_transform = albumentations.Compose([
#                                         albumentations.RandomCrop(150,150),  
#                                         Normalize()
# ])

In [None]:
# effect of augmentation
# Row 0 goes through the validation pipeline, row 1 through the training pipeline.
# Columns: original crop | pipeline output as-is | pipeline output after unnormalize.
fig, ax = plt.subplots(2, 3, figsize=(8, 8))

for row, (name, transform) in enumerate([('val', val_transform), ('train', train_transform)]):
    # A fresh random crop is drawn for each row.
    img_path = os.path.join(crops_dir, np.random.choice(df.name_path.values))
    img1 = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    img2 = transform(**{'image': img1})['image']

    ax[row, 0].imshow(img1)
    ax[row, 0].set_title(name + ': original')
    ax[row, 1].imshow(img2)
    ax[row, 1].set_title(name + ': transformed')
    # unnormalize expects channel-first input, so permute there and back for plotting.
    ax[row, 2].imshow(unnormalize(torch.tensor(img2).permute(2, 0, 1)).permute(1, 2, 0))
    ax[row, 2].set_title(name + ': unnormalized')

plt.show()

In [None]:
from torch.utils.data import Dataset

class VideoDataset(Dataset):
    """Dataset of face crops described by a metadata DataFrame.

    Each item is ``(image, target)``: ``image`` is a float tensor in
    channel-first layout read from the file named in the row's ``name_path``
    column (relative to ``crops_dir``), and ``target`` is the position of the
    row's ``label`` in ``LABELS``.

    Args:
        df: DataFrame with at least ``name_path`` and ``label`` columns.
        split: ``'train'`` selects ``train_transform`` (when ``augment`` is
            true); any other value selects ``val_transform``.
        augment: when false, the random training augmentations are skipped
            and the image only goes through ``val_transform``, so every image
            returned by this dataset is normalized.
    """

    def __init__(self, df, split, augment=True):
        self.crops_dir = crops_dir
        self.split = split
        self.augment = augment
        self.df = df

        num_real = len(self.df[self.df["label"] == "REAL"])
        num_fake = len(self.df[self.df["label"] == "FAKE"])
        print("%s dataset has %d real videos, %d fake videos" % (split, num_real, num_fake))

    def __getitem__(self, index):
        row = self.df.iloc[index]

        # get img
        img_path = os.path.join(self.crops_dir, row["name_path"])
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning None;
            # fail here with the path instead of letting cvtColor raise an opaque error.
            raise FileNotFoundError("Could not read image file: %s" % img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Random augmentation only for the training split; everything else is just
        # normalized, so the network never receives raw 0-255 pixel values.
        if self.split == 'train' and self.augment:
            img = train_transform(**{'image': img})['image']
        else:
            img = val_transform(**{'image': img})['image']
        img = torch.tensor(img).permute((2, 0, 1)).float()

        # get label
        target = LABELS.index(row['label'])

        return img, target

    def __len__(self):
        return len(self.df)

Let's test that the dataset actually works...

In [None]:
# Smoke test: build a dataset on the validation frame, show its first image
# (unnormalized and moved to channel-last for matplotlib), then discard the dataset.
dataset = VideoDataset(val_df, "val")
plt.imshow(unnormalize(dataset[0][0]).permute(1, 2, 0))
del dataset

In [None]:
# Training data: random augmentation, reshuffled every epoch, loaded by 4 worker processes.
train_dataset = VideoDataset(train_df, "train", True)
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                      num_workers=4, pin_memory=True)

# Validation data: fixed order, loaded in the main process (num_workers=0).
val_dataset = VideoDataset(val_df, "val", True)
val_dl = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                    num_workers=0, pin_memory=True)

# Test data reuses the "val" split name so it goes through val_transform as well.
test_dataset = VideoDataset(test_df, "val", True)
test_dl = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                    num_workers=0, pin_memory=True)

In [None]:
# Peek at one training batch: show its first image and print that image's target.
X, y = next(iter(train_dl))
plt.imshow(unnormalize(X[0]).permute(1, 2, 0))
print(y[0])

In [None]:
# Peek at one validation batch: show its first image and print that image's target.
X, y = next(iter(val_dl))
plt.imshow(unnormalize(X[0]).permute(1, 2, 0))
print(y[0])

In [None]:
# Peek at one test batch: show its first image and print that image's target.
X, y = next(iter(test_dl))
plt.imshow(unnormalize(X[0]).permute(1, 2, 0))
print(y[0])

# Model

In [None]:
# Pretrained weights (a ResNeXt-50 32x4d state dict, judging by the file name);
# MyResNeXt.__init__ reads this module-level variable.
checkpoint = torch.load("../input/pretrained-pytorch/resnext50_32x4d-7cdf4587.pth")

In [None]:
import torchvision.models as models

class MyResNeXt(models.resnet.ResNet):
    """torchvision ResNet configured with grouped bottleneck blocks and a 1-logit head.

    The constructor loads the module-level ``checkpoint`` state dict and then
    swaps the final fully connected layer for ``nn.Linear(2048, 1)``.
    The ``training`` argument is accepted but not used.
    """

    def __init__(self, training=True):
        super().__init__(
            block=models.resnet.Bottleneck,
            layers=[3, 4, 6, 3],
            groups=32,
            width_per_group=4,
        )

        # Load the pretrained weights before the head is replaced below.
        self.load_state_dict(checkpoint)

        # New head: a single output unit in place of the original classifier.
        self.fc = nn.Linear(2048, 1)

In [None]:
# Build the model (this loads the pretrained weights) and move it to the selected device.
net = MyResNeXt().to(gpu)

In [None]:
# Drop the raw state dict now that `net` has been built from it.
# MyResNeXt() reads this variable, so it cannot be instantiated again without reloading it.
del checkpoint

Test the model on a small batch to see what its output shape is:

In [None]:
# Shape check only. Run it in eval mode and without autograd: in training mode the
# BatchNorm layers would fold this all-zero dummy batch into their running statistics.
was_training = net.training
net.eval()
with torch.no_grad():
    out = net(torch.zeros((10, 3, 224, 224)).to(gpu))
net.train(was_training)  # restore whatever mode the model was in
out.shape

Freeze the early layers of the model:

In [None]:
def freeze_until(net, param_name):
    """Freeze every parameter that comes before ``param_name``.

    Walks ``net.named_parameters()`` in order: parameters ahead of
    ``param_name`` get ``requires_grad = False``; ``param_name`` itself and
    everything after it get ``requires_grad = True``. If the name never
    occurs, every parameter ends up frozen.
    """
    trainable = False
    for current_name, tensor in net.named_parameters():
        # Once the marker parameter has been seen the flag stays on.
        trainable = trainable or current_name == param_name
        tensor.requires_grad = trainable

In [None]:
# Train only from the first conv weight of layer4 onwards; everything before it is frozen.
freeze_until(net, "layer4.0.conv1.weight")

These are the layers we will train:

In [None]:
# Names of the parameters that still receive gradients after freezing.
[k for k,v in net.named_parameters() if v.requires_grad]

# Training & Evaluating

In [None]:
# Bookkeeping that fit() updates through `global`:
# train_bce gets one entry per training batch, val_bce one entry per epoch.
history = { "train_bce": [], "val_bce": [] }
iteration = 0
epochs_done = 0

# Adam is handed every parameter of the network, using lr / wd from the config cell.
optimizer = torch.optim.Adam(net.parameters(), lr=lr, weight_decay=wd)
# Plateau scheduler in 'min' mode: multiplies the LR by `factor` after `patience`
# steps without improvement of the monitored value, never going below min_lr.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=patience, mode='min', factor=factor, verbose=True, min_lr=1e-5)

In [None]:
def evaluate(net, data_loader, device, silent=False):
    """Compute the mean BCE-with-logits loss of ``net`` over ``data_loader``.

    The model is switched to evaluation mode and gradients are disabled.
    Evaluation never steps an LR scheduler, so measuring a baseline or a test
    set cannot change the learning rate.

    Args:
        net: model producing one logit per example.
        data_loader: iterable of ``(images, targets)`` batches; must support ``len()``.
        device: device the batches are moved to before the forward pass.
        silent: when true, the progress bar and the printed summary are suppressed.

    Returns:
        float: loss averaged over all examples (each batch weighted by its size).
    """
    net.train(False)

    bce_loss = 0
    total_examples = 0

    with tqdm(total=len(data_loader), desc="Evaluation", leave=False, disable=silent) as pbar:
        with torch.no_grad():
            for data in data_loader:
                batch_size = data[0].shape[0]
                x = data[0].to(device)
                y_true = data[1].to(device).float()

                # Flatten the (N, 1) logits to (N,). A bare squeeze() would turn a
                # one-sample batch into a 0-d tensor that no longer matches y_true.
                y_pred = net(x).reshape(-1)

                bce_loss += F.binary_cross_entropy_with_logits(y_pred, y_true).item() * batch_size

                total_examples += batch_size
                pbar.update()

    bce_loss /= total_examples

    if not silent:
        print("BCE: %.4f" % (bce_loss))

    return bce_loss

In [None]:
# Baseline: validation loss before any fine-tuning has happened.
evaluate(net, val_dl, device=gpu)

## Training

In [None]:
def fit(epochs, train_dl, val_dl, net, optimizer, scheduler=None):
    """Train ``net`` for ``epochs`` epochs, validating after each one.

    Each epoch makes one pass over ``train_dl`` with BCE-with-logits loss and
    then runs a silent ``evaluate`` on ``val_dl``. The validation loss is
    appended to ``history["val_bce"]`` and passed to the LR scheduler. When it
    ties or beats the best value recorded so far (compared at 4 decimals),
    the model and optimizer state are saved to ``best_model_.pth``.

    Updates the notebook-level ``history``, ``iteration`` and ``epochs_done``.

    Args:
        epochs: number of epochs to run in this call.
        train_dl: loader of ``(images, targets)`` training batches.
        val_dl: loader handed to ``evaluate`` after every epoch.
        net: model producing one logit per example.
        optimizer: optimizer that updates ``net``.
        scheduler: optional scheduler whose ``step(metric)`` receives the
            validation loss (``ReduceLROnPlateau`` style). Defaults to the
            notebook-level ``scheduler`` when one exists.
    """
    global history, iteration, epochs_done

    if scheduler is None:
        # Pick up the scheduler created next to the optimizer, if there is one.
        scheduler = globals().get("scheduler")

    with tqdm(total=len(train_dl), leave=False) as pbar:
        for epoch in range(epochs):
            pbar.reset()
            pbar.set_description("Epoch %d" % (epochs_done + 1))

            bce_loss = 0
            total_examples = 0

            # evaluate() leaves the model in eval mode, so re-enable training every epoch.
            net.train(True)

            for data in train_dl:
                batch_size = data[0].shape[0]
                x = data[0].to(gpu)
                y_true = data[1].to(gpu).float()

                optimizer.zero_grad()

                # Flatten the (N, 1) logits to (N,). A bare squeeze() would turn a
                # one-sample batch into a 0-d tensor that no longer matches y_true.
                y_pred = net(x).reshape(-1)

                loss = F.binary_cross_entropy_with_logits(y_pred, y_true)
                loss.backward()
                optimizer.step()

                batch_bce = loss.item()
                bce_loss += batch_bce * batch_size
                history["train_bce"].append(batch_bce)

                total_examples += batch_size
                iteration += 1
                pbar.update()

            bce_loss /= total_examples
            epochs_done += 1

            print("Epoch: %3d, train BCE: %.4f" % (epochs_done, bce_loss))

            val_bce_loss = evaluate(net, val_dl, device=gpu, silent=True)
            history["val_bce"].append(val_bce_loss)

            print("              val BCE: %.4f" % (val_bce_loss))

            # LR annealing: let the plateau scheduler see this epoch's validation loss.
            if scheduler is not None:
                scheduler.step(val_bce_loss)

            # Save the best model. The current loss is already in history, so "<="
            # holds exactly when this epoch ties or beats every earlier one.
            if round(val_bce_loss, 4) <= round(min(history['val_bce']), 4):
                print('save the model')
                ck = {'epoch': epochs_done,
                      'state_dict': net.state_dict(),
                      'optimizer': optimizer.state_dict(),
                      'val_loss': round(val_bce_loss, 4)}
                path = 'best_model_.pth'
                torch.save(ck, path)

            print("")

At this point you can load the model from the previous checkpoint. If you do, also make sure to restore the optimizer state! Something like this:

```python
checkpoint = torch.load("best_model_.pth")
net.load_state_dict(checkpoint["state_dict"])
optimizer.load_state_dict(checkpoint["optimizer"])
```

Let's start training!

In [None]:
def set_lr(optimizer, lr):
    """Overwrite the learning rate of every parameter group of ``optimizer`` with ``lr``."""
    for group in optimizer.param_groups:
        group["lr"] = lr

In [None]:
# Reset every parameter group to the configured learning rate, then train for 5 epochs.
set_lr(optimizer, lr)
fit(5, train_dl, val_dl, net, optimizer)

Plot training progress. It's nicer to use something like TensorBoard for this, but a simple plot also works. ;-)

In [None]:
# Training loss, one point per batch.
plt.plot(history["train_bce"])

In [None]:
# Validation loss, one point per epoch.
plt.plot(history["val_bce"])

**All done!** You can now use this checkpoint in the [inference kernel](https://www.kaggle.com/humananalog/inference-demo).

# Test

In [None]:
# final model: test loss of the network as it stands after the last training epoch
evaluate(net, test_dl, device=gpu)

In [None]:
# reset to best model: reload the checkpoint dict written by fit() and restore
# both the network weights and the optimizer state from it
ck = torch.load('best_model_.pth')
net.load_state_dict(ck['state_dict'])
optimizer.load_state_dict(ck['optimizer'])

In [None]:
# best model: test loss of the restored best-validation checkpoint
evaluate(net, test_dl, device=gpu)

In [None]:
# download the file: render a clickable link to the saved checkpoint
from IPython.display import FileLink
FileLink('best_model_.pth') 