this kernel use kaggle-real-and-fake-faces dataset

In [87]:
import numpy as np
import pandas as pd
import os 
import matplotlib.pyplot as plt
import gc
import random

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model, save_model
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import * 
from tensorflow.keras.utils import *
from tensorflow.keras.optimizers import *

# !pip install efficientnet -q
import efficientnet.tfkeras as efn
import gc
import glob
import itertools
import cv2

from sklearn.cluster import KMeans
from matplotlib_venn import venn2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from termcolor import colored

In [88]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

In [89]:
gc.collect()

40

# Data

#### Balance

In [90]:
fake_imgs = glob.glob('/Users/dph/downloads/data-real-and-fake-face-detection/training_fake/*.jpg')
len(fake_imgs)

960

In [91]:
fake_y = np.ones(len(fake_imgs))
len(fake_y)

960

In [92]:
real_imgs = glob.glob('/Users/dph/downloads/data-real-and-fake-face-detection/training_real/*.jpg')[:len(fake_imgs)]
len(real_imgs)

960

In [93]:
real_y = np.zeros(len(fake_imgs))
len(real_y)

960

In [94]:
imgs = real_imgs + fake_imgs
len(imgs)

1920

In [95]:
y = np.hstack((real_y, fake_y))
len(y)

1920

#### shuffle

In [96]:
c = list(zip(imgs, y))
random.shuffle(c)
imgs, y = zip(*c)

#### split

In [97]:
train_imgs, test_imgs, train_y, test_y = train_test_split(imgs, y, test_size=0.4)
val_imgs, test_imgs, val_y, test_y = train_test_split(test_imgs, test_y, test_size=0.2)
print(len(train_imgs), len(val_imgs), len(test_imgs))
print(len(train_y), len(val_y), len(test_y))

1152 614 154
1152 614 154


# Agumentation

In [98]:
# get mean and std for all data
class MeanDataset(Dataset):
    def __init__(self, imgs):
        self.imgs = imgs
        
    def __getitem__(self, idx):
        img_path = self.imgs[idx]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))/255.
        return img

    def __len__(self):
        return len(self.imgs)
    

dataset = MeanDataset(imgs)
loader = DataLoader(
    dataset,
    batch_size=10,
    num_workers=0,
    shuffle=False
)


mean = 0.
std = 0.
nb_samples = 0.
for data in tqdm(loader, total=len(loader)):
    batch_samples = data.size(0)
    
    mean += data.float().mean()
    std += data.float().std()
    nb_samples += 1

mean /= nb_samples
std /= nb_samples

print(mean, std)

HBox(children=(IntProgress(value=0, max=192), HTML(value='')))


tensor(0.4418) tensor(0.2753)


In [99]:
from albumentations import Normalize, HorizontalFlip, Compose, RandomSizedCrop

In [100]:
train_tf = Compose([HorizontalFlip(p=0.5), 
                    RandomSizedCrop(min_max_height=(180, 180), height=380, width=380, p=0.5),
                    Normalize(mean=0.4418, std=0.2746)]) # this already includes /255.
val_tf = Compose([Normalize(mean=0.4418, std=0.2746)])

In [101]:
class MyDataset(Dataset):
    def __init__(self, imgs, y, split):
        self.imgs = imgs
        self.y = y
        self.split = split
        
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, idx):
        img_path = self.imgs[idx]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (380, 380))
        
        # augment
        if self.split == 'train':
            img = train_tf(**{'image': img})['image']
        elif self.split == 'val':
            img = val_tf(**{'image': img})['image']
            
        # 
        img = img.transpose(2, 0, 1)
        
        l = self.y[idx]
        return img, l

In [102]:
train_ds = MyDataset(train_imgs, train_y, 'train')
train_dl = DataLoader(train_ds, batch_size=32, shuffle=False)

val_ds = MyDataset(val_imgs, val_y, 'val')
val_dl = DataLoader(val_ds, batch_size=32, shuffle=False)

test_ds = MyDataset(test_imgs, test_y, 'val')
test_dl = DataLoader(test_ds, batch_size=32, shuffle=False)

In [103]:
x, y = next(iter(train_dl))
print(x.shape, y.shape)

torch.Size([32, 3, 380, 380]) torch.Size([32])


In [104]:
x, y = next(iter(val_dl))
print(x.shape, y.shape)

torch.Size([32, 3, 380, 380]) torch.Size([32])


In [105]:
x, y = next(iter(test_dl))
print(x.shape, y.shape)

torch.Size([32, 3, 380, 380]) torch.Size([32])


# Model

In [106]:
from efficientnet_pytorch import EfficientNet

In [107]:
class MyModel(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.base = EfficientNet.from_pretrained('efficientnet-b4')
        self.fc = nn.Linear(self.base._fc.out_features, 1)
        
    def forward(self, x):
        x = self.base(x)
        x = self.fc(x)
        return x
model = MyModel() 
model = model.to(device)

Loaded pretrained weights for efficientnet-b4


In [108]:
# freeze
def freeze_until(model, layer):
    flag = False
    for n, p in model.named_parameters():
        if n == layer:
            flag = True
        p.requires_grad = flag

In [109]:
# freeze_until(model, 'base._conv_head.weight')
# for n, p in model.named_parameters():
#     if p.requires_grad:
#         print(n)

In [110]:
x = torch.tensor(np.random.randn(2, 3, 224, 224)).float()
y = model(x)
print(y.shape)

torch.Size([2, 1])


# Training

In [111]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, factor=0.2)

In [112]:
prev = np.ones(len(train_dl))

In [116]:
def train_on(epoch, train_dl):
    # prepare
    torch.cuda.empty_cache()
    gc.collect()
    model.train()
    
    loss_epoch = []
    accuracy_epoch = []
    for i, (x, y) in tqdm(enumerate(train_dl), total=len(train_dl)):
        y_pred = model(x)
        loss = F.binary_cross_entropy_with_logits(y_pred.squeeze(), y)
        
        if prev[i] == 1:
            prev[i] = loss
            print('Epoch: %d, Step: %d, Loss: %2f' % (epoch, i, loss))
        else:
            if loss < prev[i]:
                print(colored('Epoch: %d, Step: %d, Loss: %2f' % (epoch, i, loss), 'green'))
            else:
                print(colored('Epoch: %d, Step: %d, Loss: %2f' % (epoch, i, loss), 'red'))
                
            prev[i] = loss
            
        
        optimizer.zero_grad()
        loss.backward() # calculate grad
        optimizer.step() # update grad
        
        accuracy = (sum(torch.round(y) == torch.round(nn.Sigmoid()(y_pred.squeeze()))).float() / len(y)).item()
        # add
        loss_epoch.append(loss.item())
        accuracy_epoch.append(accuracy)
        
        
        del x, y, y_pred, loss, accuracy
        
    train_loss = sum(loss_epoch) / len(loss_epoch)
    train_accuracy = sum(accuracy_epoch) / len(accuracy_epoch)

    return train_loss, train_accuracy


In [117]:
def train(epochs, train_dl):
    for e in range(epochs):
        train_on(e, train_dl)

In [118]:
train(1, train_dl)

HBox(children=(IntProgress(value=0, max=36), HTML(value='')))

[32mEpoch: 0, Step: 0, Loss: 0.629555[0m
Epoch: 0, Step: 1, Loss: 0.737463
Epoch: 0, Step: 2, Loss: 0.706230
Epoch: 0, Step: 3, Loss: 0.740758
Epoch: 0, Step: 4, Loss: 0.703336
Epoch: 0, Step: 5, Loss: 0.712074
Epoch: 0, Step: 6, Loss: 0.716560
Epoch: 0, Step: 7, Loss: 0.702221
Epoch: 0, Step: 8, Loss: 0.715054
Epoch: 0, Step: 9, Loss: 0.724207
Epoch: 0, Step: 10, Loss: 0.709788
Epoch: 0, Step: 11, Loss: 0.726637
Epoch: 0, Step: 12, Loss: 0.726076
Epoch: 0, Step: 13, Loss: 0.673758
Epoch: 0, Step: 14, Loss: 0.603374
Epoch: 0, Step: 15, Loss: 0.737507
Epoch: 0, Step: 16, Loss: 0.619352
Epoch: 0, Step: 17, Loss: 0.617687
Epoch: 0, Step: 18, Loss: 0.779100
Epoch: 0, Step: 19, Loss: 0.654782
Epoch: 0, Step: 20, Loss: 0.650066
Epoch: 0, Step: 21, Loss: 0.650195
Epoch: 0, Step: 22, Loss: 0.778417
Epoch: 0, Step: 23, Loss: 0.695373
Epoch: 0, Step: 24, Loss: 0.718566
Epoch: 0, Step: 25, Loss: 0.615077
Epoch: 0, Step: 26, Loss: 0.654718
Epoch: 0, Step: 27, Loss: 0.678903
Epoch: 0, Step: 28, L

In [None]:
test_prev = np.ones(len(val_dl))
def test_on():
    model.eval()
    
    loss_epoch = []
    accuracy_epoch = []
    
    for i, (x, y) in tqdm(enumerate(val_dl), total=len(val_dl)):
        with torch.no_grad():
            x = x.to(device)
            y = y.to(device)
            y_preds = model(x).squeeze()
            loss = F.binary_cross_entropy_with_logits(y_preds, y)
            
            if test_prev[i] == 1:
                print('Step: %d, Test loss: %2f' % (i, loss))
            else:
                if loss < test_prev[i]:
                    print(colored('Step: %d, Test loss: %2f' % (i, loss), 'green'))
                else:
                    print(colored('Step: %d, Test loss: %2f' % (i, loss), 'red'))
            test_prev[i] = loss
            
            accuracy = (sum(torch.round(y) == torch.round(nn.Sigmoid()(y_pred))).float() / len(y)).item()
            # add
            loss_epoch.append(loss.item())
            accuracy_epoch.append(accuracy)
        
            del x, y, y_pred, loss, accuracy
    
    val_loss = sum(loss_epoch)/len(loss_epoch)
    val_accuracy = sum(accuracy_epoch) / len(accuracy_epoch)

    return val_loss, val_accuracy

In [119]:
test_on()

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

[32mStep: 0, Test loss: 0.631987[0m
[31mStep: 1, Test loss: 0.678470[0m
[31mStep: 2, Test loss: 0.707027[0m
[32mStep: 3, Test loss: 0.647057[0m
[32mStep: 4, Test loss: 0.670705[0m
[31mStep: 5, Test loss: 0.668898[0m
[32mStep: 6, Test loss: 0.638163[0m
[32mStep: 7, Test loss: 0.676597[0m
[31mStep: 8, Test loss: 0.660896[0m
[32mStep: 9, Test loss: 0.659189[0m
[31mStep: 10, Test loss: 0.701660[0m
[32mStep: 11, Test loss: 0.705386[0m
[32mStep: 12, Test loss: 0.640857[0m
[31mStep: 13, Test loss: 0.698955[0m
[32mStep: 14, Test loss: 0.664973[0m
[31mStep: 15, Test loss: 0.668699[0m
[32mStep: 16, Test loss: 0.648419[0m
[31mStep: 17, Test loss: 0.697917[0m
[31mStep: 18, Test loss: 0.724641[0m
[31mStep: 19, Test loss: 0.674494[0m

