# DFNet
Original repo: [hughplay/DFNet](https://github.com/hughplay/DFNet)

Fork with training code: [Yukariin/DFNet](https://github.com/Yukariin/DFNet)

~~Differentiable Augmentation: [mit-han-lab/data-efficient-gans](https://github.com/mit-han-lab/data-efficient-gans)~~ Currently no GAN loss.

Warning: In the masks, black marks the area to be inpainted and white marks the original area that is kept.

In [None]:
# Show the GPU attached to this runtime (train.py below hard-codes device 'cuda').
!nvidia-smi

# Training

In [None]:
#@title Mount Google Drive
# Mount Drive at /content/drive; the training cells write TensorBoard logs,
# checkpoints and preview images under /content/drive/MyDrive/Colab-DFNet.
from google.colab import drive
drive.mount('/content/drive')
print('Google Drive connected.')

In [None]:
#@title install
#!git clone https://github.com/Yukariin/DFNet
!git clone https://github.com/styler00dollar/Colab-DFNet
!pip install tensorboardX
!pip install LPIPS

%cd /content/Colab-DFNet
!pip install gdown
# places
!gdown --id 1SGJ_Z9kpchdnZ3Qwwf4HnN-Cq-AeK7vH
!mkdir /content/train_data

# You must create /images and /ckpt in your path, or training will crash
!mkdir '/content/drive/MyDrive/Colab-DFNet'
!mkdir '/content/drive/MyDrive/Colab-DFNet/images/'
!mkdir '/content/drive/MyDrive/Colab-DFNet/ckpt/'

[Experimental] Additional losses: HFENLoss (high frequency error norm), ElasticLoss, RelativeL1, L1CosineSim, ClipL1, FFTloss, OFLoss (Overflow loss), GPLoss (Gradient Profile (GP) loss), CPLoss (Color Profile (CP) loss), Contextual_Loss and LPIPS. Configure the weight values and the combination of losses in `loss.py`.

Warning: Using AMP together with the Style loss results in NaN errors.

In [None]:
#@title loss.py (tensorboard iteration offset)
%%writefile /content/Colab-DFNet/loss.py

# Offset added to the iteration index of every TensorBoard scalar written by
# InpaintingLoss.forward. Presumably set to the number of iterations already
# trained when resuming, so the curves continue instead of restarting at 0.
resume_iteration = 0

from vic.loss import CharbonnierLoss, GANLoss, GradientPenaltyLoss, HFENLoss, TVLoss, GradientLoss, ElasticLoss, RelativeL1, L1CosineSim, ClipL1, MaskedL1Loss, MultiscalePixelLoss, FFTloss, OFLoss, L1_regularization, ColorLoss, AverageLoss, GPLoss, CPLoss, SPL_ComputeWithTrace, SPLoss, Contextual_Loss, StyleLoss
from vic.perceptual_loss import PerceptualLoss
from vic.filters import *
from vic.colors import *
from vic.discriminators import *


from tensorboardX import SummaryWriter

# Folder (on the mounted Google Drive) that receives the TensorBoard event files.
logdir='/content/drive/MyDrive/Colab-DFNet'

# Module-level writer, created on import and used by InpaintingLoss.forward.
writer = SummaryWriter(logdir=logdir)

from collections import namedtuple

import torch
import torch.nn as nn
from torchvision import models

from utils import resize_like
from metrics import *

from torchvision.utils import save_image


class InpaintingLoss(nn.Module):
    """Weighted sum of image losses for DFNet training, logged to TensorBoard.

    The returned objective is
    ``6 * L1CosineSim + 240 * StyleLoss + 0.1 * TVLoss + 0.1 * PerceptualLoss``,
    with every term summed over the samples of the batch. The remaining
    criteria built in ``__init__`` are experimental: they are instantiated but
    do not contribute to the loss unless ``forward`` is edited.

    Each call writes the four terms, their total and the summed PSNR through
    the module-level ``writer`` at step ``iteration + resume_iteration``.
    """

    def __init__(self, p=[0, 1, 2,3,4,5], q=[0, 1, 2,3,4,5],
                 w=[6., 0.1, 240., 0.1]):
        """Instantiate all criteria.

        ``p``, ``q`` and ``w`` are kept in the signature but are currently
        unused (the weights are hard-coded in ``forward``); they are never
        mutated, so the list defaults are harmless.
        """
        super().__init__()

        #self.l1 = nn.L1Loss()
        #self.perceptual = PerceptualLoss()
        #self.style = StyleLoss()

        # new loss
        # Base criterion wrapped by HFENLoss. Alternatives that were considered:
        # nn.L1Loss(), nn.MSELoss(), CharbonnierLoss(), ElasticLoss(),
        # RelativeL1(), L1CosineSim().
        l_hfen_type = L1CosineSim()
        self.HFENLoss = HFENLoss(loss_f=l_hfen_type, kernel='log', kernel_size=15, sigma = 2.5, norm = False)

        self.ElasticLoss = ElasticLoss(a=0.2, reduction='mean')

        self.RelativeL1 = RelativeL1(eps=.01, reduction='mean')

        self.L1CosineSim = L1CosineSim(loss_lambda=5, reduction='mean')

        self.ClipL1 = ClipL1(clip_min=0.0, clip_max=10.0)

        self.FFTloss = FFTloss(loss_f = torch.nn.L1Loss, reduction='mean')

        self.OFLoss = OFLoss()

        self.GPLoss = GPLoss(trace=False, spl_denorm=False)

        self.CPLoss = CPLoss(rgb=True, yuv=True, yuvgrad=True, trace=False, spl_denorm=False, yuv_denorm=False)

        self.StyleLoss = StyleLoss()

        self.TVLoss = TVLoss(tv_type='tv', p = 1)

        self.PerceptualLoss = PerceptualLoss(model='net-lin', net='alex', colorspace='rgb', spatial=False, use_gpu=True, gpu_ids=[0], model_path=None)

        layers_weights = {'conv_1_1': 1.0, 'conv_3_2': 1.0}
        self.Contextual_Loss = Contextual_Loss(layers_weights, crop_quarter=False, max_1d_size=100,
            distance_type = 'cosine', b=1.0, band_width=0.5,
            use_vgg = True, net = 'vgg19', calc_type = 'regular')

        self.psnr_metric = PSNR()
        #self.ssim_metric = SSIM()
        #self.ae_metric = AE()
        #self.mse_metric = MSE()

        #self.p = p
        #self.q = q
        #self.w = w
        #self.counter = 0

    def forward(self, input, gt, iteration):
        """Compute the combined loss for one batch and log it.

        Args:
            input: Sequence of model outputs; only ``input[0]`` (a batch of
                predicted images) is scored.
            gt: Batch of ground-truth images, paired sample by sample with
                ``input[0]``.
            iteration: Current training step; ``resume_iteration`` is added
                to it for the TensorBoard step.

        Returns:
            The sum of the four weighted loss terms over the batch.
        """
        L1CosineSim_forward = 0.0
        perceptual_forward = 0.0
        style_forward = 0.0
        tv_forward = 0.0
        PSNR_value = 0

        # Walk over the samples that are actually in the batch. The former
        # hard-coded ``range(6)`` raised IndexError for smaller batches
        # (train.py defaults to --batch_size 1) and skipped every sample after
        # the sixth in larger ones.
        for out, gt_res in zip(input[0], gt):
            # The criteria are fed one image at a time with a batch axis of 1.
            out = out.unsqueeze(0)
            gt_res = gt_res.unsqueeze(0)

            # Experimental terms (disabled). To enable one, accumulate it like
            # the active terms below and add it to ``total_loss``:
            #   self.HFENLoss(out, gt_res)
            #   self.ElasticLoss(out, gt_res)
            #   self.RelativeL1(out, gt_res)
            #   self.ClipL1(out, gt_res)
            #   self.FFTloss(out, gt_res)
            #   self.OFLoss(out)
            #   self.GPLoss(out, gt_res)
            #   0.1 * self.CPLoss(out, gt_res)
            #   self.Contextual_Loss(out, gt_res)

            L1CosineSim_forward += 6*self.L1CosineSim(out, gt_res)
            style_forward += 240*self.StyleLoss(out, gt_res)
            tv_forward += 0.1*self.TVLoss(out)
            perceptual_forward += 0.1*self.PerceptualLoss(out, gt_res)

            PSNR_value += self.psnr_metric(gt_res, out)

        step = iteration + resume_iteration

        writer.add_scalar('loss/Perceptual', perceptual_forward, step)
        writer.add_scalar('loss/Style', style_forward, step)
        writer.add_scalar('loss/TV', tv_forward, step)
        writer.add_scalar('loss/L1CosineSim', L1CosineSim_forward, step)

        total_loss = perceptual_forward + style_forward + tv_forward + L1CosineSim_forward

        writer.add_scalar('Total', total_loss, step)

        # PSNR (Peak Signal-to-Noise Ratio), summed over the batch.
        writer.add_scalar('metrics/PSNR', PSNR_value, step)

        # Further metrics (SSIM, AE, MSE, LPIPS) are currently not logged.

        return total_loss


In [None]:
#@title train.py (added saving + iteration offset)
%%writefile /content/Colab-DFNet/train.py
#from diffaug import *

# Offset added to the step number in checkpoint and preview-image file names.
# It does not change where the training loop starts (start_iter stays 0).
resume_iteration = 0

import argparse
import os

import numpy as np
import torch
from tensorboardX import SummaryWriter
from torch.utils import data
from torchvision import transforms
from torchvision.utils import make_grid, save_image
from tqdm import tqdm

from data import DS
from loss import InpaintingLoss
from model import DFNet

from torchvision.utils import save_image

class InfiniteSampler(data.sampler.Sampler):
    """Sampler that never runs out: it yields one shuffled pass over
    ``range(num_samples)`` after another."""

    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __len__(self):
        # Nominal length reported for the endless stream.
        return 1 << 31

    def __iter__(self):
        return iter(self.loop())

    def loop(self):
        """Generate indices forever, drawing a new permutation after each pass."""
        perm = np.random.permutation(self.num_samples)
        cursor = 0
        while True:
            yield perm[cursor]
            cursor += 1
            if cursor < self.num_samples:
                continue
            # Pass finished: reseed NumPy's global RNG and reshuffle.
            np.random.seed()
            perm = np.random.permutation(self.num_samples)
            cursor = 0


# Command-line options of the training script. Options with a default are
# declared from a table of (flag, type, default), in the order they are listed.
parser = argparse.ArgumentParser()
for flag, kind, fallback in (
    ('--root', str, '/path/'),
    ('--save_dir', str, '/path/'),
    # '--log_dir' (default './logs/default') is disabled.
    ('--lr', float, 2e-3),
    ('--max_iter', int, 5000000),
    ('--batch_size', int, 1),
    ('--n_threads', int, 16),
    ('--save_model_interval', int, 500),
    ('--vis_interval', int, 500),
    ('--log_interval', int, 1),
    ('--image_size', int, 256),
):
    parser.add_argument(flag, type=kind, default=fallback)
# Optional path to a state-dict checkpoint; None when not given.
parser.add_argument('--resume', type=str)
args = parser.parse_args()

torch.backends.cudnn.benchmark = True
device = torch.device('cuda')

# Always make sure both output sub-folders exist. The previous check only
# created them when save_dir itself was missing, so an existing save_dir
# without images/ or ckpt/ made the first save fail.
os.makedirs('{:s}/images'.format(args.save_dir), exist_ok=True)
os.makedirs('{:s}/ckpt'.format(args.save_dir), exist_ok=True)

#writer = SummaryWriter(logdir=args.log_dir)

# Every image is resized to a square, randomly mirrored and converted to a tensor.
size = (args.image_size, args.image_size)
img_tf = transforms.Compose([
    transforms.Resize(size=size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

dataset = DS(args.root, img_tf)

# Endless stream of batches: InfiniteSampler keeps yielding indices, so the
# training loop calls next() on this iterator without handling epoch ends.
iterator_train = iter(data.DataLoader(
    dataset, batch_size=args.batch_size,
    sampler=InfiniteSampler(len(dataset)),
    num_workers=args.n_threads
))
print(len(dataset))
model = DFNet().to(device)

lr = args.lr

start_iter = 0
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# The learning rate is multiplied by 0.1 after every 5 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
criterion = InpaintingLoss().to(device)

# --resume restores the model weights only; optimizer, scheduler and
# start_iter are not restored.
if args.resume:
    checkpoint = torch.load(args.resume, map_location=device)
    model.load_state_dict(checkpoint)

# Main training loop: one optimizer step per iteration on a freshly drawn batch.
for i in tqdm(range(start_iter, args.max_iter)):
    model.train()

    img, mask = [x.to(device) for x in next(iterator_train)]

    # inpainting: pixels where the mask is 0 are blanked out of the input
    masked = img * mask

    # mosaic (disabled alternative corruption; needs `random` and an `nnf`
    # alias that this script does not import):
    # MOSAIC_MIN = 0.01
    # MOSAIC_MID =  0.2
    # MOSAIC_MAX = 0.0625
    #
    # mosaic_size = int(random.triangular(int(min(256*MOSAIC_MIN, 256*MOSAIC_MIN)), int(min(256*MOSAIC_MID, 256*MOSAIC_MID)), int(min(256*MOSAIC_MAX, 256*MOSAIC_MAX))))
    # images_mosaic = nnf.interpolate(img, size=(mosaic_size, mosaic_size), mode='nearest')
    # images_mosaic = nnf.interpolate(images_mosaic, size=(256, 256), mode='nearest')
    # #masked = (img * (1 - mask).float()) + (images_mosaic * (mask).float())
    # masked = (images_mosaic * (1 - mask).float()) + (img * (mask).float())

    results, alpha, raw = model(masked, mask)

    #with torch.cuda.amp.autocast():
    # The criterion also writes its TensorBoard scalars, using i as the step.
    loss = criterion(results, img, i)

    # no amp
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # amp (disabled):
    # scaler.scale(loss).backward()
    # scaler.step(optimizer)
    # optimizer.zero_grad()

    # Plain loss logging (disabled; the criterion logs instead):
    # if (i + 1) % args.log_interval == 0:
    #     writer.add_scalar('loss', loss.item(), i + 1)

    if (i + 1) % args.save_model_interval == 0 or (i + 1) == args.max_iter:
        torch.save(model.state_dict(), '{:s}/ckpt/{:d}.pth'.format(args.save_dir, i + 1 + resume_iteration))

    if (i + 1) % args.vis_interval == 0:
        # Preview grid with three groups: originals, masked inputs, first output.
        s_img = torch.cat([img, masked, results[0]])
        s_img = make_grid(s_img, nrow=args.batch_size)
        save_image(s_img, '{:s}/images/test_{:d}.png'.format(args.save_dir, i + 1 + resume_iteration))

    # Step the LR scheduler once every 10000 iterations. The former test
    # `if (i + 1) % 10000:` was true on every iteration EXCEPT multiples of
    # 10000, so the scheduler ran almost every step and the learning rate
    # collapsed towards zero within a few dozen iterations.
    if (i + 1) % 10000 == 0:
        scheduler.step()

    # amp
    #scaler.update()


In [None]:
# Start training from the downloaded checkpoint; checkpoints and preview
# images are written below the --save_dir folder on Google Drive.
%cd /content/Colab-DFNet
!python train.py --root /content/train_data --resume '/content/Colab-DFNet/model_places2.pth' --save_dir "/content/drive/MyDrive/Colab-DFNet/"

# Testing

In [None]:
#@title Mount Google Drive
# Mount Drive at /content/drive and create a local folder for datasets.
from google.colab import drive
drive.mount('/content/drive')
!mkdir /content/datasets

In [None]:
#@title git clone original repo and download models
!git clone https://github.com/hughplay/DFNet.git
# The checkpoints are downloaded into DFNet/model (the current directory after %cd).
%cd DFNet/model
!pip install gdown
# places (presumably model_places2.pth, the checkpoint the test cell loads)
!gdown --id 1SGJ_Z9kpchdnZ3Qwwf4HnN-Cq-AeK7vH
# celeba
!gdown --id 1e6KVfSdILygDcyL-ps1jckS4Ff18Z3rj

In [None]:
#@title test.py (you can edit the output resolution filesize there)
%%writefile /content/DFNet/test.py
from collections import defaultdict
from itertools import islice
from multiprocessing.pool import ThreadPool as Pool
import os
from pathlib import Path

import argparse
import cv2
import numpy as np
import torch
import tqdm

from utils import list2nparray, gen_miss, merge_imgs
from model import DFNet


class Tester:
    """Run a pretrained DFNet checkpoint on image/mask pairs and save the outputs.

    ``inpaint`` is the entry point. It pairs images with masks (a single
    file pair or two directories), feeds them through the model in batches
    and writes three PNGs per pair into ``<output>/result``,
    ``<output>/raw`` and ``<output>/alpha``.
    """

    def __init__(self, model_path, input_size, batch_size):
        """Store the settings and load the checkpoint at ``model_path``.

        ``input_size`` > 0 forces a square input of that side length;
        otherwise the defaults in the ``input_size`` property apply.
        """
        self.model_path = model_path
        self._input_size = input_size
        self.batch_size = batch_size
        self.init_model(model_path)

    @property
    def input_size(self):
        """Return the ``(width, height)`` every image and mask is resized to."""
        if self._input_size > 0:
            return (self._input_size, self._input_size)
        elif 'celeba' in self.model_path:
            return (1024, 1024) # edit these values for resolution, must be 2^x
        else:
            return (1024, 1024)

    def init_model(self, path):
        """Pick CUDA when available, build DFNet and load the state dict from ``path``."""
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
            print('Using gpu.')
        else:
            self.device = torch.device('cpu')
            print('Using cpu.')

        self.model = DFNet().to(self.device)
        checkpoint = torch.load(path, map_location=self.device)
        self.model.load_state_dict(checkpoint)
        self.model.eval()

        print('Model %s loaded.' % path)

    def get_name(self, path):
        """Return the file name of ``path`` without its last extension.

        Accepts ``str`` as well as ``Path``: single-file mode hands plain
        strings down to here, which used to fail on ``.name``.
        """
        return '.'.join(Path(path).name.split('.')[:-1])

    def results_path(self, output, img_path, mask_path, prefix='result'):
        """Build the three output paths for one image/mask pair.

        ``output`` and ``prefix`` are unused; the paths are derived from
        ``self.output`` via ``sub_dir``.
        """
        img_name = self.get_name(img_path)
        mask_name = self.get_name(mask_path)
        return {
            'result_path': self.sub_dir('result').joinpath(
                'result-{}-{}.png'.format(img_name, mask_name)),
            'raw_path': self.sub_dir('raw').joinpath(
                'raw-{}-{}.png'.format(img_name, mask_name)),
            'alpha_path': self.sub_dir('alpha').joinpath(
                'alpha-{}-{}.png'.format(img_name, mask_name))
        }

    def inpaint_instance(self, img, mask):
        """Assume color image with 3 dimension. CWH

        NOTE(review): not called anywhere in this file. ``view(1, *shape)``
        is tensor-style reshaping while ``inpaint_batch`` applies NumPy
        operations; confirm the expected input type before relying on it.
        """
        img = img.view(1, *img.shape)
        mask = mask.view(1, 1, *mask.shape)
        return self.inpaint_batch(img, mask).squeeze()

    def inpaint_batch(self, imgs, masks):
        """Assume color channel is BGR and input is NWHC np.uint8."""
        imgs = np.transpose(imgs, [0, 3, 1, 2])
        masks = np.transpose(masks, [0, 3, 1, 2])

        imgs = torch.from_numpy(imgs).to(self.device)
        masks = torch.from_numpy(masks).to(self.device)
        imgs = imgs.float().div(255)
        masks = masks.float().div(255)
        imgs_miss = imgs * masks
        with torch.no_grad():
            results = self.model(imgs_miss, masks)
        # process_batch unpacks the model output as (results, alpha, raw)
        # with indexable entries; accept that form as well as a bare list.
        if isinstance(results, tuple):
            results = results[0]
        if isinstance(results, list):
            results = results[0]
        return self.to_numpy(results)

    def _process_file(self, output, img_path, mask_path):
        """Append one image/mask pair and its output paths to ``self.path_pair``."""
        img_path = Path(img_path)
        mask_path = Path(mask_path)
        item = {
            'img_path': img_path,
            'mask_path': mask_path,
        }
        item.update(self.results_path(output, img_path, mask_path))
        self.path_pair.append(item)

    def process_single_file(self, output, img_path, mask_path):
        """Reset the work list to exactly one image/mask pair."""
        self.path_pair = []
        self._process_file(output, img_path, mask_path)

    def process_dir(self, output, img_dir, mask_dir):
        """Pair the sorted ``*.jpg``/``*.png`` files of both directories.

        Files are matched by position; surplus files in the longer listing
        are ignored.
        """
        img_dir = Path(img_dir)
        mask_dir = Path(mask_dir)
        imgs_path = sorted(
            list(img_dir.glob('*.jpg')) + list(img_dir.glob('*.png')))
        masks_path = sorted(
            list(mask_dir.glob('*.jpg')) + list(mask_dir.glob('*.png')))

        self.path_pair = []
        for img_path, mask_path in zip(imgs_path, masks_path):
            self._process_file(output, img_path, mask_path)

    def get_process(self, input_size):
        """Return a loader that reads, resizes and lays out one pair as uint8 arrays."""
        def process(pair):
            img = cv2.imread(str(pair['img_path']), cv2.IMREAD_COLOR)
            mask = cv2.imread(str(pair['mask_path']), cv2.IMREAD_GRAYSCALE)

            # cv2.imread signals failure by returning None; report the file
            # instead of failing later inside resize/transpose.
            if img is None:
                raise IOError(
                    'Could not read image: {}'.format(pair['img_path']))
            if mask is None:
                raise IOError(
                    'Could not read mask: {}'.format(pair['mask_path']))

            if input_size:
                img = cv2.resize(img, input_size)
                mask = cv2.resize(mask, input_size)

            # HWC -> CHW for the image; the mask gets a leading channel axis.
            img = np.ascontiguousarray(img.transpose(2, 0, 1)).astype(np.uint8)
            mask = np.ascontiguousarray(
                np.expand_dims(mask, 0)).astype(np.uint8)

            pair['img'] = img
            pair['mask'] = mask
            return pair
        return process

    def _file_batch(self):
        """Yield dicts of lists, one per batch, loading the files in a thread pool."""
        n_pair = len(self.path_pair)
        n_batch = (n_pair-1) // self.batch_size + 1
        process = self.get_process(self.input_size)

        # The context manager shuts the pool down when the generator ends.
        with Pool() as pool:
            for i in tqdm.trange(n_batch, leave=False):
                _buffer = defaultdict(list)
                start = i * self.batch_size
                stop = start + self.batch_size
                batch = pool.imap_unordered(
                    process, islice(self.path_pair, start, stop))
                # Order inside a batch may vary, but every instance carries
                # its own paths, so the per-key lists stay aligned.
                for instance in batch:
                    for k, v in instance.items():
                        _buffer[k].append(v)
                yield _buffer

    def batch_generator(self):
        """Yield batches with the ``img`` and ``mask`` lists converted by ``list2nparray``."""
        generator = self._file_batch

        for _buffer in generator():
            for key in _buffer:
                if key in ['img', 'mask']:
                    _buffer[key] = list2nparray(_buffer[key])
            yield _buffer

    def to_numpy(self, tensor):
        """Convert a float image batch to a uint8 array with the channel axis last."""
        # Clamp first: converting out-of-range floats to uint8 wraps around.
        tensor = tensor.clamp(0, 1).mul(255).byte().data.cpu().numpy()
        tensor = np.transpose(tensor, [0, 2, 3, 1])
        return tensor

    def process_batch(self, batch, output):
        """Inpaint one batch and write the result, raw and alpha images.

        ``output`` is unused; the destination paths travel inside ``batch``.
        """
        imgs = torch.from_numpy(batch['img']).to(self.device)
        masks = torch.from_numpy(batch['mask']).to(self.device)
        imgs = imgs.float().div(255)
        masks = masks.float().div(255)
        imgs_miss = imgs * masks

        # Inference only: skip autograd bookkeeping to save memory.
        with torch.no_grad():
            result, alpha, raw = self.model(imgs_miss, masks)
        result, alpha, raw = result[0], alpha[0], raw[0]
        # Keep the original pixels where the mask is 1 and use the model
        # output where it is 0.
        result = imgs * masks + result * (1 - masks)

        result = self.to_numpy(result)
        alpha = self.to_numpy(alpha)
        raw = self.to_numpy(raw)

        for i in range(result.shape[0]):
            cv2.imwrite(str(batch['result_path'][i]), result[i])
            cv2.imwrite(str(batch['raw_path'][i]), raw[i])
            cv2.imwrite(str(batch['alpha_path'][i]), alpha[i])

    @property
    def root(self):
        """Output root directory as a ``Path`` (set by ``inpaint``)."""
        return Path(self.output)

    def sub_dir(self, sub):
        """Return the sub-folder ``sub`` below the output root."""
        return self.root.joinpath(sub)

    def prepare_folders(self, folders):
        """Create every folder in ``folders``, including missing parents."""
        for folder in folders:
            Path(folder).mkdir(parents=True, exist_ok=True)

    def inpaint(self, output, img, mask, merge_result=False):
        """Inpaint ``img`` with ``mask`` and write everything below ``output``.

        ``img`` and ``mask`` must both be files (``.png``/``.jpg``/``.jpeg``
        image) or both be directories; anything else raises
        ``NotImplementedError``. With ``merge_result`` in directory mode,
        comparison images are additionally produced via ``gen_miss`` and
        ``merge_imgs``.
        """
        self.output = output
        self.prepare_folders([
            self.sub_dir('result'), self.sub_dir('alpha'),
            self.sub_dir('raw')])

        if os.path.isfile(img) and os.path.isfile(mask):
            if str(img).endswith(('.png', '.jpg', '.jpeg')):
                self.process_single_file(output, img, mask)
                _type = 'file'
            else:
                raise NotImplementedError()
        elif os.path.isdir(img) and os.path.isdir(mask):
            self.process_dir(output, img, mask)
            _type = 'dir'
        else:
            print('Img: ', img)
            print('Mask: ', mask)
            raise NotImplementedError(
                'img and mask should be both file or directory.')

        print('# Inpainting...')
        print('Input size:', self.input_size)
        for batch in self.batch_generator():
            self.process_batch(batch, output)
        print('Inpainting finished.')

        if merge_result and _type == 'dir':
            miss = self.sub_dir('miss')
            merge = self.sub_dir('merge')

            print('# Preparing input images...')
            gen_miss(img, mask, miss)
            print('# Merging...')
            merge_imgs([
                miss, self.sub_dir('raw'), self.sub_dir('alpha'),
                self.sub_dir('result'), img], merge, res=self.input_size[0])
            print('Merging finished.')


# Command-line entry point: parse the options, load the checkpoint and run
# the inpainting on the given image/mask file pair or directories.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-m', '--model', default='./model/model_places2.pth',
        help='Select a checkpoint.')
    # The help text used to be a copy of the --batch_size description.
    parser.add_argument(
        '-i', '--input_size', default=0, type=int,
        help='Side length of the square model input; '
             '0 uses the built-in default.')
    parser.add_argument(
        '-b', '--batch_size', default=8, type=int,
        help='Batch size for testing.')
    parser.add_argument(
        '--img', default='./samples/places2/img',
        help='Image or Image folder.')
    parser.add_argument(
        '--mask', default='./samples/places2/mask',
        help='Mask or Mask folder.')
    parser.add_argument('--output', default='./output/places2',
        help='Output dir')
    parser.add_argument(
        '--merge', action='store_true',
        help='Whether merge input and results for better viewing.')

    args = parser.parse_args()
    tester = Tester(args.model, args.input_size, args.batch_size)

    tester.inpaint(args.output, args.img, args.mask, merge_result=args.merge)

In [None]:
# Extract the uploaded 7z archive (/content/archive.7z) with the data into /content.
%cd /content/
!7z x /content/archive.7z

In [None]:
# install imagemagick
!sudo apt-get install imagemagick imagemagick-doc 

In [None]:
# Negate masks in place, if needed (requires ImageMagick's `convert`).
import cv2
import glob
import os
import subprocess
from tqdm import tqdm
import numpy as np

mask_dir = '/content/masks'
# All PNG files below mask_dir, including sub-folders.
files = glob.glob(mask_dir + '/**/*.png', recursive=True)

for f in tqdm(files):
  # Pass an argument list instead of a formatted shell string so that paths
  # containing spaces or shell metacharacters reach `convert` intact.
  # check=True stops on the first failure instead of silently ignoring it.
  subprocess.run(['convert', f, '-negate', f], check=True)

In [None]:
# Delete previous results and recreate an empty /content/output folder.
%cd /content/
!sudo rm -rf /content/output
!mkdir /content/output

In [None]:
# Pack the inpainted results (/content/output/result) into a gzip-compressed tar archive.
!tar -czvf /content/archive.tar.gz /content/output/result

In [None]:
# Run inference: inpaint every image in /content/input with the masks in
# /content/masks, one pair per batch (-b 1), and also write merged comparison images (--merge).
%cd /content/DFNet
!python test.py --model /content/DFNet/model/model_places2.pth --img /content/input --mask /content/masks --output /content/output --merge -b 1

In [None]:
# Delete everything: remove the output, input and mask folders, then
# recreate them empty.
%cd /content/
!sudo rm -rf /content/output
!sudo rm -rf /content/input
!sudo rm -rf /content/masks
!mkdir /content/output
!mkdir /content/input
!mkdir /content/masks