# Dependencies - Restart runtime after executing next cell


In [1]:
# Python packages imported or required by the cells below
# (svgwrite, svgpathtools, cssutils, lpips, ftfy/regex/tqdm, wandb).
# As the section title says, restart the runtime once this cell has finished.
!pip install svgwrite
!pip install svgpathtools
!pip install cssutils
!pip install lpips
!pip install ftfy regex tqdm
!pip install wandb

# install diffvg
# Built from source: clone the repository, fetch its git submodules, then run
# its setup.py from inside the checkout before returning to the parent directory.
!git clone https://github.com/BachiLi/diffvg
%cd diffvg
!git submodule update --init --recursive
!python setup.py install
%cd ..

# install CLIP
# Installed straight from the OpenAI GitHub repository.
!pip install git+https://github.com/openai/CLIP.git

Collecting svgwrite
  Downloading svgwrite-1.4.1-py3-none-any.whl (66 kB)
[?25l[K     |█████                           | 10 kB 25.3 MB/s eta 0:00:01[K     |█████████▉                      | 20 kB 27.3 MB/s eta 0:00:01[K     |██████████████▊                 | 30 kB 31.5 MB/s eta 0:00:01[K     |███████████████████▋            | 40 kB 22.4 MB/s eta 0:00:01[K     |████████████████████████▌       | 51 kB 18.8 MB/s eta 0:00:01[K     |█████████████████████████████▍  | 61 kB 15.6 MB/s eta 0:00:01[K     |████████████████████████████████| 66 kB 1.8 MB/s 
[?25hInstalling collected packages: svgwrite
Successfully installed svgwrite-1.4.1
Collecting svgpathtools
  Downloading svgpathtools-1.4.4-py2.py3-none-any.whl (66 kB)
[K     |████████████████████████████████| 66 kB 3.9 MB/s 
Installing collected packages: svgpathtools
Successfully installed svgpathtools-1.4.4
Collecting cssutils
  Downloading cssutils-2.3.0-py3-none-any.whl (404 kB)
[K     |████████████████████████████████| 4

In [1]:
import pydiffvg
import torch
import torchvision.models as models
import numpy as np
import random
import lpips
import clip
import wandb
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from subprocess import call
from google.colab import files

# Differentiable Drawing Wrapper

In [15]:
class DifferentiableDrawer():
    """
    Wrapper around pydiffvg that owns a growing set of random paths, the
    optimizers driving their parameters, and the render call.

    Three optimizers are kept, one per parameter family (control points,
    colors, stroke widths), each with its own learning rate. They are created
    lazily on the first `add_shapes` call that provides parameters for them,
    so `points_optim`, `color_optim` and `width_optim` are None until then.

    Parameters
    - device : torch device handed to pydiffvg and used for rendering
    - width, height : canvas size in pixels
    - max_stroke_width : upper bound applied to stroke widths after each step
    - gamma : gamma passed to pydiffvg.imwrite when saving intermediate frames
    - optim : optimizer class (called as `optim(params, lr=...)`)
    """

    # Learning rate used for each parameter family.
    POINTS_LR = 1.0
    COLOR_LR = 0.05
    WIDTH_LR = 0.01

    def __init__(self, device, width, height, max_stroke_width=2., gamma=1.0, optim=torch.optim.Adam):
        pydiffvg.set_print_timing(False)
        pydiffvg.set_use_gpu(torch.cuda.is_available())
        pydiffvg.set_device(device)
        self.device = device
        self.canvas_width, self.canvas_height = width, height
        self.max_stroke_width, self.gamma = max_stroke_width, gamma
        self.shapes = []
        self.shape_groups = []
        # Optimizers cannot be built on an empty parameter list, so they are
        # instantiated in _register_params once real parameters exist.
        self._optim_cls = optim
        self.points_optim = None
        self.color_optim = None
        self.width_optim = None
        self.renderer = pydiffvg.RenderFunction.apply

    def _random_points(self, num_segments, closed, radius=0.05):
        """
        Build the point tensor of one random path, scaled to canvas pixels.

        Starts from a uniformly random point of the unit square and, for each
        segment, chains three points each offset from the previous one by at
        most radius/2 per axis. For a closed path the last point of the final
        segment is left out.
        """
        def jitter(origin):
            return (origin[0] + radius * (random.random() - 0.5),
                    origin[1] + radius * (random.random() - 0.5))

        p0 = (random.random(), random.random())
        points = [p0]
        for j in range(num_segments):
            p1 = jitter(p0)
            p2 = jitter(p1)
            p3 = jitter(p2)
            points.append(p1)
            points.append(p2)
            if not closed or j < num_segments - 1:
                points.append(p3)
                p0 = p3
        points = torch.tensor(points)
        points[:, 0] *= self.canvas_width
        points[:, 1] *= self.canvas_height
        return points

    def _register_params(self, optim_attr, params, lr):
        """
        Hand `params` to the optimizer stored in attribute `optim_attr`,
        creating that optimizer (with learning rate `lr`) if it does not exist.
        """
        if not params:
            return
        optimizer = getattr(self, optim_attr)
        if optimizer is None:
            setattr(self, optim_attr, self._optim_cls(params, lr=lr))
        else:
            # The new group inherits the optimizer's defaults (including lr).
            optimizer.add_param_group({'params': params})

    def add_shapes(self, n=256, shape="path", pts_range=(1,4)):
        """
        Add shapes to the set of shapes
        The shape parameter should be :
        - path (open stroked path; points, stroke width and stroke color are optimized)
        - filled_path (closed filled path; points and fill color are optimized)

        n is the number of shapes to add and pts_range the inclusive
        (min, max) number of segments drawn at random for each of them.
        Raises ValueError for any other value of `shape`.
        """
        if shape not in ("path", "filled_path"):
            raise ValueError(
                "shape should be 'path' or 'filled_path', got {!r}".format(shape)
            )
        closed = shape == "filled_path"

        # Define shapes
        shapes = []
        shape_groups = []
        # Group ids index into the full shape list handed to the renderer,
        # so numbering continues after the shapes that already exist.
        first_id = len(self.shapes)
        for i in range(n):
            num_segments = random.randint(pts_range[0], pts_range[1])
            path = pydiffvg.Path(
                num_control_points = torch.zeros(num_segments, dtype = torch.int32) + 2,
                points = self._random_points(num_segments, closed),
                stroke_width = torch.tensor(1.0),
                is_closed = closed
            )
            shapes.append(path)
            color = torch.tensor([random.random() for _ in range(4)])
            shape_ids = torch.tensor([first_id + i])
            if closed:
                path_group = pydiffvg.ShapeGroup(
                    shape_ids = shape_ids,
                    fill_color = color
                )
            else:
                path_group = pydiffvg.ShapeGroup(
                    shape_ids = shape_ids,
                    fill_color = None,
                    stroke_color = color
                )
            shape_groups.append(path_group)

        # Get parameters
        points_vars = []
        color_vars = []
        stroke_width_vars = []
        for path in shapes:
            path.points.requires_grad = True
            points_vars.append(path.points)
            if not closed:
                path.stroke_width.requires_grad = True
                stroke_width_vars.append(path.stroke_width)
        for group in shape_groups:
            color = group.fill_color if closed else group.stroke_color
            color.requires_grad = True
            color_vars.append(color)

        # Add parameters to optimizer
        self._register_params('points_optim', points_vars, self.POINTS_LR)
        self._register_params('color_optim', color_vars, self.COLOR_LR)
        self._register_params('width_optim', stroke_width_vars, self.WIDTH_LR)

        self.shapes += shapes
        self.shape_groups += shape_groups

    def _optimizers(self):
        """Return the optimizers that have been created so far."""
        candidates = (self.points_optim, self.color_optim, self.width_optim)
        return [optimizer for optimizer in candidates if optimizer is not None]

    def zero_grad(self):
        """Reset the gradients of every optimized parameter."""
        for optimizer in self._optimizers():
            optimizer.zero_grad()

    def step(self):
        """
        Take one optimizer step, then clamp the parameters back into range:
        optimized stroke widths to [1, max_stroke_width], colors to [0, 1].
        """
        for optimizer in self._optimizers():
            optimizer.step()
        for path in self.shapes:
            # Only stroked paths have their width optimized.
            if path.stroke_width.requires_grad:
                path.stroke_width.data.clamp_(1.0, self.max_stroke_width)
        for group in self.shape_groups:
            if group.fill_color is not None:
                group.fill_color.data.clamp_(0.0, 1.0)
            if group.stroke_color is not None:
                group.stroke_color.data.clamp_(0.0, 1.0)

    def render(self, seed):
        """
        Render the current shapes over a white background.

        `seed` is forwarded to the renderer and also names the frame written
        to results/painterly_rendering/iter_<seed>.png.
        Returns the image as a tensor with a leading batch dimension of 1 and
        the channel axis moved in front of the two spatial axes.
        """
        # Forward pass: render the image.
        scene_args = pydiffvg.RenderFunction.serialize_scene(
            self.canvas_width,
            self.canvas_height,
            self.shapes,
            self.shape_groups
        )
        img = self.renderer(
            self.canvas_width, # width
            self.canvas_height, # height
            2,   # num_samples_x
            2,   # num_samples_y
            seed,   # seed
            None,
            *scene_args
        )
        # Compose img with white background
        alpha = img[:, :, 3:4]
        img = alpha * img[:, :, :3] + (1 - alpha)
        # Save the intermediate render.
        pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/iter_{}.png'.format(seed), gamma=self.gamma)
        # Convert img from HWC to NCHW
        img = img.unsqueeze(0)
        img = img.permute(0, 3, 1, 2) # NHWC -> NCHW
        return img

# Image Optimization

In [None]:
import pydiffvg
import torch
import torchvision.models as models
import numpy as np
import random
import lpips
import clip
import wandb
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from subprocess import call

# Parameters
n_paths = 100
n_iter = 500
max_stroke_width = 3
gamma = 1.0
percep_loss = False # Set True to compare image features instead of L2 loss
use_filled_path = False # Set True to use filled curves instead of simple path
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize target image
target_img = np.array(Image.open("img1.png").convert('RGB').resize((320,180))) 
target = torch.from_numpy(target_img).to(torch.float32) / 255.0
target = target.pow(gamma)
target = target.to(device)
target = target.unsqueeze(0)
target = target.permute(0, 3, 1, 2) # NHWC -> NCHW
canvas_width, canvas_height = target.shape[3], target.shape[2]

# Initialize diffvg and drawer
drawer = DifferentiableDrawer(
    device, 
    width=canvas_width, 
    height=canvas_height, 
    max_stroke_width=max_stroke_width, 
    gamma=gamma, 
    optim=torch.optim.Adam
)

# Add paths (the kind of shape follows the use_filled_path flag)
shape_kind = "filled_path" if use_filled_path else "path"
drawer.add_shapes(n_paths, shape=shape_kind, pts_range=(2,5))

l2_criterion = torch.nn.MSELoss()
# The LPIPS network has to live on the same device as the images it compares.
percep_criterion = lpips.LPIPS(net='vgg').to(device)

NAME = "L2_Adam[+100paths/100iter]"
wandb.init(
    project="recvis-project",
    name=NAME
)

with tqdm(range(n_iter)) as loop:
    for t in loop:
        # Every 100 iterations, add some paths
        if t and (t%100==0):
            drawer.add_shapes(n_paths, shape=shape_kind, pts_range=(2,5))

        drawer.zero_grad() # Zero grad
        img = drawer.render(t) # Forward pass: render the image.

        # Compute losses
        # img and target are in [0, 1]; normalize=True lets LPIPS rescale them
        # to the [-1, 1] range it expects.
        l2_loss = l2_criterion(img, target)
        if percep_loss:
            lpips_loss = percep_criterion(img, target, normalize=True).mean()
            loss = lpips_loss
        else:
            # LPIPS is only monitored here, so skip building its autograd graph.
            with torch.no_grad():
                lpips_loss = percep_criterion(img, target, normalize=True).mean()
            loss = l2_loss

        loss.backward() # Backpropagate the gradients.
        drawer.step() # Take a gradient descent step.

        # Record metrics
        metrics = {
            'lpips': lpips_loss.item(),
            'L2': l2_loss.item(),
            'loss': loss.item()
        }
        loop.set_postfix(render_loss=metrics['loss'])
        wandb.log(metrics)

Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /usr/local/lib/python3.7/dist-packages/lpips/weights/v0.1/vgg.pth


 25%|██▌       | 126/500 [09:06<43:50,  7.03s/it, render_loss=0.0226]

# CLIP-Guided Generation

In [None]:
import pydiffvg
import torch
import torchvision.models as models
import numpy as np
import random
import clip
from google.colab import files
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from subprocess import call

# Parameters
n_paths = 256
n_iter = 1000
max_stroke_width = 20
gamma = 1.0
use_filled_path = False # Set True to use filled curves instead of simple path
n_augment = 4 # Number of augmentation per round

# Initialize diffvg and GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pydiffvg.set_print_timing(False)
pydiffvg.set_use_gpu(torch.cuda.is_available())
pydiffvg.set_device(device)

# Load CLIP and target
model, preprocess = clip.load('ViT-B/32', device, jit=False)
model.eval()
target_txt = clip.tokenize("Gouache painting of a house in the middle of a field on a sunny day").to(device)
with torch.no_grad():
    target_features = model.encode_text(target_txt)
canvas_width, canvas_height = 224, 224
clip_transforms = transforms.Compose([
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073), 
                         (0.26862954, 0.26130258, 0.27577711))
])
augmentation_transforms = transforms.Compose([
    transforms.RandomPerspective(fill=1, p=1, distortion_scale=0.5),
    transforms.RandomResizedCrop(224, scale=(0.7,0.9)),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073), 
                         (0.26862954, 0.26130258, 0.27577711))
])

def make_random_path(shape_id, filled, seg_range, radius=0.05):
    """
    Build one random path and its shape group.

    The path starts at a uniformly random point of the unit square; each of
    its segments chains three points, each offset from the previous one by at
    most radius/2 per axis, and the result is scaled to canvas pixels.
    A filled path is closed (the final point of its last segment is dropped)
    and carries a random RGBA fill color; otherwise the path is open and
    carries a random RGBA stroke color.
    """
    def jitter(origin):
        return (origin[0] + radius * (random.random() - 0.5),
                origin[1] + radius * (random.random() - 0.5))

    num_segments = random.randint(seg_range[0], seg_range[1])
    num_control_points = torch.zeros(num_segments, dtype = torch.int32) + 2
    p0 = (random.random(), random.random())
    points = [p0]
    for j in range(num_segments):
        p1 = jitter(p0)
        p2 = jitter(p1)
        p3 = jitter(p2)
        points.append(p1)
        points.append(p2)
        if not filled or j < num_segments - 1:
            points.append(p3)
            p0 = p3
    points = torch.tensor(points)
    points[:, 0] *= canvas_width
    points[:, 1] *= canvas_height
    path = pydiffvg.Path(
        num_control_points = num_control_points,
        points = points,
        stroke_width = torch.tensor(1.0),
        is_closed = filled
    )
    color = torch.tensor([random.random() for _ in range(4)])
    if filled:
        path_group = pydiffvg.ShapeGroup(
            shape_ids = torch.tensor([shape_id]),
            fill_color = color
        )
    else:
        path_group = pydiffvg.ShapeGroup(
            shape_ids = torch.tensor([shape_id]),
            fill_color = None,
            stroke_color = color
        )
    return path, path_group

# Initialize Shapes
shapes = []
shape_groups = []
# Filled shapes use 3 to 5 segments, stroked paths 1 to 2.
seg_range = (3, 5) if use_filled_path else (1, 2)
for i in range(n_paths):
    path, path_group = make_random_path(i, use_filled_path, seg_range)
    shapes.append(path)
    shape_groups.append(path_group)

# diffvg setup
render = pydiffvg.RenderFunction.apply

# Collect the tensors to optimize
points_vars = []
color_vars = []
stroke_width_vars = []
for path in shapes:
    path.points.requires_grad = True
    points_vars.append(path.points)
    if not use_filled_path:
        path.stroke_width.requires_grad = True
        stroke_width_vars.append(path.stroke_width)
for group in shape_groups:
    color = group.fill_color if use_filled_path else group.stroke_color
    color.requires_grad = True
    color_vars.append(color)

def shift_image(x=0.8):
    """Scale every path's points by x in place (not called in this cell)."""
    with torch.no_grad():
        for path in shapes:
            path.points *= x

# Optimizers and losses
# Stroke widths only exist as parameters for stroked paths, and an optimizer
# cannot be built on an empty list, so the width optimizer is optional.
optimizers = [
    torch.optim.Adam(points_vars, lr=2.0),
    torch.optim.Adam(color_vars, lr=0.01),
]
if stroke_width_vars:
    optimizers.append(torch.optim.Adam(stroke_width_vars, lr=0.01))

with tqdm(range(n_iter)) as loop:
    for t in loop:
        # Zero grad
        for optimizer in optimizers:
            optimizer.zero_grad()

        # Forward pass: render the image.
        scene_args = pydiffvg.RenderFunction.serialize_scene(
            canvas_width,
            canvas_height,
            shapes,
            shape_groups
        )
        img = render(
            canvas_width, # width
            canvas_height, # height
            2,   # num_samples_x
            2,   # num_samples_y
            t,   # seed
            None,
            *scene_args
        )
        # Compose img with white background
        img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device = device) * (1 - img[:, :, 3:4])
        # Save the intermediate render.
        pydiffvg.imwrite(img.cpu(), 'results/painterly_rendering/iter_{}.png'.format(t), gamma=gamma)
        # Convert img from HWC to NCHW
        img = img.unsqueeze(0)
        img = img.permute(0, 3, 1, 2) # NHWC -> NCHW

        # Loss: minus the summed cosine similarity between the text features
        # and the features of n_augment random augmentations of the render.
        imgs = torch.cat([augmentation_transforms(img) for i in range(n_augment)])
        imgs_features = model.encode_image(imgs)
        loss = -torch.cosine_similarity(target_features, imgs_features, dim=1).sum()

        # loss = torch.cosine_similarity(target_features, model.encode_image(clip_transforms(img)), dim=1)
        # Backpropagate the gradients.
        loss.backward()

        # Take a gradient descent step.
        for optimizer in optimizers:
            optimizer.step()
        if use_filled_path:
            for group in shape_groups:
                group.fill_color.data.clamp_(0.0, 1.0)
        else:
            for path in shapes:
                path.stroke_width.data.clamp_(1.0, max_stroke_width)
            for group in shape_groups:
                group.stroke_color.data.clamp_(0.0, 1.0)

        # log
        loop.set_postfix(render_loss=loss.item())


# Encode the saved frames into a video. "-y" overwrites an existing out.mp4
# instead of stopping at ffmpeg's confirmation prompt.
call(["ffmpeg", "-y", "-framerate", "60", "-i",
    "results/painterly_rendering/iter_%d.png", "-vb", "20M",
    "results/painterly_rendering/out.mp4"])
files.download('results/painterly_rendering/out.mp4')

 74%|███████▍  | 740/1000 [2:01:41<42:45,  9.87s/it, render_loss=-1.51]  


KeyboardInterrupt: ignored

In [8]:
# Encode the frames saved under results/painterly_rendering/ into out.mp4
# (60 fps input, 40M video bitrate, -y overwrites any existing file),
# then download the result through the Colab files helper.
from google.colab import files
!ffmpeg -y -framerate 60 -i results/painterly_rendering/iter_%d.png -vb 40M results/painterly_rendering/out.mp4
files.download('results/painterly_rendering/out.mp4')

ffmpeg version 3.4.8-0ubuntu0.2 Copyright (c) 2000-2020 the FFmpeg developers
  built with gcc 7 (Ubuntu 7.5.0-3ubuntu1~18.04)
  configuration: --prefix=/usr --extra-version=0ubuntu0.2 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --enable-gpl --disable-stripping --enable-avresample --enable-avisynth --enable-gnutls --enable-ladspa --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lib

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
# Delete the directory holding the saved frames and the encoded video.
!rm -r results/painterly_rendering/

In [None]:
# List the contents of the current working directory.
!ls

sample_data
