In [1]:
# Theano/Lasagne
import theano
import lasagne
import theano.tensor as T
from lasagne.utils import floatX
from lasagne.layers import *

# numpy/scipy/scikit
import numpy as np
import skimage.transform
import scipy.optimize

# notebook
from nbtools import display_img_array
from IPython.display import clear_output

# for vgg19 model and conv2dlayer
from vgg_model import load_vgg19, ConvLayer

# neural style helper functions
from ns_helpers import prep_image, imread, get_img, Func, Eval, Shared, deprocess

Using gpu device 0: GeForce GTX 980 (CNMeM is enabled with initial size: 75.0% of memory, CuDNN 4007)


Set some parameters

In [2]:
# Load the VGG19 network from the pickled weights file. The returned `net`
# is indexed by layer name further down to pick the content and MRF layers.
net = load_vgg19(pkl_filename='vgg19.pkl')

Use a convolution to find the nearest neighbor of each patch

In [3]:
# Builder of the best-match (nearest-neighbour) routine.
# Not using lru_cache because of Python 2.7 compatibility.
def build_best_match(C, psize, num_patches, input_img, mem={}):
    """Build, or fetch from cache, a Theano function that matches patches against ``input_img``.

    The compiled function takes one 4-D tensor of query patches (used as the
    convolution filters, so presumably shaped
    ``(num_patches, C, psize, psize)`` -- TODO confirm against ``ConvLayer``)
    and returns a list ``[max, argmax, shape]``:

    * ``max``    -- for every query patch, the highest normalised correlation
      found over all ``psize x psize`` windows of ``input_img``;
    * ``argmax`` -- the position of that window, flattened over the two
      spatial axes of the correlation map;
    * ``shape``  -- the 4-element shape of the full correlation map, which is
      needed to unravel ``argmax``.

    Parameters
    ----------
    C : int
        Number of channels of ``input_img``.
    psize : int
        Side length of the square patches.
    num_patches : int
        Number of query patches passed per call.
    input_img : Theano variable
        Image/feature map that is searched; it is baked into the compiled
        function as the network input.
    mem : dict
        Cache of compiled functions. The mutable default is intentional: it
        persists across calls and acts as the memo table.

    Notes
    -----
    The cache key is ``(C, psize, num_patches)`` only. A cached function
    therefore stays bound to the ``input_img`` variable that was passed when
    it was first built; later calls with a different variable but the same
    key reuse the old one.
    NOTE(review): whether the convolution flips its filters depends on
    ``ConvLayer``'s defaults, which are not visible here -- confirm it
    computes a correlation.
    """
    key = (C, psize, num_patches)
    if key not in mem:
        # Norm of every psize x psize window of the image: sum the squares
        # over channels, box-filter with a kernel of ones, then take sqrt.
        layer_img = InputLayer((1, C, None, None), input_var=input_img)
        layer_sqr_sum = ExpressionLayer(layer_img, lambda x: T.sum(T.sqr(x), axis=1, keepdims=True), output_shape=(1, 1, None, None))
        layer_norm = ConvLayer(layer_sqr_sum, 1, psize, pad='valid', W=floatX(np.ones((1, 1, psize, psize))), b=None, nonlinearity=T.sqrt)
        layer_norm_reshape = ReshapeLayer(layer_norm, ([2], [3]))
        # Norm of every query patch. The square root matters: dividing by the
        # squared norm (as was done before) leaves the argmax untouched but
        # makes the returned maximum scale with 1/|patch| instead of being a
        # normalised correlation.
        input_patch = T.tensor4()
        patch_norm = T.sqrt((input_patch**2).sum(axis=3).sum(axis=2).sum(axis=1)).reshape((1, num_patches, 1, 1))
        # Inner product of every query patch with every image window.
        layer_prod = ConvLayer(layer_img, num_patches, psize, pad='valid', W=input_patch, b=None, nonlinearity=None)
        layer_corr = ElemwiseMergeLayer([layer_prod, layer_norm_reshape], lambda a, b: (a/b)/patch_norm)
        layer_corr_output = lasagne.layers.get_output(layer_corr)
        # Pack into a function to find the nearest neighbour.
        mem[key] = theano.function([input_patch], T.max_and_argmax(layer_corr_output, axis=(2,3))+[T.shape(layer_corr_output)])
    return mem[key]

Scan through the content image and, for each PSIZE x PSIZE patch, find its best match (nearest neighbor) in the style image

In [4]:
def match_all(photo_feature, style_feature, IMAGE_H, IMAGE_W, PSIZE=3, stride=2):
    """Match every PSIZE x PSIZE patch of ``photo_feature`` to a patch of ``style_feature``.

    Parameters
    ----------
    photo_feature, style_feature : arrays of shape (1, C, h, w)
        Feature maps with the same number of channels (asserted).
    IMAGE_H, IMAGE_W :
        Unused; kept so existing callers keep working.
    PSIZE : int
        Patch side length.
    stride : int
        Only every ``stride``-th row of patches is actually matched.

    Returns
    -------
    mapping_idx : int64 array of shape (H, W)
        For the patch whose top-left corner is (j, n), the index of the
        matched style patch, flattened over the style patch grid (the last
        two axes of the correlation-map shape returned by the matcher).
    mapping_weight : None
        Placeholder; weights are not computed.

    Notes
    -----
    Rows skipped by ``stride`` used to stay at 0, which silently mapped them
    to the style patch at (0, 0). They are now filled from the matched row
    above, shifted down by the same number of rows in the style grid
    (clamped to the last row). The overlapping parts of neighbouring
    patches then agree.
    """
    # There are H rows of patches and W patches in each row.
    assert photo_feature.shape[0] == 1
    assert style_feature.shape[0] == 1
    assert photo_feature.shape[1] == style_feature.shape[1]
    C = photo_feature.shape[1]
    H = photo_feature.shape[2] - PSIZE + 1
    W = photo_feature.shape[3] - PSIZE + 1

    # Build the best_match routine; one call matches a whole row of W patches.
    input_img = Shared(("match_input_img"), style_feature)
    best_match = build_best_match(C, PSIZE, W, input_img)

    # Set up arrays for input patches and output results.
    patches = np.zeros(shape=(W, C, PSIZE, PSIZE), dtype=photo_feature.dtype)
    mapping_idx = np.zeros((H, W), dtype='int64')
    mapping_weight = None

    # Do the match row by row.
    for j in range(0, H, stride):
        for n in range(W):
            patches[n] = photo_feature[0, :, j:j+PSIZE, n:n+PSIZE]
        m, idx, s = best_match(patches)
        mapping_idx[j] = idx

        # Fill the rows that the stride skips (see Notes in the docstring).
        grid = (int(s[2]), int(s[3]))
        ys, xs = np.unravel_index(np.asarray(idx).reshape(-1), grid)
        for dj in range(1, min(stride, H - j)):
            shifted_ys = np.minimum(ys + dj, grid[0] - 1)
            mapping_idx[j + dj] = np.ravel_multi_index((shifted_ys, xs), grid)
    return mapping_idx, mapping_weight

In [5]:
def plot_match_result_with_a2(mapping_idx, mapping_weight, src, psize, method):
    """Rebuild a feature map from matched ``src`` patches, plus the map of their squares.

    For every target position (j, n) the matched ``psize x psize`` patch of
    ``src`` is added into the output at rows j..j+psize and columns
    n..n+psize. Overlapping contributions are summed and, for
    ``method == 'average'``, divided by the number of patches covering each
    pixel.

    Parameters
    ----------
    mapping_idx : int array of shape (H, W)
        Flat indices into the patch grid of ``src``, i.e. a grid of shape
        ``(src.shape[2]-psize+1, src.shape[3]-psize+1)``.
    mapping_weight :
        Unused.
    src : array of shape (N, C, h, w)
        Source feature map the patches are taken from.
    psize : int
        Patch side length.
    method : {'max', 'average'}
        'average' normalises by coverage; 'max' returns the raw sums.

    Returns
    -------
    (preview, a2) : two arrays shaped like ``src``
        The accumulated patches and the accumulated squared patches.
    """
    assert method in ('max', 'average')
    H, W = mapping_idx.shape
    S = src.shape[2]//(H+psize-1)
    assert 1 == S

    # The indices address src's own patch grid. Unravelling with
    # mapping_idx.shape (as before) is only correct when both grids have the
    # same shape.
    src_grid = (src.shape[2] - psize + 1, src.shape[3] - psize + 1)

    preview = np.zeros(src.shape)
    a2 = np.zeros(src.shape)
    preview_weight = np.zeros(src.shape)
    for j in range(H):
        for n in range(W):
            y, x = np.unravel_index(mapping_idx[j, n], src_grid)
            patch = src[:, :, y:(y+psize), x:(x+psize)]
            window = (slice(None), slice(None), slice(j, j+psize), slice(n, n+psize))
            preview[window] += patch
            a2[window] += patch**2
            preview_weight[window] += 1
    if method == 'average':
        # Pixels that no patch covers keep the value 0 instead of 0/0 = NaN.
        coverage = np.maximum(preview_weight, 1)
        preview /= coverage
        a2 /= coverage
    return preview, a2

SyntaxError: invalid syntax (<ipython-input-5-ec8b03fadaa8>, line 22)

Set and load content image and style image

In [None]:
# Content image (img0) and style image (img1).
img0 = imread("CNNMRF/data/content/0.jpg")
img1 = imread("CNNMRF/data/style//0.jpg")
# Working size: height fixed at 384, width scaled to keep the content
# image's aspect ratio (integer arithmetic).
imgh, imgw = 384, 384*img0.shape[1]//img0.shape[0]

In [None]:
# Show the content and style images side by side, using the first element
# returned by prep_image for each at the working size.
display_img_array(np.hstack([prep_image(img0, imgw, imgh)[0], prep_image(img1, imgw, imgh)[0]]))

## Neural Style transformation

Set the content layers and the MRF (style) layers

In [None]:
# Layer whose activations define the content loss.
content_layers = ['conv4_2']
# Layers whose patches are matched for the MRF loss.
# (Unused alternative: style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1'])
mrf_layers = ['conv4_1', 'conv3_1']
# Name -> layer lookup restricted to the layers actually used.
layers = {name: net[name] for name in content_layers + mrf_layers}

Basic norm and loss functions

In [None]:
def gram_matrix(x):
    """Flatten ``x`` to three dimensions and contract axis 2 of the result with itself."""
    flat = x.flatten(ndim=3)
    return T.tensordot(flat, flat, axes=([2], [2]))

def content_loss(P, X, layer):
    """Mean squared error between ``X[layer]`` and ``P[layer]``.

    ``P`` and ``X`` are mappings from layer name to a 4-D feature tensor;
    the summed squared error is divided by the element count of ``X[layer]``.
    """
    target = P[layer]
    current = X[layer]
    n_elements = current.shape[0] * current.shape[1]*current.shape[2]*current.shape[3]
    scale = 1./n_elements
    return scale * lasagne.objectives.squared_error(current, target).sum()

def total_variation_loss(x):
    """Sum of squared differences between each pixel and its lower and right neighbours.

    ``x`` is indexed as a 4-D array; the last row and last column have no
    such neighbours and are left out.
    """
    here = x[:, :, :-1, :-1]
    below = x[:, :, 1:, :-1]
    right = x[:, :, :-1, 1:]
    vertical = (here - below)**2
    horizontal = (here - right)**2
    return (vertical + horizontal).sum()

In [None]:
def mrf_loss(A, A2, X, layer):
    """MRF loss for one layer: ``0.5*sum(x**2) + a2 - sum(x*a)``.

    ``A``, ``A2`` and ``X`` are mappings keyed by layer name. ``A[layer]`` is
    the target built from matched patches, ``A2[layer]`` is an additive term
    that does not depend on ``x``, and ``X[layer]`` holds the features of the
    image being optimised.
    """
    target = A[layer]
    target_sq = A2[layer]
    feat = X[layer]
    self_term = 0.5*(feat**2).sum()
    cross_term = (feat*target).sum()
    return self_term + target_sq - cross_term

In [None]:
def transfer(img0, img1, IMAGE_H, IMAGE_W, PSIZE, init_img=None, mrf_weight=1e-4, content_weight=2e1, tv_weight=1e-3):
    """Optimise an image so it keeps the content of ``img0`` with patches taken from ``img1``.

    The generated image starts from ``init_img`` (or from the content image
    when ``init_img`` is None). It is optimised with L-BFGS on a weighted sum
    of a total-variation loss, a content loss on ``content_layers`` and an
    MRF patch loss on ``mrf_layers``. The patch matching is recomputed on
    every loss evaluation.

    Parameters
    ----------
    img0, img1 :
        Content and style images, passed through ``get_img`` and ``prep_image``.
    IMAGE_H, IMAGE_W : int
        Working resolution handed to ``prep_image``.
    PSIZE : int
        Patch size used for matching.
    init_img :
        Optional starting image (e.g. the result of a coarser run); passed
        directly to ``prep_image``.
    mrf_weight, content_weight, tv_weight : float
        Weights of the three loss terms.

    Returns
    -------
    The deprocessed image of the final optimiser state.

    Notes
    -----
    NOTE(review): this relies on ``Shared(name, value)`` returning the *same*
    shared variable for the same name and merely updating its value;
    otherwise the targets refreshed in ``update_mrf_loss_weight`` would not
    reach the already compiled functions. Confirm in ``ns_helpers``.
    """
    # Prepare content and style.
    raw_content, content = prep_image(get_img(img0), IMAGE_W, IMAGE_H)
    print("content", content.shape)
    raw_style, style = prep_image(get_img(img1), IMAGE_W, IMAGE_H)

    # One shared input variable feeds the network; its value is swapped
    # between style, content and the generated image.
    input_image = Shared(("input_image"), style)
    outputs = dict(zip(layers.keys(), lasagne.layers.get_output(layers.values(), input_image)))
    compute_mrf_features = lambda: {k: np.array(Eval(k, outputs[k])) for k in mrf_layers}

    input_image.set_value(style)
    style_mrf_features = compute_mrf_features()
    style_features = {k: Shared(("style_"+k), v) for k, v in style_mrf_features.items()}

    input_image.set_value(content)
    content_features = {k: Shared(("content_"+k), Eval(k, outputs[k])) for k in content_layers}

    # The generated image IS the network input variable.
    generated_image = input_image
    if init_img is not None:
        raw_init, init = prep_image(init_img, IMAGE_W, IMAGE_H)
        generated_image.set_value(init)
    else:
        generated_image.set_value(content)
    gen_features = outputs

    x0 = input_image.get_value()
    # Use the actual shape of the prepared image for every reshape below
    # rather than assuming prep_image returned exactly (1, 3, IMAGE_H, IMAGE_W).
    image_shape = x0.shape
    dx0 = deprocess(x0)
    display_img_array(dx0)

    # Targets of the MRF loss, keyed by layer name.
    style_a = {}
    style_a2 = {}

    def update_mrf_loss_weight():
        # Re-match the current image's patches against the style patches and
        # rebuild the averaged targets for every MRF layer.
        x0_mrf_features = compute_mrf_features()
        for k in mrf_layers:
            src = style_mrf_features[k]
            mapping_idx, mapping_weight = match_all(x0_mrf_features[k], src, IMAGE_H, IMAGE_W, PSIZE)
            A, A2 = plot_match_result_with_a2(mapping_idx, mapping_weight, src, PSIZE, "average")
            style_a[k] = Shared(('A_'+k), floatX(A))
            style_a2[k] = Shared(('A2_'+k), floatX(A2.sum()))

    # Prepare the initial mapping, then assemble the losses.
    update_mrf_loss_weight()
    tv_scalar = Shared('tv_weight', tv_weight)
    c_scalar = Shared("content_weight", content_weight)
    s_scalar = Shared("mrf_weight", mrf_weight)
    tv_loss = tv_scalar * total_variation_loss(generated_image)
    c_loss = 0.
    for layer in content_layers:
        c_loss += c_scalar * content_loss(content_features, gen_features, layer)
    s_loss = 0.
    for layer in mrf_layers:
        s_loss += s_scalar * mrf_loss(style_a, style_a2, gen_features, layer)

    total_loss = tv_loss + c_loss + s_loss
    f_errors = Func(('errors'), [], [tv_loss, c_loss, s_loss])
    grad = T.grad(total_loss, generated_image)
    # Theano functions to evaluate loss and gradient.
    f_loss = Func(("loss"), [], total_loss)
    f_grad = Func(("grad"), [], grad)
    x0 = generated_image.get_value().astype('float64')

    def eval_loss(x0):
        x0 = floatX(x0.reshape(image_shape))
        generated_image.set_value(x0)
        update_mrf_loss_weight()
        return f_loss().astype('float64')

    def eval_grad(x0):
        # Load the point we were asked about instead of relying on eval_loss
        # having been called with the same x just before.
        generated_image.set_value(floatX(x0.reshape(image_shape)))
        return np.array(f_grad()).flatten().astype('float64')

    for i in range(10):
        x, loss, d = scipy.optimize.fmin_l_bfgs_b(eval_loss, x0.flatten(), fprime=eval_grad, maxiter=40, factr=1e1)
        # The shared variable holds the last point that was *evaluated*, which
        # after a failed line search is not the point the optimiser returns.
        # Sync it to the returned solution before reporting and restarting.
        generated_image.set_value(floatX(x.reshape(image_shape)))
        x0 = generated_image.get_value()
        errors = [float(e) for e in f_errors()]
        print(i, loss, errors, d)
        dx0 = deprocess(x0)
        display_img_array(dx0)
        # warnflag 0 means the optimiser converged; otherwise restart from x0.
        if d['warnflag'] == 0:
            break
    return dx0
    


In [None]:
# Coarse-to-fine: quarter, half, then full resolution. Each result seeds
# the next run; the first run starts without an init image.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img0, img1, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
# Same coarse-to-fine schedule with the roles of the two images swapped
# and a lower content weight.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img1, img0, imgh//shrink, imgw//shrink, 3, init_img=dx, content_weight=0.5e1)

In [None]:
img2 = imread("img/tjw1.jpg")
img3 = imread("simpsguestmain.jpg")
imgh, imgw = 480,640
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
img2 = imread("img/tjw1.jpg")
img3 = imread("Sofia-Vergara-in-the-Simpsons-647x395.jpg")
imgh, imgw = 480,640
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
img2 = imread("2015122760189693.jpg")
img3 = imread("Sofia-Vergara-in-the-Simpsons-647x395.jpg")
imgh, imgw = 480,640
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
img2 = imread("img/tjw1.jpg")
img3 = imread("CNNMRF/data/style/picasso.jpg")
# Working size taken from the style image.
imgh, imgw = img3.shape[:2]
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
img2 = imread("CNNMRF/data/content/potrait1.jpg")
img3 = imread("CNNMRF/data/style/picasso.jpg")
# Working size taken from the style image.
imgh, imgw = img3.shape[:2]
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
# Content image. An earlier line loaded "CNNMRF/data/content/potrait1.jpg"
# into img2 and was immediately overwritten by the load below. That dead
# read has been dropped; swap the path here to use the other image.
img2 = imread("img/tjw1.jpg")
img3 = imread("img/Paris-Musee-DOrsay-Vincent-van-Gogh-1889-Self-Portrait-2-Close-Up.jpg")
imgh, imgw = 384,420
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
img2 = imread("CNNMRF/data/content/d.jpg")
img3 = imread("CNNMRF/data/style/d.jpg")
# Working size taken from the content image.
imgh, imgw = img2.shape[:2]
# Coarse-to-fine: quarter, half, then full resolution; each result seeds the next.
dx = None
for shrink in (4, 2, 1):
    dx = transfer(img2, img3, imgh//shrink, imgw//shrink, 3, init_img=dx)

In [None]:
# Show the image currently held in img1 at a display width of 200.
display_img_array(img1, width=200)

In [None]:
# Show the most recent transfer result (dx) at a display width of 200.
display_img_array(dx, width=200)