In [0]:
# Colab-only setup: mount Google Drive so the images stored there are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import os
# Work from the project folder so the relative image file names used by the
# driver cell resolve without a directory prefix.
os.chdir("/content/gdrive/My Drive/Colab Notebooks/图片翻转")

import cv2
# cv2_imshow is the Colab replacement for cv2.imshow (renders inline in the notebook).
from google.colab.patches import cv2_imshow
import numpy as np
from PIL import Image
import time

import tensorflow as tf
from tensorflow.python.keras.preprocessing import image as kp_image
from tensorflow.python.keras import models

# Eager execution is required by the rest of the notebook: the training loop
# calls .numpy() on tensors and uses tf.GradientTape directly.
# NOTE(review): tf.enable_eager_execution is a TensorFlow 1.x API - confirm the
# runtime's TensorFlow version before running.
tf.enable_eager_execution() #attempt to use eager execution
print("Eager execution: {}".format(tf.executing_eagerly()))




Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Eager execution: True


In [0]:
def get_position(image_path,h=512,w=384): #detect faces on a resized copy of the image
  """Detect frontal faces with OpenCV's bundled Haar cascade and show the result.

  The image is read from disk, resized, and every detection is drawn on it as a
  green rectangle before the annotated picture is displayed with cv2_imshow.

  Args:
    image_path: path of the image file to analyse.
    h: first element of the size handed to cv2.resize. cv2.resize expects
      (width, height), so despite its name this value is the target WIDTH.
    w: second element of the size handed to cv2.resize, i.e. the target HEIGHT.

  Returns:
    A list with one [x_left, y_top, x_right, y_bottom] entry (plain ints, in
    pixels of the resized image) per detected face; empty when none is found.

  Raises:
    IOError: if the image cannot be read or the cascade file cannot be loaded.
  """
  img = cv2.imread(image_path)
  if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError('cannot read image: {}'.format(image_path))
  img = cv2.resize(img,(h,w))

  # Prefer the cascade directory shipped with the installed cv2 package so the
  # lookup does not depend on one specific Python version; fall back to the
  # historical Colab location when the package does not expose it.
  cascade_dir = getattr(getattr(cv2, 'data', None), 'haarcascades', None)
  if cascade_dir is None:
    cascade_dir = '/usr/local/lib/python3.6/dist-packages/cv2/data/'
  cascade_path = os.path.join(cascade_dir, 'haarcascade_frontalface_default.xml')
  classifier = cv2.CascadeClassifier(cascade_path)
  if classifier.empty():
    raise IOError('cannot load face cascade: {}'.format(cascade_path))

  # scaleFactor=1.015 and minNeighbors=3 were tuned experimentally by the author.
  detect_region = classifier.detectMultiScale(img, 1.015, 3, cv2.CASCADE_DO_CANNY_PRUNING)
  collect = [] #several faces may be recognised

  # Distinct names are used here so the box size does not shadow the h/w parameters.
  for face_x, face_y, face_w, face_h in detect_region:
    left, top = int(face_x), int(face_y)
    right, bottom = int(face_x + face_w), int(face_y + face_h)
    cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0),2)
    collect.append([left, top, right, bottom])
  print('image of face recognition')
  cv2_imshow(img)
  return collect


In [0]:
from tensorflow.python.keras.preprocessing import image as kp_image

def vgg_process_img(image_path,to_gray=0): #load an image as a VGG19-ready batch of one
  """Load an image, rescale it so its longest side is 512 px, and VGG19-preprocess it.

  Args:
    image_path: path of the image file, opened with PIL.
    to_gray: mode flag. Despite the name, no grayscale conversion is performed;
      the only value with an effect is 2, which replaces the picture with
      uniformly random pixel values in [0, 256) (a noise start image). Any
      other value keeps the loaded picture.

  Returns:
    An array of shape (1, height, width, 3) that has been passed through
    tf.keras.applications.vgg19.preprocess_input.
  """
  max_dim = 512
  # Always work on three colour channels: grayscale, palette or RGBA files would
  # otherwise produce 1 or 4 channels, which VGG19 preprocessing cannot handle.
  # The context manager closes the underlying file once the pixels are converted.
  with Image.open(image_path) as source:
    img = source.convert('RGB')
  longest_side = max(img.size)

  scale = max_dim/longest_side
  # Image.LANCZOS is the same filter as the deprecated alias Image.ANTIALIAS.
  img = img.resize((round(img.size[0]*scale), round(img.size[1]*scale)), Image.LANCZOS)

  img = kp_image.img_to_array(img)

  # Add the leading batch axis expected by the model.
  img = img[np.newaxis, :]
  if to_gray==2: #use a random RGB image as the initial image
    img = np.random.randint(0,256,img.shape).astype('float32')

  img = tf.keras.applications.vgg19.preprocess_input(img)
  return img


In [0]:
def get_model(content_layers,style_layers): #build a frozen VGG19 feature extractor for the requested layers
  """Create a model that maps an image batch to the requested VGG19 activations.

  Args:
    content_layers: list of VGG19 layer names used for the content loss.
    style_layers: list of VGG19 layer names used for the style loss.

  Returns:
    A Keras model sharing the ImageNet-pretrained VGG19 weights whose outputs are
    the style layer activations first (in the given order) followed by the
    content layer activations. The model summary is printed as a side effect.
  """
  # No `pooling` argument is passed: with include_top=False it would only append
  # a global pooling layer after the last block. It does not turn the
  # MaxPooling2D layers inside the blocks into average pooling, and none of the
  # tapped layers depend on it.
  vgg_19 = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')
  vgg_19.trainable = False #the network weights are never trained, only the image is optimised
  style_outputs = [vgg_19.get_layer(name).output for name in style_layers]
  content_outputs = [vgg_19.get_layer(name).output for name in content_layers]
  # Order matters: callers split the outputs at len(style_layers).
  model_outputs = style_outputs + content_outputs

  my_model = models.Model(vgg_19.input, model_outputs)
  my_model.summary() #print summary of the model
  return my_model






In [0]:
def get_content_loss(base_content, target):
  """Content loss: mean of the element-wise squared difference of two feature tensors.

  Args:
    base_content: features of the image being optimised.
    target: features of the content image at the same layer.

  Returns:
    A scalar tensor.
  """
  difference = base_content - target
  return tf.reduce_mean(tf.square(difference))

def gram_matrix(input_tensor):
  """Gram matrix of a feature tensor, averaged over all non-channel positions.

  The tensor is flattened to [positions, channels]; the result is the
  [channels, channels] product of that matrix with itself (transposed on the
  left), divided by the number of positions.
  """
  n_channels = int(input_tensor.shape[-1])
  flat_features = tf.reshape(input_tensor, [-1, n_channels])
  # Normalise by the number of flattened positions so the scale does not depend
  # on the spatial size of the feature map.
  n_positions = tf.cast(tf.shape(flat_features)[0], tf.float32)
  return tf.matmul(flat_features, flat_features, transpose_a=True) / n_positions

def get_style_loss(base_style, gram_target):
  """Style loss: mean squared difference between two Gram matrices.

  Args:
    base_style: a single feature map of the image being optimised; it must have
      exactly three dimensions.
    gram_target: precomputed Gram matrix of the style image at the same layer.

  Returns:
    A scalar tensor.
  """
  # The values are not used; the unpacking only fails fast when base_style is
  # not a rank-3 tensor.
  _height, _width, _channels = base_style.get_shape().as_list()
  gram_difference = gram_matrix(base_style) - gram_target
  return tf.reduce_mean(tf.square(gram_difference))

def get_feature_representations(model, content_path, style_path,flag,index):
  """Run the content and style images through the model and collect target features.

  Args:
    model: feature extractor whose first `index` outputs are style layers and
      whose remaining outputs are content layers.
    content_path: path of the content image.
    style_path: path of the style image.
    flag: unused; kept so existing call sites keep working.
    index: number of style outputs (position at which the output list is split).

  Returns:
    (style_features, content_features): two lists holding, per layer, the first
    (batch index 0) element of the corresponding model output.
  """
  content_batch = vgg_process_img(content_path)
  style_batch = vgg_process_img(style_path)

  style_activations = model(style_batch)
  content_activations = model(content_batch)

  # Style targets come from the style image's style-layer outputs.
  style_features = []
  for activation in style_activations[:index]:
    style_features.append(activation[0])

  # Content targets come from the content image's content-layer outputs.
  content_features = []
  for activation in content_activations[index:]:
    content_features.append(activation[0])

  return style_features, content_features

def high_pass_x_y(image):
  """Differences between neighbouring entries of a 4-D array along axes 2 and 1.

  Args:
    image: 4-D array or tensor. As called from this notebook it is an image
      batch, so axis 1 runs over rows and axis 2 over columns.

  Returns:
    (x_var, y_var): the differences along axis 2 and along axis 1; each is one
    element shorter than the input on the differenced axis.
  """
  ahead_on_axis2, behind_on_axis2 = image[:,:,1:,:], image[:,:,:-1,:]
  ahead_on_axis1, behind_on_axis1 = image[:,1:,:,:], image[:,:-1,:,:]
  return ahead_on_axis2 - behind_on_axis2, ahead_on_axis1 - behind_on_axis1

def total_variation_loss(image):
  """Sum of absolute neighbouring-pixel differences in both spatial directions."""
  horizontal_diff, vertical_diff = high_pass_x_y(image)
  horizontal_term = tf.reduce_sum(tf.abs(horizontal_diff))
  vertical_term = tf.reduce_sum(tf.abs(vertical_diff))
  return horizontal_term + vertical_term


def compute_loss(model, loss_weights, init_image, gram_style_features, content_features, index):
  """Compute the total style-transfer loss for the current image.

  Args:
    model: feature extractor returning style outputs first, then content outputs.
    loss_weights: (style_weight, content_weight).
    init_image: the image batch currently being optimised.
    gram_style_features: target Gram matrices, one per style layer.
    content_features: target content features, one per content layer.
    index: number of style outputs (split position in the model's output list).

  Returns:
    (loss, style_score, content_score, var_loss). The total-variation term is
    returned for reporting only: it enters the total with a factor of 0.
  """
  style_weight, content_weight = loss_weights

  # Forward pass of the image being optimised.
  activations = model(init_image)
  style_activations = activations[:index]
  content_activations = activations[index:]

  # Every style layer contributes equally.
  per_style_layer = 1 / len(style_activations)
  style_score = sum(
      per_style_layer * get_style_loss(current[0], target_gram)
      for target_gram, current in zip(gram_style_features, style_activations))

  # Every content layer contributes equally.
  per_content_layer = 1 / len(content_activations)
  content_score = sum(
      per_content_layer * get_content_loss(current[0], target_features)
      for target_features, current in zip(content_features, content_activations))

  style_score *= style_weight
  content_score *= content_weight

  var_loss = total_variation_loss(init_image)
  loss = style_score + content_score+0*var_loss

  return loss, style_score, content_score,var_loss
  
def compute_grads(model,loss_weights,init_image,gram_style_features,content_features,split_index):
  """Evaluate the losses and the gradient of the total loss w.r.t. the image.

  Returns:
    (gradient, all_loss): the gradient of the total loss with respect to
    init_image, and the full tuple returned by compute_loss.
  """
  # Record the forward pass so the total loss can be differentiated.
  with tf.GradientTape() as tape:
    losses = compute_loss(model,loss_weights,init_image,gram_style_features,content_features,split_index)
  # The first entry of the tuple is the total loss.
  image_gradient = tape.gradient(losses[0], init_image)
  return image_gradient, losses

def deprocess_img(processed_img):
  """Turn a preprocessed image array back into a displayable uint8 image.

  Args:
    processed_img: array of shape [1, height, width, channel] or
      [height, width, channel]; it is copied, never modified.

  Returns:
    A uint8 array of shape [height, width, channel] with the per-channel offsets
    added back, the channel order reversed, and values clipped to 0..255.
  """
  pixels = processed_img.copy()
  if len(pixels.shape) == 4:
    # Drop the leading batch axis.
    pixels = np.squeeze(pixels, 0)
  assert len(pixels.shape) == 3, ("Input to deprocess image must be an image of "
                             "dimension [1, height, width, channel] or [height, width, channel]")
  if len(pixels.shape) != 3:
    raise ValueError("Invalid input to deprocessing image")

  # Undo the preprocessing: add the channel means back, then flip the channel order.
  for channel, channel_mean in enumerate((103.939, 116.779, 123.68)):
    pixels[:, :, channel] += channel_mean
  flipped = pixels[:, :, ::-1]

  return np.clip(flipped, 0, 255).astype('uint8')

In [0]:
def main(content_path, style_path,content_layers,style_layers,loss_weights,local_grad=1,num_iterations=600,display_every=150):
  """Run style transfer on the content image, optionally damping updates on faces.

  The content image itself is the starting point. At every step the gradient of
  the total loss is multiplied by a per-pixel mask before Adam applies it, so
  detected face regions change far more slowly than the rest of the picture.
  Intermediate results and loss statistics are displayed periodically; nothing
  is returned.

  Args:
    content_path: path of the content image.
    style_path: path of the style image.
    content_layers: VGG19 layer names used for the content loss.
    style_layers: VGG19 layer names used for the style loss.
    loss_weights: (style_weight, content_weight).
    local_grad: when equal to 1, gradients inside detected face boxes are
      scaled by 0.05 (localized optimisation); otherwise all pixels are
      updated equally.
    num_iterations: number of optimisation steps.
    display_every: show the current image and losses every this many steps
      (must be a positive integer).
  """
  import IPython.display  # only needed here, for inline PNG output

  split_index = len(style_layers)
  model = get_model(content_layers,style_layers)
  style_features, content_features = get_feature_representations(model, content_path, style_path,0,split_index)
  gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]
  init_image = vgg_process_img(content_path,0)

  # The batch is laid out as (1, rows, cols, 3).
  n_rows, n_cols = init_image.shape[1], init_image.shape[2]
  # get_position forwards its two size arguments to cv2.resize as (width, height),
  # so the column count goes first; the boxes then share this image's pixel grid.
  pos_collect = get_position(content_path, n_cols, n_rows)

  # Per-pixel gradient multiplier: 1 everywhere, 0.05 strictly inside face boxes.
  change = np.ones([1,n_rows,n_cols,3],dtype='float32')
  if local_grad==1:
    for x, y, x_next, y_next in pos_collect:
      # y indexes rows and x indexes columns. The slice covers the full interior
      # of the box (borders excluded) regardless of the image's aspect ratio.
      change[0, y+1:y_next, x+1:x_next, :] = 0.05
  grad_mask = tf.constant(change)  # built once; it does not change between steps

  # Valid value range of a preprocessed image: [0, 255] shifted by the channel means.
  norm_means = np.array([103.939, 116.779, 123.68])
  min_vals = -norm_means
  max_vals = 255 - norm_means

  init_image = tf.Variable(init_image, dtype=tf.float32)
  opt = tf.train.AdamOptimizer(learning_rate=20, beta1=0.99, epsilon=1e-1)
  begin_time = time.time()
  for i in range(num_iterations):
    grads, all_loss = compute_grads(model,loss_weights,init_image,gram_style_features,content_features,split_index)
    grads = tf.multiply(grads, grad_mask) #LOCALIZE

    opt.apply_gradients([(grads, init_image)])
    # Keep the image inside the range that maps back to valid pixel values.
    clipped = tf.clip_by_value(init_image, min_vals, max_vals)
    init_image.assign(clipped)
    if i%display_every==0:
      plot_img = deprocess_img(init_image.numpy())
      total_loss = all_loss[0].numpy()
      # The elapsed time is divided by 60, so it is reported in minutes.
      print(i,' ...iterations gone, costs...',(time.time()-begin_time)/60,' minutes')
      # The first value is the absolute total loss; the others are shares of it.
      print('total_loss: ',total_loss,'style_loss_percent: ',all_loss[1].numpy()/total_loss,'content_loss_percent: ',all_loss[2].numpy()/total_loss,'variation_percent: ',all_loss[3].numpy()/total_loss)
      IPython.display.display_png(Image.fromarray(plot_img))


In [0]:
# VGG19 layer(s) whose activations define the content loss.
content_layers=['block4_conv2']  #you may change here
# VGG19 layer(s) whose Gram matrices define the style loss.
style_layers=['block1_conv1','block2_conv1'] # you may change here
# Image file names are relative to the working directory set in the setup cell.
content_path = '实拍15.jpg'
style_path =  '素描3.jpg'

# loss_weights is [style_weight, content_weight]; local_grad keeps its default of 1,
# so detected face regions are updated with damped gradients.
main(content_path, style_path,content_layers,style_layers,[0.01,1.4])

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0   