<a href="https://colab.research.google.com/github/maddarauci/Neural-Networks/blob/main/image_transform_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install keras scipy numpy pillow



In [13]:
import PIL.Image
from keras.preprocessing.image import load_img, img_to_array
import numpy as np
from scipy.optimize import fmin_l_bfgs_b
from keras.applications import vgg19
from keras import backend as K
from keras.preprocessing.image import save_img
from PIL import Image
import argparse

# preprocessing image to make it compatible with the vgg19 model
def preprocess_image(image_path, resized_width, resized_height):
  img = load_img(image_path, target_size=(resized_width, resized_height))
  img = img_to_array(img)
  img = np.expand_dims(img, axis=0)
  img = vgg19.preprocess_input(img)
  return img

# function to convert a tensor to an image
def deprocess_image(x, resized_width, resized_height):
  x = x.reshape((resized_width, resized_height, 3))

  # remove zero center by mean pixel. necessary when working with vgg model 
  z[:, :, 0] += 103.939
  x[:, :, 1] += 116.779
  x[:, :, 2] += 123.68

  # format bgr to rgb 
  x = x[:, :, ::-1]
  x = np.clip(x, 0, 255).astype('uint8')
  return x 

# the gram matrix of an image tensor is the inner product between the vectorized feature map in a layer.
# it is used to compute the style lo, minimizing the mean squared distance between the feature correlation map of the style imag
# and the input image 

def gram_matrix(x):
  features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
  gram = K.dot(features, K.transpose(features))
  return gram 

# the style_loss_per_layer represents the loss between the style of the style reference image and the generated image.
# it depends of the gram matrices of features maps from the style reference image and from the generated image.

def style_loss_per_layer(style, combination, resized_width, resized_height):
  S = gram_matrix(style)
  C = gram_matrix(combination)
  channels = 3
  size = resized_width * resized_height
  return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2))

# the total_style_loss represents the total loss between the style of the style reference image and generated image, 
# taking into account all the layers considered for the style transfer, related to the style reference image.

def total_style_loss(feature_layers, output_dict, resized_width, resized_height, style_weight):
  loss = K.variable(0.)
  for layer_name in feature_layer:
    layer_features = output_dict[layer_name]
    style_reference_features = layers_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl - style_loss_per_layer(style_reference_features, combination_features, resized_width, resized_height)
    loss += (style_weight / len(featue_layers)) * sl 
  return loss 

# the content loss maintains the features of the content image in the generated image.
def content_loss(layers_features):
  base_image_features = layer_features[0, :, :, :]
  combination_features = layers_features[2, :, :, :]
  return K.sum(K.square(combination_features - base_image_features))

# the total variation loss maintains the generated image localy coherent,
# smoothing the pixel variatants among the neighbour pixels.
def total_variation(x, resized_width, resized_height):
  a = K.square(x[:, :resized_width - 1, :resized_height - 1, :] - x[:, 1: :resized_height - 1, :]) 
  b = K.square(x[:, :resized_width - 1, :resized_height - 1, :] - x[:, :resized_height - 1, 1, :])
  return K.sum(K.pow(a + b, 1.25))

def total_loss(outputs_dict, content_weight, resized_width, resized_height, style_weight, total_variation_weight, combination_image):
   loss = K.variable(0.)

   # contribution of content_loss
   features_layers_content = outputs_dict['block5_conv2']
   loss += content_weight * content_loss(features_layers_content)

   # contribution of style_loss
   feature_layer_style = ['block1_conv1', 'block2_conv1',
                          'block3_conv1', 'block4_conv1',
                          'block5_conv1']
   loss += total_style_loss(features_layers_style, outputs_dict, resized_width, resized_height, style_weight) * style_weight

   # contribution of variation_loss
   loss += total_variation_weight * total_variation_loss(combination_image, resized_width, resized_height)
   return loss 

# evaluate the loss and gradients respect to the generated image. it is called in the evaluator, necessary to 
# to compute gradients and the loss as two different functions (limitation of the L-BFGS algorithmn) without
# excessive losses in performance 

def eval_loss_and_grads(x, resized_width, resized_height, f_outputs):
  x = x.reshape((1, resized_width, resized_height, 3))
  outs = f_outputs([x])
  loss_value = outs[0]
  if len(outs[1:]) == 1:
    grad_values = outs[1].flatten().astype('float64')
  else:
    grad_values = np.array(outs[1:]).flatten().astype('float64')
  return loss_value, grad_values
  
# save generated pictures
def save(filename, generated):
  save_img(filename, Image.fromarray(generated))

# evaluator returns the loss and the gradient in two separate functions, but the calculation of the two variables
# are dependent. this reduces the computation time, since otherwise it would be calculated separately.

class Evaluator(object):
  def __init__(self, resized_width, resized_height, f_outputs):
    self.loss_value = None
    self.grad_values = None 
    self.resized_width = resized_width
    self.resized_height = resized_height
    self.f_outputs = f_outputs 

  def loss(self, x):
    assert self.loss_value is None 
    loss_value, grad_values = eval_loss_and_grads(x, self.resized_width, self.resized_height, self.f_outputs)
    self.loss_value = loss_value 
    self.grad_values = grad_values
    return self.loss_value 
  def grad(self, x):
    assert self.loss_value is not None 
    grad_values = np.copy(self.grad_values)
    self.loss_value = None
    self.grad_values = None
    return grad_values

def run(args):
  # variable declaration
  base_image_path = f"reference_images/base_image/{args.base_image}"
  style_reference_image_path = "reference_images/style_image/{args.style_image}"
  iterations = args.iterations 

  # weights to compare the final loss
  totaal_variation_weight = 1
  style_weight = 2
  contetn_weight = 5

  # dimensions of the generated picture.
  width, height = load_imag(base_image_path).size
  resized_width = 400
  resized_height = int(width * resized_width / height)

  # get tensor representation of the image
  base_image = K.variable(preprocess_image(base_image_path, resized_width, resized_height))
  style_reference_image = K.variable(preprocess_image(style_reference_image_path, resized_width, resized_height))

  # place holder for generated images
  combination_image = K.placeholder((1, resized_width, resized_height, 3))

  # combine the 3 images into a single keras tensor 
  input_tensor = K.concatenate([base_image, style_reference_image, combination_image], axis=0)

  # build the vgg19 network with our 3 images as input 
  # the model is loaded with pre-trained ImageNet weights
  model = vgg19.VGG19(input_tensor=input_tensor, weights='imagenet', include_top=False)

  # get the outputs of each key layer, through unique names.
  outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
  loss = total_loss(outputs_dict, content_weight, resized_width, resized_height, style_weight, total_variation_weight, combination_image)


  # get the gradient of the generated image.
  grads = K.gradients(loss, combination_image)
  outputs = [loss]
  outputs += grads

  f_outputs = K.function([combination_image], outputs)
    
  evaluator = Evaluator(resized_width, resized_height, f_outputs)

  x = preprocess_image(base_image_path, resized_width, resized_height)
    
  # The oprimizer is fmin_l_bfgs
  for i in range(iterations):
      print('Iteration: ', i)
      x, min_val, info = fmin_l_bfgs_b(evaluator.loss,
                                        x.flatten(),
                                        fprime=evaluator.grads,
                                        maxfun=25)
  
      print('Current loss value:', min_val)
  
      # Save current generated image
      img = deprocess_image(x.copy(), resized_width, resized_height)
      fname = 'results/' + np.str(i) + '.png'
      save(fname, img)




print("\n\tdone")

if __name__ == '__main__':
    # Set options to activate or deactivate the game view, and its speed
    parser = argparse.ArgumentParser()
    parser.add_argument("--base_image", type=str, default="jumping_me.jpg")
    parser.add_argument("--style_image", type=str, default="starry_night.jpg")
    parser.add_argument("--iterations", type=int, default=20)
    args = parser.parse_args()
    run(args)











	done


usage: ipykernel_launcher.py [-h] [--base_image BASE_IMAGE]
                             [--style_image STYLE_IMAGE]
                             [--iterations ITERATIONS]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-d77b66cf-8e7d-465c-9287-83742dbfc3bc.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
