<a href="https://colab.research.google.com/github/maanqii/coding_three_final/blob/main/coding_three_change2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Notes provided by ChatGPT

In [None]:

# Import what we need
import os
import sys
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf  # Import TensorFlow after Scipy or Scipy will break

In [None]:
###############################################################################
# Constants for the image input and output.
###############################################################################

# Output folder for the generated images; main() creates it if it is missing.
OUTPUT_DIR = 'output/'
# Style image to use.
STYLE_IMAGE = 'images/guernica.jpg'
# Content image to use.
CONTENT_IMAGE = 'images/hongkong.jpg'
# Image dimension constants. They fix the shape of the network input variable
# and of the generated noise image. load_image() does not resize, so the
# content and style images are expected to already be
# IMAGE_HEIGHT x IMAGE_WIDTH with COLOR_CHANNELS channels.
IMAGE_WIDTH = 800
IMAGE_HEIGHT = 600
COLOR_CHANNELS = 3

In [None]:
###############################################################################
# Algorithm constants
###############################################################################
# Noise ratio. Weight of the random noise when it is blended with the content
# image to build the starting image; the content image gets 1 - NOISE_RATIO.
NOISE_RATIO = 0.6
# Constant to put more emphasis on content loss.
BETA = 5
# Constant to put more emphasis on style loss.
ALPHA = 100
# Path to the deep learning model (see VGG_MODEL below). The file is more than
# 500MB, so it is not included in the repository, but it is available for
# download at the model Zoo:
# Link: https://github.com/BVLC/caffe/wiki/Model-Zoo
#
# Pick the VGG 19-layer model from the paper "Very Deep Convolutional
# Networks for Large-Scale Image Recognition".

## Number of optimization steps that main() runs.
ITERATIONS = 1000

VGG_MODEL = 'imagenet-vgg-verydeep-19.mat'
# The mean to subtract from the input to the VGG model. This is the mean that
# was used when the VGG model was trained. Minor changes to this will make a
# lot of difference to the performance of the model. It is reshaped to
# (1, 1, 1, 3) so that it broadcasts over the batched images handled by
# load_image() (which subtracts it) and save_image() (which adds it back).
MEAN_VALUES = np.array([123.68, 116.779, 103.939]).reshape((1,1,1,3))

## Layers used for the losses, as (layer name, weight) pairs. The names are
## keys of the dictionary returned by build_vgg19().
CONTENT_LAYERS = [('conv4_2', 1.)]
STYLE_LAYERS = [('conv1_1', 0.2), ('conv2_1', 0.2), ('conv3_1', 0.2), ('conv4_1', 0.2), ('conv5_1', 0.2)]

In [None]:
##build_net takes three parameters: ntype (type of the layer), nin (input to the layer), and nwb (a (weights, bias) pair for the layer, only needed for convolutional layers).
##It builds a layer based on ntype and returns the output of the layer. It supports two types of layers: convolutional ('conv') and average pooling ('pool').
##For a convolutional layer, it applies the convolution with stride 1 and 'SAME' padding, adds the bias, and then applies the ReLU activation function. For a pooling layer, it applies 2x2 average pooling with stride 2.
def build_net(ntype, nin, nwb=None):
    """Build a single network layer on top of ``nin`` and return its output.

    Args:
        ntype: Layer kind, either ``'conv'`` or ``'pool'``.
        nin: Tensor fed into the layer.
        nwb: Indexable pair ``(weights, bias)``; only read for ``'conv'``.

    Returns:
        For ``'conv'``: ReLU of the stride-1, SAME-padded convolution plus
        bias. For ``'pool'``: 2x2 average pooling with stride 2 and SAME
        padding. For any other ``ntype``: ``None``.
    """
    if ntype == 'pool':
        window = [1, 2, 2, 1]
        return tf.nn.avg_pool(nin, ksize=window,
                              strides=[1, 2, 2, 1], padding='SAME')

    if ntype == 'conv':
        convolved = tf.nn.conv2d(nin, nwb[0], strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(convolved + nwb[1])

    # Unknown layer kinds fall through without building anything.
    return None


def get_weight_bias(vgg_layers, i):
    """Return layer ``i``'s parameters as a pair of TensorFlow constants.

    Args:
        vgg_layers: The ``'layers'`` entry of the loaded VGG .mat file, as
            passed in by ``build_vgg19``.
        i: Index of the layer inside ``vgg_layers``.

    Returns:
        ``(weights, bias)`` where both are ``tf.constant`` tensors and the
        bias has been flattened to one dimension.
    """
    # The parameter pair sits at a fixed position in the nested .mat structure.
    params = vgg_layers[i][0][0][2][0]
    kernel = params[0]
    raw_bias = params[1]
    flat_bias = np.reshape(raw_bias, (raw_bias.size))
    return tf.constant(kernel), tf.constant(flat_bias)


def build_vgg19(path):
    """Build the VGG-19 feature network from a pre-trained .mat file.

    The network input is a ``tf.Variable`` of zeros with shape
    ``(1, IMAGE_HEIGHT, IMAGE_WIDTH, 3)`` and dtype float32, so the image
    itself is what an optimizer can update. Every layer is created with
    ``build_net`` on top of the previous one.

    Args:
        path: Path of the .mat file, read with ``scipy.io.loadmat``.

    Returns:
        dict mapping ``'input'``, ``'convX_Y'`` and ``'poolX'`` names to the
        corresponding tensors.
    """
    # (layer name, index into the .mat 'layers' array); None marks a pooling
    # layer, which has no stored parameters. The skipped indices are
    # presumably the non-parametric entries of the file -- TODO confirm.
    layout = (
        ('conv1_1', 0), ('conv1_2', 2), ('pool1', None),
        ('conv2_1', 5), ('conv2_2', 7), ('pool2', None),
        ('conv3_1', 10), ('conv3_2', 12), ('conv3_3', 14), ('conv3_4', 16),
        ('pool3', None),
        ('conv4_1', 19), ('conv4_2', 21), ('conv4_3', 23), ('conv4_4', 25),
        ('pool4', None),
        ('conv5_1', 28), ('conv5_2', 30), ('conv5_3', 32), ('conv5_4', 34),
        ('pool5', None),
    )

    raw_layers = scipy.io.loadmat(path)['layers'][0]

    blank = np.zeros((1, IMAGE_HEIGHT, IMAGE_WIDTH, 3)).astype('float32')
    net = {'input': tf.Variable(blank)}

    previous = net['input']
    for name, param_index in layout:
        if param_index is None:
            previous = build_net('pool', previous)
        else:
            previous = build_net('conv', previous,
                                 get_weight_bias(raw_layers, param_index))
        net[name] = previous
    return net

In [None]:
def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
    """Blend uniform random noise with the content image.

    Args:
        content_image: Image array that can be broadcast against shape
            ``(1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)``.
        noise_ratio: Weight given to the noise; the content image receives
            ``1 - noise_ratio``.

    Returns:
        ``noise * noise_ratio + content_image * (1 - noise_ratio)``, where the
        noise is drawn uniformly from [-20, 20) as float32.
    """
    shape = (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS)
    noise = np.random.uniform(-20, 20, shape).astype('float32')
    content_share = 1 - noise_ratio
    return noise * noise_ratio + content_image * content_share

def load_image(path):
    """Read an image file and prepare it as input for the VGG network.

    Args:
        path: Location of the image file, read with ``scipy.misc.imread``.

    Returns:
        The image with a leading axis of length 1 added and ``MEAN_VALUES``
        subtracted. The image is not resized.
    """
    pixels = scipy.misc.imread(path)
    # Only a leading axis is added; the pixel data itself is untouched.
    batched = pixels[np.newaxis, ...]
    # The VGG model expects the mean to be subtracted from its input.
    return batched - MEAN_VALUES

def save_image(path, image):
    """Write a network-space image batch to disk as an 8-bit image.

    Args:
        path: Destination file, written with ``scipy.misc.imsave``.
        image: Array with a leading batch axis; only the first entry is saved.
    """
    # Undo the mean subtraction done on load, then drop the batch axis.
    restored = (image + MEAN_VALUES)[0]
    # Clamp to the valid 8-bit range before the cast to uint8.
    pixels = np.clip(restored, 0, 255).astype('uint8')
    scipy.misc.imsave(path, pixels)

In [None]:
##content_layer_loss(p, x): This function calculates the content loss between the feature representations of the content image (p) and the generated image (x). It computes the sum of squared differences between x and p and scales it by a factor of 1 / (2 * N * M), where N represents the number of channels and M represents the spatial size (height * width) of the feature maps. The computed loss is returned.

##content_loss_func(sess, net): This function computes the total content loss for the generated image given the feature representations of the content image. It iterates over the specified CONTENT_LAYERS and calculates the content loss for each layer. The content representation p is obtained by running the corresponding layer in the network (net[layer_name]) using the TensorFlow session sess. The generated image representation x is already stored in net[layer_name]. The content loss for each layer is multiplied by its respective weight and accumulated to compute the total content loss. The final content loss is divided by the number of layers to obtain an average loss and returned.

##gram_matrix(x, area, depth): This function computes the Gram matrix of the feature maps x. It reshapes x into a 2D tensor of shape (area, depth), where area represents the spatial area (width * height) of the feature maps, and depth represents the number of channels. The Gram matrix is computed by taking the dot product of the reshaped x with its transpose. The resulting Gram matrix is returned.

##style_layer_loss(a, x): This function calculates the style loss between the Gram matrix of the style image features (a) and the Gram matrix of the generated image features (x). It computes the sum of squared differences between the two Gram matrices, scaled by a factor of 1 / (4 * N^2 * M^2), where N represents the number of channels and M represents the spatial size (height * width) of the feature maps. The computed loss is returned.

##style_loss_func(sess, net): This function computes the total style loss for the generated image given the Gram matrices of the style image. It iterates over the specified STYLE_LAYERS and calculates the style loss for each layer. The style representation a is obtained by running the corresponding layer in the network (net[layer_name]) using the TensorFlow session sess. The generated image representation x is already stored in net[layer_name]. The style loss for each layer is multiplied by its respective weight and accumulated to compute the total style loss. The final style loss is divided by the number of layers to obtain an average loss and returned.

def content_layer_loss(p, x):
    """Content loss of one layer: scaled sum of squared feature differences.

    Args:
        p: Feature maps of the content image; its ``shape`` is read at
            indices 1, 2 and 3 (height, width, channels of the NHWC layout
            used by the network input).
        x: Feature maps of the image being generated (same layer).

    Returns:
        ``1 / (2 * N * M) * sum((x - p) ** 2)`` with ``N = p.shape[3]`` and
        ``M = p.shape[1] * p.shape[2]``.
    """
    channels = p.shape[3]
    positions = p.shape[1] * p.shape[2]
    scale = 1. / (2 * channels * positions)
    squared_error = tf.reduce_sum(tf.pow((x - p), 2))
    return scale * squared_error


def content_loss_func(sess, net):
    """Weighted content loss averaged over ``CONTENT_LAYERS``.

    For each ``(layer_name, weight)`` pair the layer is evaluated once with
    ``sess.run`` to get a fixed target (whatever image is currently assigned
    to the network input), and compared against the symbolic layer output.

    Args:
        sess: TensorFlow session used to evaluate the target features.
        net: Layer dictionary as returned by ``build_vgg19``.

    Returns:
        Sum of the weighted per-layer losses divided by the number of layers.
    """
    weighted_losses = (
        content_layer_loss(sess.run(net[name]), net[name]) * layer_weight
        for name, layer_weight in CONTENT_LAYERS
    )
    # Start from 0.0 so the accumulation matches a float running total.
    return sum(weighted_losses, 0.0) / float(len(CONTENT_LAYERS))


def gram_matrix(x, area, depth):
    """Return the Gram matrix of feature maps ``x``.

    Args:
        x: Feature maps holding ``area * depth`` values.
        area: Number of rows after flattening (callers pass height * width).
        depth: Number of columns after flattening (callers pass the channel
            count).

    Returns:
        A ``(depth, depth)`` tensor: the flattened features transposed and
        multiplied with themselves.
    """
    flat = tf.reshape(x, (area, depth))
    return tf.matmul(tf.transpose(flat), flat)

def style_layer_loss(a, x):
    """Style loss of one layer: scaled squared distance of Gram matrices.

    Args:
        a: Feature maps of the style image; its ``shape`` is read at
            indices 1, 2 and 3.
        x: Feature maps of the image being generated (same layer).

    Returns:
        ``1 / (4 * N**2 * M**2) * sum((G - A) ** 2)`` where ``A`` and ``G``
        are the Gram matrices of ``a`` and ``x``, ``N = a.shape[3]`` and
        ``M = a.shape[1] * a.shape[2]``.
    """
    positions = a.shape[1] * a.shape[2]
    channels = a.shape[3]
    style_gram = gram_matrix(a, positions, channels)
    generated_gram = gram_matrix(x, positions, channels)
    scale = 1. / (4 * channels ** 2 * positions ** 2)
    return scale * tf.reduce_sum(tf.pow((generated_gram - style_gram), 2))


def style_loss_func(sess, net):
    """Weighted style loss averaged over ``STYLE_LAYERS``.

    For each ``(layer_name, weight)`` pair the layer is evaluated once with
    ``sess.run`` to get a fixed target (whatever image is currently assigned
    to the network input), and compared against the symbolic layer output.

    Args:
        sess: TensorFlow session used to evaluate the target features.
        net: Layer dictionary as returned by ``build_vgg19``.

    Returns:
        Sum of the weighted per-layer losses divided by the number of layers.
    """
    weighted_losses = (
        style_layer_loss(sess.run(net[name]), net[name]) * layer_weight
        for name, layer_weight in STYLE_LAYERS
    )
    # Start from 0.0 so the accumulation matches a float running total.
    return sum(weighted_losses, 0.0) / float(len(STYLE_LAYERS))

In [None]:
##net = build_vgg19(VGG_MODEL): The VGG19 network is built by calling the build_vgg19 function, which returns a dictionary containing the layers of the network. The VGG model path (the VGG_MODEL constant) is provided as an argument.

##sess = tf.Session(): A TensorFlow session is created.

##sess.run(tf.initialize_all_variables()): The variables in the session are initialized.

##content_img = load_image(CONTENT_IMAGE): The content image is loaded using the load_image function. The content image path (CONTENT_IMAGE) is provided as an argument.

##style_img = load_image(STYLE_IMAGE): The style image is loaded using the load_image function. The style image path (STYLE_IMAGE) is provided as an argument.

##sess.run([net['input'].assign(content_img)]): The content image is assigned to the input variable of the network.

##cost_content = content_loss_func(sess, net): The content loss is calculated using the content_loss_func function.

##sess.run([net['input'].assign(style_img)]): The style image is assigned to the input variable of the network.

##cost_style = style_loss_func(sess, net): The style loss is calculated using the style_loss_func function.

##total_loss: The total loss is computed as a weighted sum of the content loss (cost_content) and the style loss (cost_style), where the weighting factors are the module-level constants ALPHA and BETA.

##optimizer = tf.train.AdamOptimizer(2.0): An Adam optimizer is created with a learning rate of 2.0.

##init_img = generate_noise_image(content_img): An initial image for optimization is generated by adding random noise to the content image using the generate_noise_image function.

##train_op = optimizer.minimize(total_loss): The optimization operation is defined using the Adam optimizer to minimize the total loss.

##sess.run(tf.initialize_all_variables()): The variables in the session are reinitialized.

##sess.run(net['input'].assign(init_img)): The initial image is assigned to the input variable of the network.

##The following code runs the training loop for a specified number of iterations (ITERATIONS):

##sess.run(train_op): The optimization operation is executed to update the image towards minimizing the total loss.
##The loss and current image information is printed every 100 iterations.
##The current image is saved to the output directory using the save_image function.
def main():
    """Run the style transfer and save a snapshot every 100 iterations.

    Steps:
      1. Build the VGG-19 network and load the content and style images.
      2. Assign each image to the network input in turn to capture the
         content and style targets and build the two loss terms.
      3. Start from a noisy copy of the content image and minimize the
         weighted total loss with Adam for ``ITERATIONS`` steps.

    Every 100 iterations the pixel sum and the loss are printed and the
    current image is written to ``OUTPUT_DIR`` as ``<iteration>.png``.
    """
    net = build_vgg19(VGG_MODEL)

    content_img = load_image(CONTENT_IMAGE)
    style_img = load_image(STYLE_IMAGE)

    # Create the output folder once, instead of checking on every snapshot.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The context manager releases the session's resources when done.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        # The loss builders snapshot the layer activations of whatever image
        # is currently assigned to the input, so each target image is
        # assigned right before its loss is built.
        sess.run(net['input'].assign(content_img))
        cost_content = content_loss_func(sess, net)

        sess.run(net['input'].assign(style_img))
        cost_style = style_loss_func(sess, net)

        # BETA is documented at module level as the content weight and ALPHA
        # as the style weight.
        total_loss = BETA * cost_content + ALPHA * cost_style
        optimizer = tf.train.AdamOptimizer(2.0)

        init_img = generate_noise_image(content_img)

        train_op = optimizer.minimize(total_loss)
        # minimize() adds the optimizer's own variables, so initialize again;
        # this also resets the input, which is why the start image is
        # assigned afterwards.
        sess.run(tf.initialize_all_variables())
        sess.run(net['input'].assign(init_img))

        for it in range(ITERATIONS):
            sess.run(train_op)
            if it % 100 == 0:
                # Report and save every 100 iterations.
                mixed_image, cost = sess.run([net['input'], total_loss])
                print('Iteration %d' % (it))
                # Summing with NumPy avoids adding a new op to the graph on
                # every report.
                print('sum : ', np.sum(mixed_image))
                print('cost: ', cost)

                filename = os.path.join(OUTPUT_DIR, '%d.png' % (it))
                save_image(filename, mixed_image)

# Run the style transfer only when this file is executed as a script.
if __name__ == '__main__':
    main()