<a href="https://colab.research.google.com/github/meetptl04/ImageStyleTranfer/blob/main/ImageStyle1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.applications import vgg19
from tensorflow.keras.preprocessing import image as kp_image

# Utility class for image processing
class ImageProcessor:
    def __init__(self, max_dim=512):
        self.max_dim = max_dim

    def load_and_process_img(self, path_to_img):
        img = cv2.imread(path_to_img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self._resize_img(img)
        img = np.expand_dims(img, axis=0)
        img = vgg19.preprocess_input(img)
        return img

    def _resize_img(self, img):
        long = max(img.shape[:2])
        scale = self.max_dim / long
        new_shape = (int(img.shape[1] * scale), int(img.shape[0] * scale))
        img = cv2.resize(img, new_shape)
        return img

    def deprocess_img(self, processed_img):
        x = processed_img.copy()
        if len(x.shape) == 4:
            x = np.squeeze(x, 0)
        x[:, :, 0] += 103.939
        x[:, :, 1] += 116.779
        x[:, :, 2] += 123.68
        x = x[:, :, ::-1]
        x = np.clip(x, 0, 255).astype('uint8')
        return x

# Style Transfer class
class StyleTransfer:
    def __init__(self, content_img_path, style_img_path):
        self.image_processor = ImageProcessor()
        self.content_img = self.image_processor.load_and_process_img(content_img_path)
        self.style_img = self.image_processor.load_and_process_img(style_img_path)

        # Initialize content and style layers
        self.content_layers = ['block5_conv2']
        self.style_layers = ['block1_conv1',
                             'block2_conv1',
                             'block3_conv1',
                             'block4_conv1',
                             'block5_conv1']
        self.num_content_layers = len(self.content_layers)
        self.num_style_layers = len(self.style_layers)

        self.model = self._load_model()

    def _load_model(self):
        vgg = vgg19.VGG19(include_top=False, weights='imagenet')
        vgg.trainable = False
        outputs = [vgg.get_layer(name).output for name in self.style_layers + self.content_layers]
        model = tf.keras.Model([vgg.input], outputs)
        return model

    def _gram_matrix(self, input_tensor):
        result = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
        input_shape = tf.shape(input_tensor)
        num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
        return result / num_locations

    def _get_content_loss(self, base_content, target):
        print(f'Base content shape: {base_content.shape}')
        print(f'Target shape: {target.shape}')
        return tf.reduce_mean(tf.square(base_content - target))

    def _get_style_loss(self, base_style, gram_target):
        gram_style = self._gram_matrix(base_style)
        print(f'Base style shape: {base_style.shape}')
        print(f'Gram target shape: {gram_target.shape}')
        return tf.reduce_mean(tf.square(gram_style - gram_target))

    def _compute_loss(self, model, loss_weights, init_image, gram_style_features, content_features):
        style_weight, content_weight = loss_weights
        model_outputs = model(init_image)

        # Check shapes
        print(f'Model outputs shape: {len(model_outputs)}')
        for output in model_outputs:
            print(f'Output shape: {output.shape}')

        style_output_features = model_outputs[:self.num_style_layers]
        content_output_features = model_outputs[self.num_style_layers:]

        style_score = 0
        content_score = 0

        for target_style, comb_style in zip(gram_style_features, style_output_features):
            print(f'Gram style feature shape: {target_style.shape}')
            print(f'Combined style feature shape: {comb_style.shape}')
            style_score += self._get_style_loss(comb_style, target_style)

        for target_content, comb_content in zip(content_features, content_output_features):
            print(f'Target content shape: {target_content.shape}')
            print(f'Combined content feature shape: {comb_content.shape}')
            content_score += self._get_content_loss(comb_content, target_content)

        style_score *= style_weight / self.num_style_layers
        content_score *= content_weight / self.num_content_layers

        loss = style_score + content_score
        return loss, style_score, content_score

    def _compute_grads(self, cfg):
        with tf.GradientTape() as tape:
            all_loss = self._compute_loss(**cfg)

        total_loss = all_loss[0]
        return tape.gradient(total_loss, cfg['init_image']), all_loss

    def run(self, num_iterations=2000, content_weight=1e3, style_weight=1e-2):
        style_features = self.model(self.style_img)[:self.num_style_layers]
        content_features = self.model(self.content_img)[self.num_content_layers:]
        gram_style_features = [self._gram_matrix(style_feature) for style_feature in style_features]

        init_image = tf.Variable(self.content_img, dtype=tf.float32)
        opt = tf.optimizers.Adam(learning_rate=5.0)

        best_loss, best_img = float('inf'), None

        loss_weights = (style_weight, content_weight)
        cfg = {
            'model': self.model,
            'loss_weights': loss_weights,
            'init_image': init_image,
            'gram_style_features': gram_style_features,
            'content_features': content_features
        }

        for i in range(num_iterations):
            grads, all_loss = self._compute_grads(cfg)
            loss, style_score, content_score = all_loss

            # Debugging prints
            print(f'Iteration {i}: loss={loss}, style_loss={style_score}, content_loss={content_score}')
            print(f'init_image shape: {init_image.shape}')
            print(f'grads shape: {grads.shape}')

            opt.apply_gradients([(grads, init_image)])
            clipped = tf.clip_by_value(init_image, -1.0, 1.0)
            init_image.assign(clipped)

            if loss < best_loss:
                best_loss = loss
                best_img = init_image.numpy()

        return self.image_processor.deprocess_img(best_img)

# Example usage
style_transfer = StyleTransfer('/content/Dancing-House-Praga-1.jpg', '/content/the-starry-night.jpg')
result_img = style_transfer.run(num_iterations=500, content_weight=1e3, style_weight=1e-2)

# Display the result
plt.imshow(result_img)
plt.title('Styled Image')
plt.axis('off')
plt.show()


Model outputs shape: 6
Output shape: (1, 356, 512, 64)
Output shape: (1, 178, 256, 128)
Output shape: (1, 89, 128, 256)
Output shape: (1, 44, 64, 512)
Output shape: (1, 22, 32, 512)
Output shape: (1, 22, 32, 512)
Gram style feature shape: (1, 64, 64)
Combined style feature shape: (1, 356, 512, 64)
Base style shape: (1, 356, 512, 64)
Gram target shape: (1, 64, 64)
Gram style feature shape: (1, 128, 128)
Combined style feature shape: (1, 178, 256, 128)
Base style shape: (1, 178, 256, 128)
Gram target shape: (1, 128, 128)
Gram style feature shape: (1, 256, 256)
Combined style feature shape: (1, 89, 128, 256)
Base style shape: (1, 89, 128, 256)
Gram target shape: (1, 256, 256)
Gram style feature shape: (1, 512, 512)
Combined style feature shape: (1, 44, 64, 512)
Base style shape: (1, 44, 64, 512)
Gram target shape: (1, 512, 512)
Gram style feature shape: (1, 512, 512)
Combined style feature shape: (1, 22, 32, 512)
Base style shape: (1, 22, 32, 512)
Gram target shape: (1, 512, 512)
Target c

InvalidArgumentError: {{function_node __wrapped__Sub_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:Sub] name: 