In [None]:
#Import modules
import cv2
import numpy as np
import tensorflow as tf
import torch
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.random import normal
from transformers import CLIPProcessor, CLIPModel

In [None]:
#Load pre-trained models
#InceptionV3 with ImageNet weights, built without its top classification layers
inc_model = inception_v3.InceptionV3(include_top=False, weights='imagenet')
#CLIP processor first, then the CLIP model, both from the same checkpoint id
processor, model = (
    loader.from_pretrained("openai/clip-vit-base-patch32")
    for loader in (CLIPProcessor, CLIPModel)
)

In [None]:
#Function - Preprocess an image
def preprocess_img(img_path, target_size = (300, 300)):
    """Load an image file and return it as a single-image batch scaled around 0.

    Parameters
    ----------
    img_path : path of the image file, forwarded to ``load_img``.
    target_size : size forwarded to ``load_img`` as its ``target_size``
        argument. Defaults to ``(300, 300)``, the value this notebook
        always used.

    Returns
    -------
    The pixel array with a new leading axis of length 1 (added with
    ``np.expand_dims``), transformed by ``x / 127.5 - 1``. This maps the
    0 - 255 intensity range onto a range centered around 0:
    positive values are brighter pixels, negative values are darker pixels.
    """
    pixels = img_to_array(load_img(img_path, target_size = target_size))
    #Add the batch axis expected by the model, then rescale 0..255 -> -1..1
    batch = np.expand_dims(pixels, axis = 0)
    return batch / 127.5 - 1.

In [None]:
#Function - Generate image noise
def generate_noise(shape):
    """Return standard-normal noise (mean 0, std 1) as a float32 NumPy array.

    Parameters
    ----------
    shape : int or tuple of ints giving the shape of the returned array.

    Returns
    -------
    ``numpy.ndarray`` of dtype float32 with the requested shape.

    NumPy's generator is used on purpose: the ``(0, 1, size = shape)`` call
    form and the ``.astype`` conversion belong to ``np.random.normal``;
    ``tensorflow.random.normal`` takes the shape as its first argument and
    has no ``size`` keyword.
    """
    return np.random.normal(0.0, 1.0, size = shape).astype("float32")

In [None]:
#Function - Generate a virtual image
def _clip_text_embedding(text_prompt):
    """Encode ``text_prompt`` with CLIP and return the embedding scaled to unit length."""
    #Pass the prompt by keyword so it cannot be mistaken for the images argument
    inputs = processor(text = [text_prompt], return_tensors = 'pt', padding = True, truncation = True)
    with torch.no_grad():
        features = model.get_text_features(**inputs)
    #NOTE(review): get_text_features is expected to return a plain tensor; the
    #getattr fallback only matters if the installed transformers release wraps
    #the result in an output object - confirm against the pinned version.
    features = getattr(features, "pooler_output", features)
    return features / features.norm(dim = -1, keepdim = True)


def _clip_similarity_grad(img, text_embedding):
    """Gradient of the CLIP image/text cosine similarity with respect to ``img``.

    ``img`` is a float32 NumPy batch laid out (batch, height, width, channel)
    with values in the -1..1 scale produced by ``preprocess_img``. The returned
    NumPy array has the same layout and shape as ``img``.
    """
    pixels = torch.from_numpy(np.ascontiguousarray(img)).permute(0, 3, 1, 2).clone().requires_grad_(True)
    #-1..1 -> 0..1, then resize and normalise the way the CLIP processor would
    unit_range = (pixels + 1.) / 2.
    side = model.config.vision_config.image_size
    resized = torch.nn.functional.interpolate(unit_range, size = (side, side), mode = 'bilinear', align_corners = False)
    mean = torch.tensor(processor.image_processor.image_mean).view(1, -1, 1, 1)
    std = torch.tensor(processor.image_processor.image_std).view(1, -1, 1, 1)
    image_features = model.get_image_features(pixel_values = (resized - mean) / std)
    image_features = getattr(image_features, "pooler_output", image_features)
    image_features = image_features / image_features.norm(dim = -1, keepdim = True)
    similarity = (image_features * text_embedding).sum()
    #autograd.grad differentiates w.r.t. the pixels only, leaving model weights untouched
    grad = torch.autograd.grad(similarity, pixels)[0]
    return grad.permute(0, 2, 3, 1).contiguous().numpy()


def _inception_activation_grad(img):
    """Gradient of the mean InceptionV3 feature activation with respect to ``img``."""
    pixels = tf.convert_to_tensor(img)
    with tf.GradientTape() as tape:
        #A plain tensor is not tracked automatically, so watch it explicitly
        tape.watch(pixels)
        activation = tf.reduce_mean(inc_model(pixels, training = False))
    return tape.gradient(activation, pixels).numpy()


def virtual_image(base_img_path, text_prompt, iterations = 10, step_size = 0.1, alpha = 0.5):
    """Generate a prompt-guided "virtual" image and blend it with the base image.

    Starting from random noise with the shape of the preprocessed base image,
    the noise is updated ``iterations`` times by gradient descent on

        loss = mean(InceptionV3 features) - cosine_similarity(CLIP image, CLIP text)

    and the result is converted back to 8-bit pixels, resized to the base
    image and alpha-blended with it.

    Parameters
    ----------
    base_img_path : path of the base image file.
    text_prompt : text describing the desired image; encoded with CLIP.
    iterations : number of gradient steps. Default 10.
    step_size : size of each step applied to the normalised gradient. Default 0.1.
    alpha : weight of the base image in the blend; the virtual image gets
        ``1 - alpha``. Default 0.5.

    Returns
    -------
    The blended image returned by ``cv2.addWeighted``, with the size and
    channel order of ``cv2.imread(base_img_path)``.

    Raises
    ------
    FileNotFoundError : if ``cv2.imread`` cannot read ``base_img_path``.

    NOTE(review): the loss keeps the original formula. The update subtracts the
    gradient (descent), which raises the similarity to the prompt; the earlier
    code added it - confirm descent is the intended direction.
    """
    #The base image is only needed here for the shape of the working canvas
    canvas_shape = preprocess_img(base_img_path).shape
    virtual_img = np.asarray(generate_noise(canvas_shape), dtype = "float32")
    text_embedding = _clip_text_embedding(text_prompt)
    for _ in range(iterations):
        #Gradient of the loss: d(mean activation) - d(text similarity)
        loss_grad = _inception_activation_grad(virtual_img) - _clip_similarity_grad(virtual_img, text_embedding)
        #Raw pixel gradients are tiny; normalise so step_size has a stable meaning
        loss_grad = loss_grad / (np.abs(loss_grad).mean() + 1e-8)
        virtual_img = (virtual_img - step_size * loss_grad).astype("float32")
    #De-processing the virtual image: drop the batch axis and undo x / 127.5 - 1
    virtual_rgb = np.clip((virtual_img[0] + 1.) * 127.5, 0, 255).astype('uint8')
    #The canvas follows the RGB order of load_img; OpenCV images are BGR
    virtual_bgr = cv2.cvtColor(virtual_rgb, cv2.COLOR_RGB2BGR)
    #Combining virtual image with the org one
    base_img = cv2.imread(base_img_path)
    if base_img is None:
        #cv2.imread signals failure by returning None rather than raising
        raise FileNotFoundError("Could not read base image: " + str(base_img_path))
    virtual_bgr = cv2.resize(virtual_bgr, (base_img.shape[1], base_img.shape[0]))
    return cv2.addWeighted(base_img, alpha, virtual_bgr, 1 - alpha, 0)

In [None]:
#Main
base_img = "" #Path of the file
if not base_img:
    #Fail early with a clear message instead of deep inside the image loaders
    raise ValueError("Set base_img to the path of the source image before running this cell.")
text_prompt = input("Enter what kind of image you want: ")
virtual = virtual_image(base_img, text_prompt, iterations = 20)
#cv2.imwrite reports failure through its return value instead of raising
if not cv2.imwrite("generated_image.jpg", virtual):
    raise IOError("Could not write generated_image.jpg")
#Done