In [1]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, AutoModelForCausalLM, AutoTokenizer 
import torch 
import nltk 
from diffusers import DiffusionPipeline

# Load the Stable Diffusion XL base pipeline. No dtype or device is requested
# here; the pipeline is moved to its compute device later, inside
# generate_images_and_prompts.
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")

# torch.manual_seed() seeds torch's global RNG and returns the default
# torch.Generator, so SEED is a Generator object (not the integer 336).
# It is passed to the pipeline as `generator` through image_params.
SEED = torch.manual_seed(336) 

2024-12-16 11:29:12.633628: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734370152.666675   62708 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734370152.676623   62708 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-16 11:29:12.751858: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [2]:
# Keep only the leading sentences of a generated prompt
def get_processed_prompt(text, max_sentences=2):
    """Trim a generated prompt down to its first few sentences.

    Args:
        text: Raw text decoded from one of the prompt-enhancement models.
        max_sentences: Maximum number of leading sentences to keep. Defaults
            to 2, which is the limit this function previously hard-coded.

    Returns:
        The first ``max_sentences`` sentences joined with single spaces. If
        the text has fewer sentences, all of them are returned. Empty or
        ``None`` input yields an empty string.
    """
    # Nothing to split; also avoids handing None to the tokenizer.
    if not text:
        return ''

    # NOTE(review): nltk.sent_tokenize presumably needs the NLTK sentence
    # tokenizer data to be installed locally - confirm on a fresh environment.
    sentences = nltk.sent_tokenize(text)
    return ' '.join(sentences[:max_sentences])

In [3]:
# Decoding options shared by every prompt-enhancement model.
# Temperature is not set here: each generate() call passes its own value.
llm_args = dict(
    max_length=100,
    no_repeat_ngram_size=1,  # size-1 n-grams may not repeat, i.e. no token appears twice
    top_p=0.9,
    top_k=100,
    do_sample=True,
)

# Keyword arguments forwarded unchanged to every diffusion pipeline call.
image_params = dict(
    num_inference_steps=50,
    num_images_per_prompt=1,
    generator=SEED,
    guidance_scale=15,
    negative_prompt="animated, bad, terrible, low quality, weird",
)


# GPT (fine-tuned on the old data)

In [4]:
# Causal language model and its tokenizer, both read from the local
# './models/fine_tuned_gpt' directory. They produce the "GPT on old data"
# prompt in generate_images_and_prompts.
old_gpt_model = AutoModelForCausalLM.from_pretrained('./models/fine_tuned_gpt') 
old_gpt_tokenizer = AutoTokenizer.from_pretrained('./models/fine_tuned_gpt') 


## GPT on new data

In [5]:
# Causal language model and its tokenizer, both read from the local
# './models/fine_tuned_gpt_new_data' directory. They produce the
# "GPT on new data" prompt in generate_images_and_prompts.
new_gpt_model = AutoModelForCausalLM.from_pretrained('./models/fine_tuned_gpt_new_data') 
new_gpt_tokenizer = AutoTokenizer.from_pretrained('./models/fine_tuned_gpt_new_data') 


# T5

In [6]:
# T5 sequence-to-sequence model and its tokenizer, both read from the local
# './models/fine_tuned_t5' directory. They produce the T5-enhanced prompt in
# generate_images_and_prompts.
t5model = T5ForConditionalGeneration.from_pretrained('./models/fine_tuned_t5') 
t5tokenizer = T5Tokenizer.from_pretrained('./models/fine_tuned_t5') 

# Generate images for all prompt variants

In [7]:
# Helpers and entry point to generate images from enhanced prompts
def _enhance_prompt(model, tokenizer, input_text, temperature):
    """Rewrite ``input_text`` with one language model and trim the result.

    Args:
        model: Model exposing ``.to()`` and ``.generate()``.
        tokenizer: Tokenizer paired with ``model``.
        input_text: The user's original prompt.
        temperature: Sampling temperature for this model; every other
            decoding option comes from ``llm_args``.

    Returns:
        The decoded generation after ``get_processed_prompt`` has trimmed it.
    """
    encoded = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

    # Text generation runs on the CPU, exactly as before.
    generated = model.to(torch.device('cpu')).generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        temperature=temperature,
        **llm_args
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return get_processed_prompt(decoded)


def _render_image(sd_pipe, prompt):
    """Run the diffusion pipeline on ``prompt`` and return its first image."""
    return sd_pipe(prompt, **image_params).images[0]


def generate_images_and_prompts(input_text):
    """Generate one image per prompt variant for side-by-side comparison.

    The original prompt is rendered as-is, then rewritten by the fine-tuned
    T5 model, the GPT model and the GPT model trained on new data; each
    rewritten prompt is rendered with the same pipeline settings.

    Args:
        input_text: Prompt typed by the user.

    Returns:
        An 8-tuple of alternating prompt text and image, in the order:
        original, T5, GPT, new GPT.
    """
    # Move the pipeline once per request instead of before every image, and
    # fall back to the CPU when no CUDA device is present.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sd_pipe = pipe.to(device)

    # NOTE(review): the steps below keep their original order. The generator
    # in image_params appears to be torch's global generator (see SEED), which
    # sampling in generate() presumably also draws from, so reordering could
    # change the results - confirm before rearranging.

    # Original image
    og_image = _render_image(sd_pipe, input_text)

    # T5 image
    t5_generated_text = _enhance_prompt(t5model, t5tokenizer, input_text, temperature=0.3)
    t5_image = _render_image(sd_pipe, t5_generated_text)

    # GPT image
    gpt_generated_text = _enhance_prompt(old_gpt_model, old_gpt_tokenizer, input_text, temperature=0.3)
    gpt_image = _render_image(sd_pipe, gpt_generated_text)

    # New GPT image (sampled at a higher temperature than the other two)
    new_gpt_generated_text = _enhance_prompt(new_gpt_model, new_gpt_tokenizer, input_text, temperature=1.5)
    new_gpt_image = _render_image(sd_pipe, new_gpt_generated_text)

    return (
        input_text, og_image,
        t5_generated_text, t5_image,
        gpt_generated_text, gpt_image,
        new_gpt_generated_text, new_gpt_image,
    )


In [8]:
import gradio as gr

# Gradio Interface setup
# The output components are listed in the same order as the tuple returned by
# generate_images_and_prompts: (prompt, image) pairs for the original prompt,
# T5, GPT on old data and GPT on new data. Each image carries its own label so
# the rendered UI shows which model's prompt produced it.
iface = gr.Interface(
    fn=generate_images_and_prompts, 
    inputs=gr.Textbox(label="Enter your prompt for image generation"),
    outputs=[
        gr.Textbox(label="Original Prompt"),
        gr.Image(type="pil", label="Image generated from Original Prompt"),

        gr.Textbox(label="Prompt enhanced by T5"),
        gr.Image(type="pil", label="Image generated from T5 Prompt"),

        gr.Textbox(label="Prompt enhanced by GPT on old data"),
        gr.Image(type="pil", label="Image generated from GPT (old data) Prompt"),

        gr.Textbox(label="Prompt enhanced by GPT on new data"),
        gr.Image(type="pil", label="Image generated from GPT (new data) Prompt"),

    ],
    live=False,  # run only when the user submits, not on every keystroke
    description="App"
)

# Launch the Gradio app
iface.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  0%|          | 0/50 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


  0%|          | 0/50 [00:00<?, ?it/s]