In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch


In [None]:

class ConceptPromptGenerator:
    """Few-shot text generator producing image-prompt material for a concept.

    Wraps a Hugging Face causal language model in a ``text-generation``
    pipeline and asks it for three fields: a Stable Diffusion prompt, a
    negative prompt, and a short fact.
    """

    def __init__(self, model_name="microsoft/Phi-4-reasoning-plus", max_new_tokens=300, device=None):
        """Load the tokenizer and model and build the generation pipeline.

        Args:
            model_name: Hub identifier (or local path) of the causal LM.
            max_new_tokens: Upper bound on tokens generated per call.
            device: Target device; when ``None``, ``"cuda"`` is used if
                available, otherwise ``"cpu"``.
        """
        self.device = device if device is not None else ("cuda" if torch.cuda.is_available() else "cpu")
        # NOTE: trust_remote_code=True executes Python shipped with the model
        # repository; only use it with model sources you trust.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            # The dtype is picked from CUDA availability, not from `device`.
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
            # device_map already places the weights on the target device, so
            # no extra `.to(device)` call is needed afterwards.
            device_map=self.device,
            trust_remote_code=True,
        )
        self.generator = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
        )
        self.max_new_tokens = max_new_tokens

    @staticmethod
    def _parse_fields(text: str) -> dict:
        """Extract the first ``Prompt`` / ``Negative Prompt`` / ``Fact`` lines from *text*.

        Parsing stops at the first ``Concept:`` line so that follow-up examples
        the model may invent cannot leak into the result. Fields that are not
        found stay as empty strings.
        """
        result = {"Prompt": "", "Negative Prompt": "", "Fact": ""}
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if line.startswith("Concept:"):
                break
            for key in result:
                label = key + ":"
                # The first non-empty value wins; later duplicates are ignored.
                if line.startswith(label) and not result[key]:
                    result[key] = line[len(label):].strip()
                    break
            if all(result.values()):
                break
        return result

    def generate(self, concept: str, category: str) -> dict:
        """Generate prompt material for *concept*.

        Args:
            concept: The thing to depict (e.g. ``"cat"``).
            category: A coarse category for the concept (e.g. ``"animal"``).

        Returns:
            A dict with the keys ``"Prompt"``, ``"Negative Prompt"`` and
            ``"Fact"``; a value is ``""`` when the model did not produce it.
        """
        few_shot = (
            "You are an expert in AI art prompting and factual summaries.\n"
            "Given a concept and its category, generate:\n"
            "1. A vivid prompt for Stable Diffusion 1.5\n"
            "2. A negative prompt to avoid rendering issues\n"
            "3. A short fact under 50 characters\n\n"
            "Example:\n"
            "Concept: dog\n"
            "Category: animal\n"
            "Prompt: a happy golden retriever playing in a field, photorealistic, warm sunlight, detailed fur, 4k, realistic anatomy\n"
            "Negative Prompt: blurry, extra limbs, distorted, low quality, overexposed, unrealistic eyes\n"
            "Fact: Dogs bark to communicate\n\n"
            f"Concept: {concept}\n"
            f"Category: {category}\n"
            "Prompt:"
        )

        generated = self.generator(
            few_shot,
            max_new_tokens=self.max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            # Ask for the continuation only. Splitting the echoed text on
            # "Concept: <concept>" is ambiguous when the concept also appears
            # in the few-shot example (e.g. "dog").
            return_full_text=False,
        )[0]["generated_text"]

        # Be tolerant of a generator that still echoes the prompt.
        if generated.startswith(few_shot):
            generated = generated[len(few_shot):]

        # The few-shot text ends with "Prompt:", so the continuation starts
        # with the prompt value itself; restore the label for the line parser.
        return self._parse_fields("Prompt:" + generated)
    
    

In [None]:
# Quick manual check: load the generator on the CPU and print the parsed
# fields for one sample concept.
conceptPromptGenerator = ConceptPromptGenerator(device="cpu")
output = conceptPromptGenerator.generate("cat", "animal")
print(output)

In [None]:
import cv2
import numpy as np
from PIL import Image

def _to_bgr_image(image) -> np.ndarray:
    """Return *image* as a 3-channel array, converting PIL images to BGR.

    A 3-channel NumPy array is returned as-is (no copy), so later drawing may
    modify the caller's array.
    """
    if isinstance(image, Image.Image):
        pixels = np.array(image)
        if pixels.ndim == 2:  # Grayscale image
            return cv2.cvtColor(pixels, cv2.COLOR_GRAY2BGR)
        if pixels.shape[2] == 4:  # RGBA image
            return cv2.cvtColor(pixels, cv2.COLOR_RGBA2BGR)
        return cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)  # RGB image
    if isinstance(image, np.ndarray):
        if image.ndim == 2:
            return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        if image.shape[2] == 4:
            # NOTE(review): 4-channel arrays are treated as RGBA here, while
            # 3-channel arrays are assumed to be BGR already - confirm which
            # channel order callers actually pass.
            return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
        return image
    raise TypeError("Input image must be a PIL Image or a NumPy array (OpenCV format).")


def _wrap_text_lines(text, max_width, font_face, font_scale, thickness):
    """Greedily wrap *text* at spaces so each line fits within *max_width* pixels.

    Returns a list of ``(line_text, width, height, baseline)`` tuples. A single
    word wider than *max_width* is kept on its own line rather than split. The
    list is empty when *text* is empty.
    """
    def measure(line):
        (width, height), baseline = cv2.getTextSize(line, font_face, font_scale, thickness)
        return line, width, height, baseline

    lines = []
    current_words = []
    for word in text.split(' '):
        candidate = " ".join(current_words + [word]).strip()
        (candidate_width, _), _ = cv2.getTextSize(candidate, font_face, font_scale, thickness)
        if candidate_width > max_width and current_words:
            # The word does not fit: close the current line and start a new one.
            lines.append(measure(" ".join(current_words)))
            current_words = [word]
        else:
            current_words.append(word)

    remainder = " ".join(current_words)
    if remainder:
        lines.append(measure(remainder))
    return lines


def add_text_to_image(
    image,
    text: str,
    org: tuple[int | None, int | None] = (10, 30),  # Bottom-left corner of the text string
    font_face: int = cv2.FONT_HERSHEY_SIMPLEX,
    font_scale: float = 1.0,
    color: tuple[int, int, int] = (0, 0, 0),  # BGR color (Black by default)
    thickness: int = 2,
    background_color: tuple[int, int, int] = (255, 255, 255), # White background for new canvas
    text_background_color: tuple[int, int, int] = (255, 255, 255), # White background for text by default
    text_background_transparency: float = 0.8, # Blend weight of the text background (0.8 = 80% opaque)
    padding_x: int = 5, # Horizontal padding for text background
    padding_y: int = 5, # Vertical padding for text background
    wordwrap: bool = False # If True, wraps text to fit image width
) -> np.ndarray:
    """
    Adds text to an image, extending the image size if the padded text block
    falls outside the original boundaries. Supports word wrapping.

    Args:
        image: The input image. Can be an OpenCV (numpy.ndarray) or Pillow (PIL.Image.Image) image.
               A 3-channel NumPy array is drawn on in place when no canvas extension is needed.
        text (str): The text string to add.
        org (tuple[int | None, int | None]): The bottom-left corner of the first text line in (x, y)
                               coordinates; y is the baseline of that line. Defaults to (10, 30).
                               If x or y is None, the text is centered in that direction.
        font_face (int): Font type. See cv2.FONT_HERSHEY_* for options.
                         Defaults to cv2.FONT_HERSHEY_SIMPLEX.
        font_scale (float): Font scale factor multiplied by the font-specific base size.
                            Defaults to 1.0.
        color (tuple[int, int, int]): Text color in BGR format. Defaults to (0, 0, 0) (Black).
        thickness (int): Thickness of the text lines. Defaults to 2.
        background_color (tuple[int, int, int]): Color to fill the extended canvas if the image
                                                  needs to be resized. Defaults to (255, 255, 255) (White).
        text_background_color (tuple[int, int, int]): Color of the text's background in BGR format.
                                                       Defaults to (255, 255, 255) (White).
        text_background_transparency (float): Blend weight of the text background. 0.0 draws no
                                              background, 1.0 draws it fully opaque.
                                              Defaults to 0.8 (80% opaque).
        padding_x (int): Horizontal padding to add around the text background. Defaults to 5 pixels.
        padding_y (int): Vertical padding to add around the text background. Defaults to 5 pixels.
        wordwrap (bool): If True, wraps text to fit within the image width, breaking at spaces.
                         Defaults to False.

    Returns:
        numpy.ndarray: The image with the added text, in OpenCV (BGR) format.

    Raises:
        TypeError: If *image* is neither a PIL image nor a NumPy array.
    """
    # 1. Normalise the input to a 3-channel OpenCV image.
    img_cv = _to_bgr_image(image)
    h_orig, w_orig = img_cv.shape[:2]

    # A representative line height ("Tg") keeps the spacing uniform for all lines.
    (_, text_h_single_line), _ = cv2.getTextSize("Tg", font_face, font_scale, thickness)
    line_spacing = int(text_h_single_line * 0.5)
    line_step = text_h_single_line + line_spacing  # baseline-to-baseline distance

    # 2. Split the text into measured lines.
    if wordwrap:
        if org[0] is None:  # Centered: whole image width minus padding on both sides.
            available_width = w_orig - (2 * padding_x)
        else:  # From the given x to the right edge, minus right padding.
            available_width = w_orig - org[0] - padding_x
        available_width = max(10, available_width)  # Guard against tiny/negative widths.
        lines = _wrap_text_lines(text, available_width, font_face, font_scale, thickness)
    else:
        (text_w, text_h), baseline = cv2.getTextSize(text, font_face, font_scale, thickness)
        lines = [(text, text_w, text_h, baseline)]

    # Nothing to draw (e.g. empty text with word wrapping enabled).
    if not lines:
        return img_cv

    block_width = max(line_w for _, line_w, _, _ in lines)

    # cv2.getTextSize reports the height ABOVE the baseline and, separately, the
    # extent below it. The block therefore spans from `first_line_ascent` above
    # the first baseline to `last_line_descent` below the last baseline.
    first_line_ascent = lines[0][2]
    last_line_descent = lines[-1][3]
    block_height = first_line_ascent + (len(lines) - 1) * line_step + last_line_descent

    # 3. Desired top-left corner of the text block on the ORIGINAL image.
    if org[0] is not None:
        block_x = org[0]
    else:
        block_x = int((w_orig - block_width) / 2)
    if org[1] is not None:  # org[1] is the baseline of the first line.
        block_top = org[1] - first_line_ascent
    else:
        block_top = int((h_orig - block_height) / 2)

    # 4. Work out how far the padded block sticks out of the image on each side.
    offset_x_canvas = max(0, padding_x - block_x)    # left overflow
    offset_y_canvas = max(0, padding_y - block_top)  # top overflow
    new_w = max(w_orig + offset_x_canvas,
                block_x + offset_x_canvas + block_width + padding_x)
    new_h = max(h_orig + offset_y_canvas,
                block_top + offset_y_canvas + block_height + padding_y)

    if new_w > w_orig or new_h > h_orig:
        # Paste the original onto a larger canvas filled with the background color.
        canvas = np.full((new_h, new_w, 3), background_color, dtype=np.uint8)
        canvas[offset_y_canvas : offset_y_canvas + h_orig,
               offset_x_canvas : offset_x_canvas + w_orig] = img_cv
        img_cv = canvas

    # 5. Final block position on the (possibly enlarged) canvas.
    canvas_h, canvas_w = img_cv.shape[:2]
    if org[0] is None:
        final_x = int((canvas_w - block_width) / 2)
    else:
        final_x = block_x + offset_x_canvas
    if org[1] is None:
        final_top = int((canvas_h - block_height) / 2)
    else:
        final_top = block_top + offset_y_canvas

    # 6. Blend a filled rectangle behind the whole block.
    if text_background_transparency > 0:
        x1_bg = max(0, final_x - padding_x)
        y1_bg = max(0, final_top - padding_y)
        x2_bg = min(canvas_w, final_x + block_width + padding_x)
        y2_bg = min(canvas_h, final_top + block_height + padding_y)

        if x2_bg > x1_bg and y2_bg > y1_bg:
            overlay = img_cv.copy()
            cv2.rectangle(overlay, (x1_bg, y1_bg), (x2_bg, y2_bg), text_background_color, -1)
            alpha = text_background_transparency
            # Writes the blend back into img_cv.
            cv2.addWeighted(overlay, alpha, img_cv, 1 - alpha, 0, img_cv)

    # 7. Draw each line; with horizontal centering every line is centered on its own.
    baseline_y = final_top + first_line_ascent
    for line_text, line_w, _, _ in lines:
        line_x = int((canvas_w - line_w) / 2) if org[0] is None else final_x
        cv2.putText(img_cv, line_text, (line_x, baseline_y),
                    font_face, font_scale, color, thickness, cv2.LINE_AA)
        baseline_y += line_step

    return img_cv


In [None]:
# Load a previously generated frame and overlay a centered, word-wrapped caption.
image = Image.open("generated/step_99.png")
image = add_text_to_image(
    image,
    text="This is a test of the text wrapping functionality. It should wrap correctly and not overflow the image boundaries.",
    org=(None, None),                       # None on an axis means "center on that axis"
    font_face=cv2.FONT_HERSHEY_SIMPLEX,
    font_scale=1.0,
    color=(0, 0, 0),                        # black text (BGR)
    thickness=2,
    background_color=(255, 255, 255),       # white fill for any added canvas area
    text_background_color=(200, 200, 200),  # light gray box behind the text
    text_background_transparency=0.8,       # blend weight of that box
    padding_x=20,                           # horizontal padding around the text box
    padding_y=20,                           # vertical padding around the text box
    wordwrap=True,                          # break the caption at spaces to fit the width
)

# The result is a BGR NumPy array; convert it to RGB so Pillow displays the right colors.
Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).show()