In [11]:
import easyocr
import cv2
import matplotlib.pyplot as plt


import requests
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
import uuid
import hashlib

import os
import matplotlib.pyplot as plt


import torch
from PIL import Image
from rembg import new_session, remove
from torchvision import transforms
from transformers import AutoModelForImageSegmentation

In [12]:
# Endpoint of the FLUX image-generation HTTP service.
# A second deployment used to be assigned here first and was immediately
# overwritten by the local endpoint, so only the effective value is kept:
#   'http://192.165.134.27:22186/flux-generate-image'
FLUX_SERVER_ENDPOINT = 'http://127.0.0.1:11234/flux-generate-image'
# Timeout handed to requests.post for one generation request.
FLUX_TIMEOUT = 300
# Folder that receives the generated images; created up front so writes cannot
# fail on a missing directory.
save_dir = 'data_prize_words/generated'
Path(save_dir).mkdir(parents=True, exist_ok=True)

In [13]:
def get_hash_from_uuid(hash_val: str | None = None, hash_len: int = 5) -> str:
    # Generate a UUID4 and convert it to a string
    if not hash_val:
        hash_val = str(uuid.uuid4())

    # Hash the UUID string using SHA-256
    hash_object = hashlib.sha256(hash_val.encode())
    hex_dig = hash_object.hexdigest()
    return hex_dig[:hash_len]

In [14]:
def flux_get_image_from_prompt(
    prompt: str,
    save_path: str,
    resolution: tuple | None = None,
    num_inference_steps: int = 28,
    guidance_scale: float = 3.5,
    seed: int = 24,
) -> str:
    """Request one image from the FLUX server and store the response body.

    Args:
        prompt: Text prompt sent to the server.
        save_path: File the raw response content is written to.
        resolution: ``(width, height)`` pair; a falsy value means ``(1024, 1024)``.
        num_inference_steps: Forwarded unchanged in the request form data.
        guidance_scale: Forwarded unchanged in the request form data.
        seed: Forwarded unchanged in the request form data.

    Returns:
        ``save_path`` exactly as it was passed in.

    Raises:
        ValueError: If the server answers with a status code other than 200.
    """
    width, height = resolution or (1024, 1024)
    form_fields = {
        'prompt': prompt,
        'width': width,
        'height': height,
        'seed': seed,
        'num_inference_steps': num_inference_steps,
        'guidance_scale': guidance_scale,
    }

    response = requests.post(FLUX_SERVER_ENDPOINT, data=form_fields, timeout=FLUX_TIMEOUT)

    # Fail early so that nothing is written for an unsuccessful request.
    if response.status_code != 200:
        raise ValueError(f'Bad response from FLUX API. Status code: {response.status_code}')

    Path(save_path).write_bytes(response.content)
    return save_path

In [15]:
def generate_single_image(prompt, gen_params):
    """Generate one image for ``prompt`` and return the path it was saved to.

    The file name is the short hash of the prompt text only, so calling this
    again with the same prompt but different ``gen_params`` overwrites the
    earlier file.

    Args:
        prompt: Text prompt forwarded to ``flux_get_image_from_prompt``.
        gen_params: Keyword arguments for ``flux_get_image_from_prompt``
            (e.g. ``seed``, ``resolution``); ``None`` or an empty dict means
            that function's defaults are used.

    Returns:
        The saved image path as a ``str``.
    """
    unique_save_path = Path(save_dir) / f"{get_hash_from_uuid(hash_val=prompt)}.png"
    # Hand over a plain string: the callee is annotated with ``str`` and the
    # returned path is later given to cv2.imread, which does not accept
    # ``Path`` objects in every OpenCV release.
    return flux_get_image_from_prompt(
        prompt=prompt,
        save_path=str(unique_save_path),
        **(gen_params or {}),
    )

## Segmentation

In [16]:
# Loaded segmentation models keyed by device name. The function below is called
# once per cropped word, so the weights are loaded only on first use.
_RMBG_MODELS: dict = {}


def _get_rmbg_model(device: str) -> 'torch.nn.Module':
    """Return the 'briaai/RMBG-2.0' model on ``device``, loading it on first use."""
    model = _RMBG_MODELS.get(device)
    if model is None:
        model = AutoModelForImageSegmentation.from_pretrained(
            'briaai/RMBG-2.0', trust_remote_code=True
        )
        model.to(device)
        model.eval()
        _RMBG_MODELS[device] = model
    return model


def remove_background_from_symbol(src: str, dest: str) -> None:
    """Write a copy of ``src`` to ``dest`` with the predicted mask as alpha channel.

    Args:
        src: Path of the image to read.
        dest: Path the result is saved to; may be the same file as ``src``
            because the source is fully loaded and closed before saving.
    """
    torch.set_float32_matmul_precision('high')

    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = _get_rmbg_model(device)

    transform_image: transforms.Compose = transforms.Compose(
        [
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            # ImageNet mean/std normalisation, as in the RMBG-2.0 reference usage.
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )

    # Force three channels (the normalisation above is per RGB channel) and
    # release the file handle before ``dest`` is written.
    with Image.open(src) as opened:
        img = opened.convert('RGB')

    input_img: torch.Tensor = transform_image(img).unsqueeze(0).to(device)

    with torch.no_grad():
        mask: torch.Tensor = model(input_img)[-1].sigmoid()[0].squeeze().cpu()

    # The mask is produced at 1024x1024; scale it back to the image size.
    img.putalpha(transforms.ToPILImage()(mask).resize(img.size))
    img.save(dest)

## Word detection

In [10]:
# EasyOCR readers keyed by language tuple. Constructing a Reader is costly
# (it sets up the OCR models), so one instance is reused across calls.
_OCR_READERS: dict = {}


def _get_ocr_reader(languages=('en',)):
    """Return a cached ``easyocr.Reader`` for ``languages``, creating it on first use."""
    key = tuple(languages)
    if key not in _OCR_READERS:
        _OCR_READERS[key] = easyocr.Reader(list(key))
    return _OCR_READERS[key]


def detect_multi_words(image_path: str, new_width=500, new_height=500):
    """Run OCR on a resized copy of an image and split multi-word detections.

    The image is resized to ``(new_width, new_height)`` and converted to
    grayscale before OCR. A detection whose text contains several
    whitespace-separated words is replaced by one entry per word: the
    axis-aligned bounding rectangle is cut into equal-width slices and the
    confidence is divided by the number of words.

    Args:
        image_path: Path of the image to read.
        new_width: Width the image is resized to before OCR.
        new_height: Height the image is resized to before OCR.

    Returns:
        ``(processed_results, image)`` where ``processed_results`` is a list of
        ``(text, bbox, prob)`` tuples with ``bbox`` as four ``[x, y]`` corners
        in the coordinates of the resized image, and ``image`` is that resized
        BGR image.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(str(image_path))
    # cv2.imread reports failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')

    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    results = _get_ocr_reader().readtext(gray)

    processed_results = []
    for bbox, text, prob in results:
        words = text.split()
        if len(words) <= 1:
            processed_results.append((text, bbox, prob))
            continue

        xs = [point[0] for point in bbox]
        ys = [point[1] for point in bbox]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Equal-width slices; the floor division may leave a few pixels at the
        # right edge of the original box uncovered.
        width_per_word = (x_max - x_min) // len(words)

        for i, word in enumerate(words):
            left = x_min + i * width_per_word
            right = x_min + (i + 1) * width_per_word
            new_bbox = [[left, y_min], [right, y_min], [right, y_max], [left, y_max]]
            processed_results.append((word, new_bbox, prob / len(words)))

    return processed_results, image

In [23]:
def save_cropped_words(image, detected_words, output_folder="cropped_words"):
    """Crop every detected word out of ``image``, save it and strip its background.

    A new sub-folder with a random three-character name is created inside
    ``output_folder`` on every call. Each crop is written there as
    ``<word>_<index>.png`` and then processed in place by
    ``remove_background_from_symbol``.

    Args:
        image: Image array indexed as ``image[y, x]``.
        detected_words: Iterable of ``(word, bbox, prob)`` tuples where
            ``bbox`` is a sequence of ``(x, y)`` points.
        output_folder: Parent folder for the per-call sub-folder.

    Returns:
        The path of the created sub-folder as a string.

    Raises:
        OSError: If a crop cannot be written to disk.
    """
    # ``parents=True`` also creates ``output_folder`` itself when it is missing.
    dir_to_save = Path(output_folder) / get_hash_from_uuid(hash_len=3)
    dir_to_save.mkdir(parents=True, exist_ok=True)

    img_height, img_width = image.shape[0], image.shape[1]

    for i, (word, bbox, prob) in enumerate(detected_words):
        xs = [point[0] for point in bbox]
        ys = [point[1] for point in bbox]

        # Clamp the axis-aligned rectangle to the image bounds.
        x_min = max(0, int(min(xs)))
        y_min = max(0, int(min(ys)))
        x_max = min(img_width, int(max(xs)))
        y_max = min(img_height, int(max(ys)))

        # Skip degenerate boxes that would produce an empty crop.
        if x_max <= x_min or y_max <= y_min:
            continue

        cropped = image[y_min:y_max, x_min:x_max]

        # OCR text may contain path separators or other characters that are
        # not valid in file names; keep letters, digits, '-' and '_' only.
        safe_word = ''.join(ch if ch.isalnum() or ch in '-_' else '_' for ch in word)
        filename = f"{dir_to_save}/{safe_word}_{i}.png"

        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(filename, cropped):
            raise OSError(f'Could not write cropped word image: {filename}')

        remove_background_from_symbol(filename, filename)

    return str(dir_to_save)

In [21]:
def generate_words(prompt, gen_params=None):
    """Generate an image for ``prompt``, detect its words and save them as crops.

    Prints the generated image path and the folder the crops were saved to.

    Args:
        prompt: Text prompt for the image generator.
        gen_params: Optional overrides for the default generation settings
            (``num_inference_steps``, ``guidance_scale``, ``seed``,
            ``resolution``). Note that ``generate_single_image`` names the
            file after the prompt only, so changing these settings for the
            same prompt overwrites the earlier image.
    """
    params = {
        'num_inference_steps': 30,
        'guidance_scale': 10.0,
        'seed': 2,
        'resolution': (1024, 1024),
    }
    # Caller-supplied values win over the defaults above.
    if gen_params:
        params.update(gen_params)

    image_path = generate_single_image(prompt=prompt, gen_params=params)
    print(f'Generated image path: {image_path}')
    detected_words, image = detect_multi_words(image_path)
    saved_dir_path = save_cropped_words(image, detected_words)

    print(f"Saved to: {saved_dir_path}")

### Anime

In [22]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, anime-style font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them. The typography embodies the energetic, dynamic, and eye-catching aesthetic of Japanese anime title designs, inspired by shonen action series, mecha anime, and high-energy opening sequences. The font is bold, exaggerated, and slightly angled, with sharp edges, thick strokes, and a fast-paced, electric feel.
The colors remain vibrant and high-contrast, featuring neon pinks, electric blues, fiery reds, and golden yellows, capturing the intensity and excitement of anime visuals. Each word is outlined individually, without overlapping strokes or elements from other words. No motion lines or speed effects connecting the words—each is completely isolated, floating independently in the design.
The background remains fully white, ensuring that the typography pops with an intense, anime-inspired visual impact while maintaining clean separation between words. No additional symbols or elements—just pure anime-style typography, where each word stands alone in a well-spaced, high-energy composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/930c0.png
134 370 38 160




72 439 162 313
52 463 326 461
Saved to: cropped_words/58d


### SpongeBob

In [24]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, SpongeBob-style font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them.

The typography embodies the wacky, playful, and bubbly aesthetic of the SpongeBob SquarePants universe, inspired by oceanic themes, underwater adventures, and cartoonish fun. The font is bouncy, wobbly, and slightly irregular, with a hand-drawn, organic feel, making the letters look as if they are floating underwater.

The colors are bright, tropical, and lively, featuring sunny yellows, ocean blues, coral oranges, and sandy beiges, evoking the vibrant world of Bikini Bottom. Each word has a thick, bold outline and may include bubble-like textures, wavy distortions, or soft, squishy edges to enhance the underwater, fun-loving aesthetic.

The background remains fully white, ensuring that the typography pops vividly while maintaining a clean separation between words. No additional symbols or elements—just pure SpongeBob-style typography, where each word stands alone in a whimsical, cartoonish, and high-energy composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/13578.png
Saved to: cropped_words/3ff


### USA

In [25]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, USA-style font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them.

The typography embodies the patriotic, powerful, and bold aesthetic of American culture, inspired by classic American sports lettering, national pride, and vintage USA signage. The font is strong, blocky, and all-caps, with a varsity, collegiate, or bold sans-serif style, reminiscent of American football jerseys, political banners, or iconic USA logos.

The colors are red, white, and blue, representing the American flag, with deep navy blues, bold crimson reds, and crisp whites. Each word is outlined individually with a thick, high-contrast stroke, giving it a clean, structured look. Subtle star patterns, stripes, or a slight 3D effect may enhance the design, evoking the pride and energy of American themes.

The background remains fully white, ensuring that the typography pops vividly while maintaining a strong, patriotic presence. No additional symbols or elements—just pure USA-inspired typography, where each word stands alone in a bold, high-energy, and pride-filled composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/33bfa.png
Saved to: cropped_words/1fa


### Egyptian

In [26]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
# NOTE(review): the prompt contains the typo "decoative"; it is left untouched
# because editing the text changes the hash-derived output file name.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, Egyptian-inspired font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them.
The typography embodies the mystical, ancient, and regal aesthetic of Egyptian civilization, inspired by hieroglyphic inscriptions, pharaonic stone carvings, and golden tomb engravings. The font is ornate and angular, with bold, structured letterforms featuring subtle curves and embellishments reminiscent of ancient Egyptian scripts. Some characters may have hieroglyphic-like extensions, decoative serifs, or a slightly weathered texture, enhancing the historic and mystical appearance.
The colors are rich and opulent, featuring deep gold, warm sandstone beige, and lapis lazuli blue, evoking the luxury of the pharaohs, the golden treasures of Tutankhamun, and the sacred stones of ancient temples. Each word has a thick, bold outline with subtle engravings or a chiseled effect, making the letters appear as if they are carved into an ancient monument or painted on a temple wall.
The background remains fully white, ensuring that the typography pops vividly while maintaining a clean, regal presence. No additional symbols or elements—just pure Egyptian-style typography, where each word stands alone in a timeless, mystical, and historically inspired composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/e37c3.png
Saved to: cropped_words/72e


### Harry Potter

In [27]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, Harry Potter-inspired font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them.
The typography embodies the mystical, gothic, and enchanted aesthetic of the Harry Potter universe, inspired by ancient spellbooks, wizarding school inscriptions, and magical artifacts. The font is ornate, slightly elongated, and gothic-styled, with sharp, lightning-like serifs, flowing curves, and an old-world, medieval charm. Some letters may have wand-like extensions, flickering edges, or a subtle distressed texture, enhancing the magical, spellbinding effect.
The colors are deep and mysterious, featuring golden enchantments, dark midnight blues, smoky grays, and rich burgundy hues, evoking the magical aura of Hogwarts, enchanted scrolls, and wizard robes. Each word is outlined individually with a glowing, mystical aura or a soft golden shimmer, adding a spell-casting presence. Subtle floating sparkles, swirling mist, or lightning-inspired accents may enhance the typography, giving it the feel of a legendary incantation.
The background remains fully white, ensuring that the typography pops vividly while maintaining a magical and mysterious elegance. No additional symbols or elements—just pure Harry Potter-style typography, where each word stands alone in a well-spaced, enchanted, and wizardly composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/19e6c.png
Saved to: cropped_words/96d


### Magic Forest

In [28]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, enchanted Magic Forest-inspired font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them.
The typography embodies the mystical, ethereal, and whimsical aesthetic of an enchanted forest, inspired by ancient woodland myths, fairy tales, and magical nature realms. The font is organic and slightly twisted, with elegant flourishes, vine-like extensions, and curving, nature-inspired strokes. Some letters may have leafy tendrils, roots curling around the edges, or subtle glowing highlights, evoking the presence of deep forest magic.
The colors are earthy and luminous, featuring emerald greens, twilight purples, soft mossy browns, and shimmering moonlight silvers, capturing the dreamlike atmosphere of glowing fungi, enchanted trees, and mystical woodland spirits. Each word is outlined individually with a subtle glow, giving the effect of bioluminescent plants or magical fireflies softly illuminating the letters. Delicate misty swirls, scattered fairy dust, or faint rune-like engravings may enhance the typography, giving it a sense of otherworldly wonder and ancient magic.
The background remains fully white, ensuring that the typography pops vividly while maintaining a dreamy and enchanted presence. No additional symbols or elements—just pure Magic Forest-style typography, where each word stands alone in a well-spaced, mystical, and nature-inspired composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/9dafb.png
Saved to: cropped_words/742


### Retro Cars

In [29]:
# Plain string literal: the prompt has no placeholders, so no f-prefix is used.
# The text is sent verbatim and its hash names the generated file.
words_prompts = """
A high-resolution typography design featuring the words ‘MINI’, ‘MINOR’, and ‘MAJOR’ in a bold, retro cars-inspired font, each word placed separately with clear empty space around it. The words are not touching or overlapping, ensuring they stand alone with well-balanced white space between them.
The typography embodies the sleek, bold, and nostalgic aesthetic of classic vintage automobiles, inspired by 1950s and 1960s car emblems, racing decals, and chrome-plated badges. The font is smooth, dynamic, and slightly slanted, with sharp edges and stylish curves, reminiscent of vintage car branding and retro speedometers. Some letters may have italicized motion streaks, Art Deco-inspired serifs, or a sleek aerodynamic flow, mimicking the elegance of mid-century automobiles.
The colors are bold and high-contrast, featuring chrome silvers, deep reds, classic blues, and rich blacks, evoking the luxurious paint finishes and metallic trims of vintage muscle cars and convertibles. Each word is outlined individually with a glossy, reflective sheen or a metallic gradient, giving it the feel of a classic car logo. Optional subtle textures like checkered racing stripes, neon glow edges, or polished metal reflections may enhance the typography, adding to its nostalgic, high-speed appeal.
The background remains fully white, ensuring that the typography pops vividly while maintaining a sleek and stylish presence. No additional symbols or elements—just pure Retro Cars-style typography, where each word stands alone in a well-spaced, vintage, and automobile-inspired composition.
"""

generate_words(prompt=words_prompts)

Generated image path: data_prize_words/generated/2414d.png
Saved to: cropped_words/777
