## AGENTS


In [1]:
from autogen import ConversableAgent
import os


In [2]:
import cv2
import pytesseract
from sklearn.cluster import KMeans
from PIL import Image
import numpy as np

class ImageAnalysisAgent:
    """Run simple visual analyses on a single image loaded with OpenCV.

    The image is read once in ``__init__``. ``self.image`` holds the array as
    returned by ``cv2.imread`` and ``self.rgb_image`` holds the same data
    converted with ``cv2.COLOR_BGR2RGB``; every analysis method works on the
    RGB copy.
    """

    def __init__(self, image_path):
        """Load the image stored at *image_path*.

        Raises:
            OSError: if OpenCV could not read an image from *image_path*.
        """
        self.image = cv2.imread(image_path)
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside cvtColor.
        if self.image is None:
            raise OSError(f"Could not read image: {image_path!r}")
        self.rgb_image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB)

    def object_identification(self):
        """Return the objects reported by ``detect_objects`` for the RGB image.

        ``detect_objects`` is the module-level detection hook; a real detector
        (e.g. YOLO or Faster R-CNN) is meant to be plugged in there.
        """
        return detect_objects(self.rgb_image)

    def color_identification(self, num_colors=5):
        """Return the *num_colors* dominant colours of the image.

        Every pixel is treated as a 3-component sample and clustered with
        k-means; the returned array contains one cluster centre per row.
        """
        pixels = self.rgb_image.reshape((-1, 3))
        # n_init is pinned to 10 (the value named in scikit-learn's
        # FutureWarning) so the warning is not emitted and the number of
        # restarts does not silently change with the library version.
        kmeans = KMeans(n_clusters=num_colors, n_init=10)
        kmeans.fit(pixels)
        return kmeans.cluster_centers_

    def position_extraction(self, objects):
        """Return the centre point of each object's bounding box.

        Each item of *objects* must provide ``obj['bbox']`` as an
        ``(x, y, w, h)`` tuple. The centre is computed with integer division,
        so odd widths/heights round down. An empty *objects* yields ``[]``.
        """
        return [
            (x + w // 2, y + h // 2)
            for x, y, w, h in (obj['bbox'] for obj in objects)
        ]

    def character_recognition(self):
        """Return the text Tesseract extracts from the RGB image."""
        return pytesseract.image_to_string(self.rgb_image)

def detect_objects(image):
    """Stand-in object detector.

    Ignores *image* and always reports one hard-coded detection; replace the
    body with a real model call when one is available.
    """
    placeholder_detection = {"bbox": (50, 50, 100, 100), "label": "Logo"}
    return [placeholder_detection]

# Example usage: run every analysis step on one preview asset and print the results.
agent = ImageAnalysisAgent('/home/temesgen_gebreabzgi/semantic_image_and_text_alignment/data/Challenge_Data/Assets/2a355ca0d306921e195591e5b2374b6a/_preview.png')

# Detection first, because the position step consumes its output.
objects = agent.object_identification()
positions = agent.position_extraction(objects)

# Colour clustering and OCR are independent of the detections.
colors = agent.color_identification()
text = agent.character_recognition()

# Report in a fixed order: objects, colours, positions, text.
print("Objects: ", objects)
print("Colors: ", colors)
print("Positions: ", positions)
print("Extracted Text: ", text)


  super()._check_params_vs_input(X, default_n_init=10)


Objects: [{'bbox': (50, 50, 100, 100), 'label': 'Logo'}]
Colors: [[218.18092283 113.02698577  75.32102231]
 [ 11.39495018   6.88686595   9.30965328]
 [251.77187974 249.1893474  245.93488473]
 [  1.29373487 145.55770783 160.93916465]
 [144.23347073  25.93336356  10.61686727]]
Positions: [(100, 100)]
Extracted Text:  

Tas i Tas
or SWIPE Ras

4 PROTEGE ed ue

 



In [None]:
from PIL import Image
import glob, os

# Locations used by this cell. Only source_directory is read below;
# destination_directory is created so later steps can write into it.
source_directory = '/home/temesgen_gebreabzgi/semantic_image_and_text_alignment/data/adludio storyboard examples'
Assests_directory = '/home/temesgen_gebreabzgi/semantic_image_and_text_alignment/data/Challenge_Data/Assets/0a22f881b77f00220f2034c21a18b854'
destination_directory = '/home/temesgen_gebreabzgi/semantic_image_and_text_alignment/images'

# Thumbnail size for the (currently disabled) thumbnail step.
size = (128, 128)
pattern = os.path.join(source_directory, '*.png')

os.makedirs(destination_directory, exist_ok=True)

# Open every PNG in the source directory in the default image viewer.
for infile in glob.glob(pattern):
    # file/ext are only needed by the disabled thumbnail step
    # (im.thumbnail(size) followed by saving to file + ".thumbnail.png").
    file, ext = os.path.splitext(infile)
    with Image.open(infile) as im:
        im.show()

        


In [None]:
from PIL import Image

class StoryBoard:
    """Helpers for tiling PIL images into a row, a column, or a full grid.

    Every combined canvas is a new ``'RGBA'`` image created with
    ``Image.new``; regions not covered by a pasted image keep that canvas's
    default fill.
    """

    @staticmethod
    def _require_non_empty(items, description: str) -> None:
        """Raise a descriptive ``ValueError`` when *items* is empty."""
        if not items:
            raise ValueError(f"{description} requires at least one image")

    @staticmethod
    def combine_images_horizontally(images: list[Image.Image]) -> Image.Image:
        """Paste *images* side by side, left to right, aligned to the top edge.

        The result is as wide as the sum of the image widths and as tall as
        the tallest image.

        Raises:
            ValueError: if *images* is empty.
        """
        StoryBoard._require_non_empty(images, "combine_images_horizontally")
        total_width = sum(image.width for image in images)
        max_height = max(image.height for image in images)
        combined_image = Image.new('RGBA', (total_width, max_height))

        current_x = 0
        for image in images:
            combined_image.paste(image, (current_x, 0))
            current_x += image.width

        return combined_image

    @staticmethod
    def combine_images_vertically(images: list[Image.Image]) -> Image.Image:
        """Stack *images* top to bottom, aligned to the left edge.

        The result is as tall as the sum of the image heights and as wide as
        the widest image.

        Raises:
            ValueError: if *images* is empty.
        """
        StoryBoard._require_non_empty(images, "combine_images_vertically")
        total_height = sum(image.height for image in images)
        max_width = max(image.width for image in images)
        combined_image = Image.new('RGBA', (max_width, total_height))

        current_y = 0
        for image in images:
            combined_image.paste(image, (0, current_y))
            current_y += image.height

        return combined_image

    @staticmethod
    def create_storyboard(images: list[list[Image.Image]]) -> Image.Image:
        """Build a storyboard from a grid of images.

        Each inner list of *images* becomes one horizontal row; the rows are
        then stacked vertically in the order given.

        Raises:
            ValueError: if *images* or any of its rows is empty.
        """
        # Fail early with a clear message instead of the bare
        # "max() arg is an empty sequence" raised further down.
        StoryBoard._require_non_empty(images, "create_storyboard")
        combined_rows = [StoryBoard.combine_images_horizontally(row) for row in images]
        return StoryBoard.combine_images_vertically(combined_rows)

if __name__ == "__main__":
    # Sample storyboards from the ./samples/ directory, laid out exactly as
    # they should appear in the final grid: one inner list per row.
    sample_grid = [
        [   # First row
            "./samples/kfc-fs-320x480-sensoryvideo-storyboard.png",
            "./samples/ITC-FS-320x480-SensorySwipe-Storyboard-Rev.png",
            "./samples/Adludio-CocaCola-[BR]-[LIVE]-ifood-christmas2033-TapAndHold-FS-V3-sb.png",
        ],
        [   # Second row
            "./samples/Adludio-Volvo-[UK]-[RFP]-Volvo_Vehicle_Electrification_XC40-Tap-FS-Version_2_AJ.png",
            "./samples/Adludio-Microsoft-[FR]-[LIVE]-Windows_11_version_2-Swipe-MPU-v2.png",
            "./samples/Disney-DrStrange-FS-600x900-UserSlider-Storyboard.png",
        ],
    ]

    # Open every sample while preserving the row/column layout.
    images = [[Image.open(path) for path in row] for row in sample_grid]

    storyboard = StoryBoard.create_storyboard(images)
    storyboard.show()
