<a href="https://colab.research.google.com/github/leosilverberg/collage_maker/blob/main/collage_pipe2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SETUP

In [None]:
%%capture
!pip install transformers datasets accelerate timm InstructorEmbedding sentence-transformers openai
!pip install guardrails-ai
!pip install langchain

Create this folder on your Google Drive first, or change the `%cd` path in the cell below to point at your own project folder.

In [None]:
from google.colab import drive

# Mount Google Drive at /content/gdrive/.
drive.mount('/content/gdrive/')

# Switch the working directory to the project folder on Drive; relative paths
# used later (e.g. INPUT_FOLDER, OUTPUT_FOLDER, "sam_runs") resolve against it.
%cd "/content/gdrive/MyDrive/AI/collage"


Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
/content/gdrive/MyDrive/AI/collage


Specify:

*   Final canvas size
*   Folder containing source images
*   Destination folder
*   How many cutouts to make per image (recommended 1 - 2)
*   OpenAI API key



In [None]:
import os

# Final canvas size in pixels.
CANVAS_WIDTH = 1080
CANVAS_HEIGHT = 2048
# Folder (relative to the working directory) containing the source images.
# NOTE: the cutout stage resizes these images and overwrites them in place.
INPUT_FOLDER = "test_images3"
# Folder the rendered canvases are written to.
OUTPUT_FOLDER = "out10"
# Number of cutouts to make per source image (1 - 2 recommended).
CUTOUTS_PER = 2
# OpenAI API key. Prefer the OPENAI_API_KEY environment variable so the key is
# not saved inside the notebook; the placeholder remains as a fallback that can
# still be edited by hand.
OPEN_AI_API_KEY = os.environ.get("OPENAI_API_KEY", "fill this in")

In [None]:
import pandas as pd

# One row per cutout. The columns start empty and are filled in by the later
# stages (cutout extraction, captioning, placement).
df = pd.DataFrame(
    columns=[
        "img_path",
        "caption",
        "original_x",
        "original_y",
        "canvas_x",
        "canvas_y",
        "width",
        "height",
    ]
)

print(df)

Empty DataFrame
Columns: [img_path, caption, original_x, original_y, canvas_x, canvas_y, width, height]
Index: []


# RENDER FUNCTION (run me)

In [None]:
import os
from PIL import Image
import pandas as pd

def render_and_save_canvas(df, output_folder, canvas_width=1000, canvas_height=1000, image_name="render"):
    """Paste every placed cutout in ``df`` onto a transparent canvas and save it as a PNG.

    Rows are pasted in DataFrame order, so later rows are drawn on top of
    earlier ones. ``canvas_x`` / ``canvas_y`` are used as the top-left corner
    of the pasted cutout. Rows whose coordinates are NaN are skipped.

    Args:
        df: DataFrame with at least ``img_path``, ``canvas_x`` and ``canvas_y`` columns.
        output_folder: Directory the PNG is written to; created if missing.
        canvas_width: Canvas width in pixels.
        canvas_height: Canvas height in pixels.
        image_name: File name (without extension) of the saved PNG.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_folder, exist_ok=True)

    # Blank, fully transparent canvas.
    canvas = Image.new('RGBA', (canvas_width, canvas_height), (255, 255, 255, 0))

    for _, row in df.iterrows():
        img_path = row['img_path']

        # Cutouts that never received a placement have NaN coordinates.
        if pd.isna(row['canvas_x']) or pd.isna(row['canvas_y']):
            print(f"Skipping {img_path} due to NaN values for coordinates.")
            continue

        position = (int(row['canvas_x']), int(row['canvas_y']))

        # Open inside a context manager so the file handle is released as soon
        # as the pixel data has been converted, instead of once per cutout
        # being left open for the garbage collector.
        with Image.open(img_path) as source:
            cutout = source.convert('RGBA')

        # The cutout doubles as its own mask so its transparency is respected.
        canvas.paste(cutout, position, cutout)

    save_path = os.path.join(output_folder, f"{image_name}.png")

    # Save the resulting canvas as PNG with transparent background
    canvas.save(save_path, 'PNG')
    print(f"Saved canvas to {save_path}")

# 1. Cutouts


In [None]:
import torch
from transformers import SamModel, SamProcessor
import os
from datetime import datetime
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import gc
from PIL import Image
import os
import random

# Use the GPU when one is available; fall back to the CPU instead of failing
# when the model is moved to a CUDA device that does not exist.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sam_model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")

In [None]:
def extract_using_mask(raw_image, masks, scores):
    """Cut the best-scoring mask out of ``raw_image`` as a tightly cropped RGBA image.

    Args:
        raw_image: PIL image the masks belong to.
        masks: Tensor of candidate masks; after squeezing, the first axis
            indexes the candidates (assumed to be binary masks the size of
            ``raw_image`` -- TODO confirm against the caller).
        scores: Tensor with one score per candidate mask; squeezed to 1-D.

    Returns:
        A PIL ``RGBA`` image cropped to the bounding box of the selected mask.
        Pixels outside the mask are fully transparent.

    Raises:
        ValueError: If ``scores`` is still multi-dimensional after squeezing,
            or if the selected mask contains no pixels.
    """
    # Ensure masks are properly squeezed
    if len(masks.shape) == 4:
        masks = masks.squeeze()

    # If scores has more than 1 dimension, squeeze it to 1D
    if len(scores.shape) > 1:
        scores = scores.squeeze()

    # If after squeezing, scores is still multi-dimensional, then there's an issue
    if len(scores.shape) > 1:
        raise ValueError("Scores tensor has more than one dimension after squeezing.")

    # argmax picks the best candidate directly, instead of searching a Python
    # list for a float that compares equal to the maximum.
    best_index = int(scores.argmax().item())
    top_mask = masks[best_index].cpu().numpy().astype(bool)

    if not top_mask.any():
        raise ValueError("Selected mask is empty; nothing to cut out.")

    # Force three colour channels so RGBA / greyscale inputs work as well.
    rgb = np.array(raw_image.convert("RGB"))

    # Take the alpha channel from the mask itself. Deriving it from "pixel is
    # not black" would punch transparent holes into black parts of the object.
    alpha = top_mask.astype(np.uint8) * 255
    rgba = np.dstack([rgb * top_mask[..., None], alpha]).astype(np.uint8)

    # Convert back to PIL Image for consistency
    segmented_image = Image.fromarray(rgba)

    # Bounding box of the masked area. PIL crop boxes exclude the right and
    # lower edge, so add 1 to keep the last masked column and row.
    rows, cols = np.where(top_mask)
    box = (int(cols.min()), int(rows.min()), int(cols.max()) + 1, int(rows.max()) + 1)

    return segmented_image.crop(box)

In [None]:
def generate_random_pois(width, height, num_points):
  """Return ``num_points`` random points of interest inside a ``width`` x ``height`` image.

  Each entry is a single-point list ``[[x, y]]`` with ``0 <= x < width`` and
  ``0 <= y < height``.
  """
  pois = []
  for _ in range(num_points):
    x = random.randint(0, width-1)
    y = random.randint(0, height-1)
    pois.append([[x, y]])
  return pois

In [None]:
def process_images_in_directory(directory, max_size=1040):
    """Orientation-correct and resize every PNG/JPEG file in ``directory``.

    Each image is passed through ``correct_image_orientation`` and
    ``resize_image`` and then saved back to its own path, overwriting the
    original file.
    """
    image_suffixes = ('png', 'jpg', 'jpeg')

    # Collect the matching paths first, then process them.
    img_paths = []
    for fname in os.listdir(directory):
        if fname.lower().endswith(image_suffixes):
            img_paths.append(os.path.join(directory, fname))

    for img_path in img_paths:
        with Image.open(img_path) as opened:
            upright = correct_image_orientation(opened)
            shrunk = resize_image(upright, max_size)
            # Overwrites the original image on disk.
            shrunk.save(img_path)

In [None]:
def resize_image(img, max_size=1040):
    """Scale ``img`` so that its longer side equals ``max_size``, keeping the aspect ratio.

    Images smaller than ``max_size`` are scaled up as well.

    Args:
        img: PIL image to resize.
        max_size: Target length in pixels of the longer side.

    Returns:
        The resized PIL image.
    """
    # Get original dimensions
    width, height = img.size

    # Determine new dimensions that keep the aspect ratio intact
    if width > height:
        new_width = max_size
        new_height = int((new_width / width) * height)
    else:
        new_height = max_size
        new_width = int((new_height / height) * width)

    # Extreme aspect ratios can round the shorter side down to 0, which is not
    # a valid image size.
    new_width = max(1, new_width)
    new_height = max(1, new_height)

    # Image.ANTIALIAS was deprecated and then removed in Pillow 10;
    # Image.LANCZOS is the same filter under its current name.
    img_resized = img.resize((new_width, new_height), Image.LANCZOS)

    return img_resized

In [None]:
def correct_image_orientation(img):
    """Return ``img`` transposed so that it is upright according to its Exif orientation tag.

    Args:
        img: PIL image, typically freshly opened from disk.

    Returns:
        A transposed copy when the Exif orientation tag (274) holds a value
        from 2 to 8; otherwise ``img`` itself, unchanged. Images without Exif
        support or data are returned unchanged.
    """
    try:
        # Attempt to get the Exif orientation tag
        exif = img._getexif()
        orientation_key = 274  # This is the Exif tag for orientation
        if exif and orientation_key in exif:
            orientation = exif[orientation_key]

            # Exif orientation -> PIL transpose method. PIL's ROTATE_*
            # constants rotate counter-clockwise. Orientations 5 and 7 are
            # flips along a diagonal, not plain horizontal/vertical flips.
            rotate_values = {
                2: Image.FLIP_LEFT_RIGHT,
                3: Image.ROTATE_180,
                4: Image.FLIP_TOP_BOTTOM,
                5: Image.TRANSPOSE,        # flip along the main diagonal
                6: Image.ROTATE_270,       # 270° counter-clockwise = 90° clockwise
                7: Image.TRANSVERSE,       # flip along the anti-diagonal
                8: Image.ROTATE_90         # 90° counter-clockwise
            }

            if orientation in rotate_values:
                return img.transpose(rotate_values[orientation])
    except (AttributeError, KeyError, IndexError):
        # Handle cases where the image doesn't have Exif data or there's some other issue
        pass

    return img  # Return the original image if no orientation correction is needed

In [None]:
import os
from datetime import datetime
from PIL import Image

# Every run writes its cutouts into a fresh, timestamped folder under sam_runs/.
current_time = datetime.now()
folder_name = current_time.strftime('%Y_%m_%d_%H_%M_%S')
path_to_new_folder = os.path.join("sam_runs", folder_name)
os.makedirs(path_to_new_folder, exist_ok=True)
print(f"New folder created at: {path_to_new_folder}")


images_dir = INPUT_FOLDER

# NOTE: this resizes the source images and overwrites them in place.
process_images_in_directory(images_dir)

img_paths =  [os.path.join(images_dir, fname) for fname in os.listdir(images_dir) if fname.lower().endswith(('png', 'jpg', 'jpeg'))]

poi_count = CUTOUTS_PER  # number of cutouts to make per image

for path in img_paths:
  img_name = os.path.basename(path)
  print(f"Processing image: {img_name}")

  # Open one image at a time and release the file handle immediately, rather
  # than keeping every source image open for the whole run. Converting to RGB
  # loads the pixel data and gives the mask extraction three channels.
  with Image.open(path) as opened:
    img = opened.convert("RGB")

  sam_inputs = sam_processor(img, return_tensors="pt").to(device)
  image_embeddings = sam_model.get_image_embeddings(sam_inputs["pixel_values"])
  width, height = img.size
  pois = generate_random_pois(width, height, poi_count)
  print(pois)
  input_points = [pois]

  # Re-run the processor with the prompt points, but reuse the image
  # embeddings computed above instead of the raw pixel values.
  sam_inputs = sam_processor(img, input_points=input_points, return_tensors="pt").to(device)
  sam_inputs.pop("pixel_values", None)
  sam_inputs.update({"image_embeddings": image_embeddings})

  with torch.no_grad():
    sam_outputs = sam_model(**sam_inputs)

  masks = sam_processor.image_processor.post_process_masks(sam_outputs.pred_masks.cpu(), sam_inputs["original_sizes"].cpu(), sam_inputs["reshaped_input_sizes"].cpu())
  scores = sam_outputs.iou_scores

  for poi_n in range(poi_count):
    # Use the scores that belong to THIS point. Indexing with 0 here would rank
    # every point's candidate masks by the first point's scores.
    try:
      segmented_image = extract_using_mask(img, masks[0][poi_n], scores[:, poi_n, :])
    except ValueError as err:
      # e.g. an empty mask for this random point; skip it and keep going.
      print(f"Skipping cutout {poi_n} of {img_name}: {err}")
      continue

    # Get the dimensions of the segmented image
    segmented_width, segmented_height = segmented_image.size

    # Position of the prompt point relative to the source image (0..1)
    x, y = pois[poi_n][0]
    relative_x = x / width
    relative_y = y / height

    #save!
    output_path = path_to_new_folder+"/"+str(img_name)+str(poi_n) + ".png"
    print(f"Saving image: {output_path}")
    segmented_image.save(output_path, "PNG")

    # Add row to DataFrame one column at a time
    new_idx = len(df)
    df.loc[new_idx, 'img_path'] = output_path
    df.loc[new_idx, 'original_x'] = relative_x
    df.loc[new_idx, 'original_y'] = relative_y
    df.loc[new_idx, 'width'] = segmented_width  # Add segmented width to DataFrame
    df.loc[new_idx, 'height'] = segmented_height  # Add segmented height to DataFrame

print(df.head())

New folder created at: sam_runs/2023_09_26_18_17_41


  img_resized = img.resize((new_width, new_height), Image.ANTIALIAS)


Processing image: A001595-R1-06-28A.JPG
[[[361, 626]], [[238, 562]]]
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-06-28A.JPG0.png
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-06-28A.JPG1.png
Processing image: A001595-R1-26-8A.JPG
[[[496, 548]], [[1024, 370]]]
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-26-8A.JPG0.png
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-26-8A.JPG1.png
Processing image: A001595-R1-23-11A.JPG
[[[167, 84]], [[796, 673]]]
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-23-11A.JPG0.png
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-23-11A.JPG1.png
Processing image: A001595-R1-17-17A.JPG
[[[655, 653]], [[16, 645]]]
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-17-17A.JPG0.png
Saving image: sam_runs/2023_09_26_18_17_41/A001595-R1-17-17A.JPG1.png
Processing image: A001594-R1-01-2.JPG
[[[720, 267]], [[538, 567]]]
Saving image: sam_runs/2023_09_26_18_17_41/A001594-R1-01-2.JPG0.png
Saving image: sam_runs/2023_09_26

# 2. Captions

In [None]:
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import torch

# Load the BLIP-2 processor and model used to caption each cutout.
# The model is loaded in float16 and placed automatically via device_map="auto".
blip2_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
blip2_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16, device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from matplotlib.pyplot import imshow
from datetime import datetime
from PIL import Image

# Caption every cutout and store the text in the 'caption' column.
for index, row in df.iterrows():
  # Close the cutout file as soon as the processor has read its pixels.
  with Image.open(row['img_path']) as img:
    blip2_inputs = blip2_processor(img, return_tensors="pt").to("cuda", torch.float16)

  out = blip2_model.generate(**blip2_inputs)

  # The decoded text ends with a newline (visible in the printed frame);
  # strip it so captions embed cleanly into the prompts built later.
  out_string = blip2_processor.decode(out[0], skip_special_tokens=True).strip()

  df.at[index, 'caption'] = out_string

print(df.head())



                                            img_path  \
0  sam_runs/2023_09_26_18_17_41/A001595-R1-06-28A...   
1  sam_runs/2023_09_26_18_17_41/A001595-R1-06-28A...   
2  sam_runs/2023_09_26_18_17_41/A001595-R1-26-8A....   
3  sam_runs/2023_09_26_18_17_41/A001595-R1-26-8A....   
4  sam_runs/2023_09_26_18_17_41/A001595-R1-23-11A...   

                                             caption original_x original_y  \
0  a man is sitting in a chair with a book in his...   0.347115   0.902017   
1                         a man with a white shirt\n   0.228846   0.809798   
2              a brown shape on a black background\n   0.476923   0.789625   
3  a piece of fruit is shown on a black background\n   0.984615   0.533141   
4                a man flying a kite in the desert\n   0.160577   0.121037   

  canvas_x canvas_y width height  
0      NaN      NaN   559    605  
1      NaN      NaN   559    607  
2      NaN      NaN   136    112  
3      NaN      NaN   365    211  
4      NaN      NaN

# 3. LLM initial placement

In [None]:
from pydantic import BaseModel, Field
from typing import List, Optional
from guardrails.validators import ValidRange, ValidChoices
from rich import print

from langchain.output_parsers import GuardrailsOutputParser

from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.llms import OpenAI

In [None]:
import openai
# Keep the key under a short local name (passed to the LangChain models below)
# and also set it on the openai module itself.
openaikey = OPEN_AI_API_KEY
openai.api_key = OPEN_AI_API_KEY

In [None]:
from langchain.llms import OpenAIChat
# Chat model used for the initial placement of each cutout.
openaichat = OpenAIChat(model_name="gpt-3.5-turbo-16k", openai_api_key=openaikey)



In [None]:
# Guardrails RAIL spec for the initial placement step. It declares the expected
# output (a `cutout_location` object with integer `x` / `y` and a `motivation`
# string) and the prompt, whose ${...} placeholders are filled in per cutout.
rail_spec = """
<rail version="0.1">
<output>
<object name="cutout_location">
<integer name="x"></integer>
<integer name="y"></integer>
<string description="Why did you place the cutout here?" name="motivation"></string>
</object>
</output>
<prompt>

You are an expert collage arranger.
I will give you a description of a cutout and a canvas size, as well as a description of what is already on the canvas.
Use this information to give a location for the new cutout - considering what is on the collage already and where it would make sense to place this cutout, as well as it's size.
The placement is defined by X and Y, consider both these when placing the cutout.
Make sure not to place all the cutouts at the same place. And make sure to consider the full canvas, as we want a good spread across it.

Cutout description:
${cutout_description}

Cutout size:
${cutout_size}

Canvas size:
${canvas_size}

Canvas content:
${canvas_content}

${gr.complete_json_suffix_v2}
</prompt>
</rail>
"""

In [None]:
# Parser that validates the LLM reply against the RAIL spec.
# NOTE: the name `output_parser` is rebound to a CustomOutputParser() in the
# agent section further down; re-run this cell before calling
# inital_placement again if that later cell has already been executed.
output_parser = GuardrailsOutputParser.from_rail_string(rail_spec, api=openai.ChatCompletion.create)

In [None]:
# Prompt template built from the RAIL spec's prompt and its variable names.
# NOTE: the name `prompt` is rebound to a CustomPromptTemplate in the agent
# section further down; re-run this cell before calling inital_placement
# again if that later cell has already been executed.
prompt = PromptTemplate(
    template=output_parser.guard.prompt.escape(),
    input_variables=output_parser.guard.prompt.variable_names,
)

In [None]:
# Text descriptions of the cutouts already placed; fed back into each prompt.
canvas_content_list =[]
def inital_placement(df):
  """Ask the LLM for an initial canvas position for every cutout in ``df``.

  Cutouts are processed in DataFrame order. Each prompt contains the cutout's
  caption and size, the canvas size, and a description of all cutouts placed
  so far. Rows whose reply cannot be parsed get ``None`` for ``output``,
  ``canvas_x`` and ``canvas_y``.

  Args:
      df: DataFrame with ``caption``, ``width`` and ``height`` columns.

  Returns:
      The same DataFrame, modified in place, with ``output``, ``canvas_x``
      and ``canvas_y`` columns set.
  """
  # Start from an empty canvas description so that re-running this function
  # does not feed the placements of a previous run into the prompts.
  canvas_content_list.clear()

  output_list = []
  x_list = []
  y_list = []

  for index, row in df.iterrows():
    cutout_description = row['caption']
    width = row['width']
    height = row['height']
    cutout_size = f"width: {width}, height: {height}"
    canvas_size = f"width: {CANVAS_WIDTH}, height: {CANVAS_HEIGHT}"
    canvas_content = "\n".join(canvas_content_list)

    full_prompt = prompt.format_prompt(cutout_description=cutout_description,
                                            cutout_size=cutout_size,
                                            canvas_size=canvas_size,
                                            canvas_content=canvas_content).to_string()

    print(full_prompt)

    output = openaichat(full_prompt)

    # Parsing is the step that fails on a malformed reply, so it belongs
    # inside the try together with the key lookups.
    try:
      parsed_output = output_parser.parse(output)
      x = parsed_output['cutout_location']['x']
      y = parsed_output['cutout_location']['y']
    except Exception as e:
      # Keep going, but say which cutout was left unplaced and why.
      print(f"Could not place cutout {index}: {e}")
      output_list.append(None)
      x_list.append(None)
      y_list.append(None)
      continue

    canvas_entry = f"Caption: {cutout_description}, x: {x}, y: {y}, width: {width}, height: {height}"
    print(parsed_output)
    output_list.append(parsed_output)
    canvas_content_list.append(canvas_entry)
    x_list.append(x)
    y_list.append(y)

  df['output'] = output_list
  df['canvas_x'] = x_list
  df['canvas_y'] = y_list

  return df

In [None]:
# inital_placement modifies df in place and returns it, so initial_df and df
# refer to the same DataFrame. Render the result of the initial placement.
initial_df = inital_placement(df)
render_and_save_canvas(df, OUTPUT_FOLDER, CANVAS_WIDTH, CANVAS_HEIGHT, "render_intial")

  warn(


In [None]:
# Inspect the first rows, now including the placement columns.
print(df.head())

In [None]:
 # Writes render_shuffled.png from the current df; the final-render section reads this file.
 render_and_save_canvas(df, OUTPUT_FOLDER, CANVAS_WIDTH, CANVAS_HEIGHT, "render_shuffled")

# 4. LLM Adjustment

In [None]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.utilities import GoogleSerperAPIWrapper
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, LLMChain
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

import re



In [None]:
def look_at_canvas(input):
    """Describe every cutout in the global ``df`` as text for the agent.

    ``input`` is not used; it exists because the tool is called with one
    string argument. Returns one entry per row (index id, caption, canvas
    position and size), joined by ``---`` separator lines.
    """
    def describe(index, row):
        return f"Index id: {index}, Caption: {row['caption']}\nCanvas_Y: {row['canvas_y']}\nCanvas_X: {row['canvas_x']}\nWidth: {row['width']}\nHeight: {row['height']}\n"

    return "\n---\n".join(describe(index, row) for index, row in df.iterrows())


In [None]:
def move_cutout(cutout_id: int, new_x: int, new_y: int, new_id: int = None):
  """Move a cutout to a new canvas position and, optionally, a new place in the render order.

  Updates the global ``df``, re-renders the canvas to a timestamped PNG in
  ``OUTPUT_FOLDER`` and returns the textual canvas description.

  Args:
      cutout_id: Index id of the cutout to move.
      new_x: New ``canvas_x`` value.
      new_y: New ``canvas_y`` value.
      new_id: Position the row should take in the render order. ``None``
          keeps the cutout where it is in the order.

  Returns:
      The canvas description from ``look_at_canvas``; prefixed with an error
      line when ``cutout_id`` does not exist.
  """

  global df  # This will make sure you're using the global df variable

  # Assigning through df.at with an unknown label would silently append a new,
  # mostly empty row; report the problem to the caller instead.
  if cutout_id not in df.index:
    return f"No cutout with id {cutout_id}; nothing was moved.\n" + look_at_canvas("test")

  df.at[cutout_id, 'canvas_x'] = new_x
  df.at[cutout_id, 'canvas_y'] = new_y

  row = df.loc[cutout_id]
  remaining = df.drop(cutout_id)

  # With new_id=None, iloc[:None] and iloc[None:] would both return the whole
  # frame and every row would be duplicated. Default to the current position
  # and keep the insert position inside the frame.
  position = cutout_id if new_id is None else new_id
  position = max(0, min(int(position), len(remaining)))

  df = pd.concat([remaining.iloc[:position], pd.DataFrame(row).T, remaining.iloc[position:]]).reset_index(drop=True)
  canvas = look_at_canvas("test")
  current_time = datetime.now()
  timestamp = current_time.strftime('%Y_%m_%d_%H_%M_%S')
  render_and_save_canvas(df, OUTPUT_FOLDER, CANVAS_WIDTH, CANVAS_HEIGHT, "render_shuffled_"+timestamp)

  return canvas

In [None]:
import ast
def parse_move(string):
    """Parse the agent's ``move_cutout`` input and perform the move.

    Args:
        string: Python-literal dict such as
            ``{'cutout_id': 1, 'new_canvas_x': 100, 'new_canvas_y': 100, 'new_id': 10}``.
            ``new_id`` may be omitted, in which case the render order is kept.

    Returns:
        Whatever ``move_cutout`` returns (the updated canvas description), so
        the agent receives it as the tool observation.

    Raises:
        ValueError: If the input is not a dict literal or lacks
            ``cutout_id``, ``new_canvas_x`` or ``new_canvas_y``.
    """
    try:
        input_data = ast.literal_eval(string)
    except (ValueError, SyntaxError) as exc:
        raise ValueError(f"Could not parse move input: {string!r}") from exc

    if not isinstance(input_data, dict):
        raise ValueError(f"Move input must be a dictionary, got: {string!r}")

    # Extract values from the dictionary
    cutout_id = input_data.get('cutout_id')
    new_x = input_data.get('new_canvas_x')
    new_y = input_data.get('new_canvas_y')
    new_id = input_data.get('new_id')

    # Validate the extracted values
    if any(value is None for value in (cutout_id, new_x, new_y)):
        raise ValueError("Missing values in the input string")

    # The model often leaves 'new_id' out; keep the cutout's place in the
    # render order rather than passing None on as an insert position.
    if new_id is None:
        new_id = cutout_id

    # Return the result so the tool's observation is the updated canvas, as
    # the tool description promises, instead of None.
    return move_cutout(cutout_id, new_x, new_y, new_id)

In [None]:
from langchain.tools import BaseTool, StructuredTool, Tool, tool

In [None]:
# Tools exposed to the agent: one to read the canvas state and one to move a
# cutout. Each tool function receives a single string argument.
tools = [
    Tool(
        name = "look_at_canvas",
        func=look_at_canvas,
        description="Use this when you need to look at the current state of the canvas. input is 'canvas'"
    ),
    Tool(
      name = "move_cutout",
      description = "Use this when you need to move a cutout. Use this format for input: {'cutout_id': 1, 'new_canvas_x': 100, 'new_canvas_y': 100, 'new_id':10}.  You dont have to move all dimensions, but do fill them in. The return will be the updated canvas",
      func = parse_move
    ),

]

In [None]:
# Prompt template for the agent.
# The format section is interesting to adjust, as well as the role section.
# Placeholders: {tools}, {tool_names}, {input} and {agent_scratchpad}.
# NOTE(review): the text below tells the model that X and Y refer to the
# center of a cutout, while render_and_save_canvas pastes each cutout with
# (canvas_x, canvas_y) as its top-left corner -- confirm which is intended.
template = """
Role: Photo Collage Maker

You are an expert photo collage maker. You have access to a canvas that has cutouts placed on it already. Please arrange these cutouts into a collage.
Each cutout has an associated width and height as well as X and Y for their location, the X and Y is specified by the center of the cutout - each cutout also has a description of it's content, consider the description when arranging the collage.
The order of the cutouts also matter, they will be rendered according to their order of their IDs. You can use the move tool to adjust the order as well.
You will be given the size of the full canvas, please consider this as well when placeing the cutouts.

Please use the tools to move the cutouts and to look at the canvas.

Please take your time to construct the best collage you can. A recommended workflow would be: Move cutout, look at canvas, move cutout, look at canvas etc.
Always make sure to look at the canvas and to move a cut out if needed.

TOOLS:
------
You have access to the following tools:
{tools}

To use a tool, ALWAYS use this full format:
```
Question: the input question you must answer
Thought: Do I need to use a tool? Yes + motivation for why using the tool makes sense
Action: the action to take, has be one of [{tool_names}]
Action Input: has to be input to the action
Observation: the result of the action
...this Question, Thought, Action, Action Input, Observation sequence should repeat atleast 5 times...
```
Make sure to use the tools.

If the collage is finished and you do not need to use a tool, you MUST use the format:
```
Thought: Do I need to use a tool? No
DONE
```

Begin!
Starting point: {input}
{agent_scratchpad}
"""

In [None]:
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    """Prompt template that fills in the tool list and the agent scratchpad."""

    # Raw template text with {tools}, {tool_names}, {input} and {agent_scratchpad}
    template: str
    # Tools the agent may call
    tools: List[Tool]


    def format(self, **kwargs) -> str:
        # Turn the (AgentAction, observation) history into the scratchpad text
        steps = kwargs.pop("intermediate_steps")
        kwargs["agent_scratchpad"] = "".join(
            f"{action.log}\nObservation: {observation}\nThought: "
            for action, observation in steps
        )

        # One "name: description" line per tool, plus a comma-separated name list
        kwargs["tools"] = "\n".join(f"{tool.name}: {tool.description}" for tool in self.tools)
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)

        return self.template.format(**kwargs)


# `agent_scratchpad`, `tools` and `tool_names` are produced inside format(), so
# only `input` and `intermediate_steps` are declared as input variables.
prompt = CustomPromptTemplate(
    template=template,
    tools=tools,
    input_variables=["input", "intermediate_steps"]
)

In [None]:
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import (
    PydanticOutputParser,
    OutputFixingParser,
    RetryOutputParser,
)

from langchain.base_language import BaseLanguageModel
from pydantic import BaseModel, Field, validator
from typing import List
from langchain.schema import AgentAction, AgentFinish, OutputParserException

In [None]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into either a tool call or the final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        if "DONE" not in llm_output:
            # Not finished yet: pull the tool name and its input out of the text
            pattern = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
            found = re.search(pattern, llm_output, re.DOTALL)
            if not found:
                raise OutputParserException(f"Could not parse LLM output: `{llm_output}`")
            tool_name = found.group(1).strip()
            tool_input = found.group(2).strip(" ").strip('"')
            return AgentAction(tool=tool_name, tool_input=tool_input, log=llm_output)

        # The agent signalled completion; whatever follows the last DONE is the
        # value of the single `output` key.
        final_text = llm_output.split("DONE")[-1].strip()
        return AgentFinish(return_values={"output": final_text}, log=llm_output)

output_parser = CustomOutputParser()

In [None]:
from langchain.chat_models import ChatOpenAI

In [None]:
# Completion model that drives the adjustment agent.
llm =OpenAI(model="gpt-3.5-turbo-instruct",temperature=0.5, openai_api_key=openaikey)

In [None]:
# LLM chain consisting of the LLM and a prompt
# (`prompt` here is the CustomPromptTemplate instance defined above).
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
# making an agent connected to the LLM chain, using the custom output parser and the tools list
# Generation stops at "\nObservation:" so the model cannot write the tool
# result itself.
# NOTE(review): handle_parsing_errors is commonly an AgentExecutor option --
# confirm that LLMSingleActionAgent actually honours it here.
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    stop=["\nObservation:"],
    allowed_tools=tool_names,
    handle_parsing_errors=True,
)

In [None]:
# executor for the agent
# handle_parsing_errors=True makes the executor report an unparseable LLM reply
# back to the model as an observation instead of aborting the whole run when
# the output parser raises OutputParserException.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)

In [None]:
#Test Query
# Snapshot of the current canvas description; passed to the agent as its starting point.
canvas = look_at_canvas("test")




In [None]:
# Run the agent with the canvas size and the current cutout descriptions as input.
response = agent_executor.run(f"The canvas size is: width:{CANVAS_WIDTH}, height:{CANVAS_HEIGHT}, cutouts: {canvas}")
print(str(response))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes + I need to use the move_cutout tool to adjust the location of the cutout with index id 2. This will help create a better balance in the collage and make the brown shape stand out more.
Action: move_cutout
Action Input: {'cutout_id': 2, 'new_canvas_x': 772, 'new_canvas_y': 100}[0m



Observation:[33;1m[1;3mNone[0m
[32;1m[1;3mDo I need to use a tool? Yes + I need to use the move_cutout tool to adjust the location of the cutout with index id 1. This will help create a better balance in the collage and make the man with a white shirt stand out more.
Action: move_cutout
Action Input: {'cutout_id': 1, 'new_canvas_x': 550, 'new_canvas_y': 100}[0m



Observation:[33;1m[1;3mNone[0m
[32;1m[1;3mDo I need to use a tool? No
DONE[0m

[1m> Finished chain.[0m


In [None]:
import logging
# Switch the root logger to DEBUG. This affects every library that logs, so
# cells run afterwards also print third-party debug lines (e.g. PIL's PNG
# stream messages in the final-render output).
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

logging.debug('Debug logging on')

DEBUG:root:Debug logging on


# 5. Final Render

In [None]:
from PIL import Image
from collections import Counter

def dominant_color(image: "Image.Image") -> tuple:
    """
    Compute the dominant color (most frequent) among the non-transparent pixels in an image.

    Args:
        image: PIL image; converted to RGBA first if it is in another mode.

    Returns:
        ``(r, g, b)`` of the most frequent pixel value whose alpha is above 0,
        or white ``(255, 255, 255)`` when the image has no such pixel.
    """
    # Ensure the image is in RGBA mode
    if image.mode != "RGBA":
        image = image.convert("RGBA")

    # Count the visible pixel values only (alpha > 0)
    color_freqs = Counter(color for color in image.getdata() if color[3] > 0)

    # A fully transparent image (e.g. a canvas on which nothing was placed) has
    # no visible pixels; most_common(1)[0] would raise IndexError.
    if not color_freqs:
        return (255, 255, 255)

    # Get the most common color (excluding the alpha channel)
    (r, g, b, _), freq = color_freqs.most_common(1)[0]

    return (r, g, b)

def set_background_to_dominant_color(input_path: str, output_path: str) -> None:
    """Flatten an RGBA image onto a background filled with its dominant color.

    Images that are not in RGBA mode are saved to ``output_path`` unchanged.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resulting PNG is written to.
    """
    # The context manager closes the file once the result has been written.
    with Image.open(input_path) as image:
        # Check if the image has an alpha channel (transparency)
        if image.mode == "RGBA":
            # Only needed on this branch, so compute it here.
            background_color = dominant_color(image)

            # The alpha channel is the fourth band of an RGBA image
            alpha = image.split()[3]

            # Create a new image with the dominant background color and the same size as the original image
            background = Image.new("RGB", image.size, background_color)

            # Paste the original image onto the new background using the alpha channel as a mask
            background.paste(image, mask=alpha)

            # Save the result
            background.save(output_path, "PNG")
        else:
            # If the image has no alpha channel, just save it as is
            image.save(output_path, "PNG")

# render_shuffled.png was written before the agent ran, and the agent's moves
# only produce render_shuffled_<timestamp>.png files. Render the current df
# once more so the final image reflects the adjusted layout.
render_and_save_canvas(df, OUTPUT_FOLDER, CANVAS_WIDTH, CANVAS_HEIGHT, "render_final")

# Define the path to the image and the output path
input_path = OUTPUT_FOLDER+"/render_final.png"
output_path = OUTPUT_FOLDER+"/render_with_dominant_background.png"

set_background_to_dominant_color(input_path, output_path)

DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 41 65536
