In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import base64
from datetime import datetime
import io
import os
from pprint import pprint
import time

from dotenv import load_dotenv
from openai import OpenAI
import numpy as np
from PIL import Image
from skimage.io import imread, imsave
import stackview
from stackview._image_widget import _img_to_rgb

# Run dotenv's loader once at startup; the result is bound to `_` so that it is not
# echoed as cell output (ast_node_interactivity is set to "all" above).
# NOTE(review): presumably this supplies the API key that OpenAI() picks up from
# the environment — confirm against the project's .env file.
_ = load_dotenv()

In [None]:
def numpy_to_bytestream(data):
    """Encode an array as a PNG image and return the encoded bytes.

    The array is cast to ``uint8``, wrapped in a PIL image, and converted to
    RGBA before being written out in PNG format.

    Parameters
    ----------
    data : numpy.ndarray
        Pixel data to encode.

    Returns
    -------
    bytes
        The complete PNG file content.
    """
    pixels = data.astype(np.uint8)
    rgba_picture = Image.fromarray(pixels).convert("RGBA")
    with io.BytesIO() as buffer:
        rgba_picture.save(buffer, format="PNG")
        # getvalue() returns the whole buffer regardless of the stream position.
        return buffer.getvalue()

def prompt_dall_e(prompt, model, width=1024, height=1024, downsample=2):
    """Generate one image from a text prompt and return it as an array.

    A single image is requested from the OpenAI images endpoint, downloaded
    from the URL in the response, and subsampled along its first two axes.

    Parameters
    ----------
    prompt : str
        Text description of the image to generate.
    model : str
        Name of the image model to use.
    width, height : int, optional
        Requested size, sent to the API as ``"{width}x{height}"``.
    downsample : int, optional
        Keep every ``downsample``-th row and column of the downloaded image.
        The default of 2 halves the resolution along both axes; pass 1 to
        keep the image as downloaded.

    Returns
    -------
    numpy.ndarray
        The downloaded, subsampled image.

    Raises
    ------
    ValueError
        If ``downsample`` is smaller than 1, or if the response does not
        contain an image URL.
    """
    if downsample < 1:
        raise ValueError(f"downsample must be >= 1, got {downsample}")

    client = OpenAI()

    response = client.images.generate(
        prompt=prompt,
        model=model,
        n=1,
        size=f"{width}x{height}",
    )

    image_url = response.data[0].url
    if image_url is None:
        # Without a URL, imread would fail with an unrelated-looking error.
        raise ValueError(
            f"The response for model {model!r} did not contain an image URL."
        )

    # Subsample rows and columns only; any trailing channel axis is kept whole.
    image = imread(image_url)[::downsample, ::downsample]
    return image

def prompt_chatGPT(image, model):
    """Send an image to an OpenAI chat model and return the text of its reply.

    The image is converted with stackview's ``_img_to_rgb``, PNG-encoded, and
    embedded in the request as a base64 data URL. The user message contains
    only the image; no text instruction is sent along with it.

    Parameters
    ----------
    image : numpy.ndarray
        Image to send to the model.
    model : str
        Name of the chat model to use.

    Returns
    -------
    str
        The message content of the first choice in the response.
    """
    rgb_image = _img_to_rgb(image)
    png_bytes = numpy_to_bytestream(rgb_image)
    base64_image = base64.b64encode(png_bytes).decode("utf-8")

    # numpy_to_bytestream writes PNG data, so the data URL has to declare
    # image/png (it previously claimed image/jpeg, mismatching the payload).
    message = [{"role": "user", "content": [{
        "type": "image_url",
        "image_url": {
            "url": f"data:image/png;base64,{base64_image}"
        }
    }]}]

    client = OpenAI()
    response = client.chat.completions.create(model=model, messages=message)
    return response.choices[0].message.content

def get_time():
    """Return the current local time as a ``YYYYmmdd_HHMMSS_microseconds`` string."""
    seconds_since_epoch = time.time()
    moment = datetime.fromtimestamp(seconds_since_epoch)
    return moment.strftime("%Y%m%d_%H%M%S_%f")

In [None]:
# Starting picture for the loop below; only the first three channels are kept.
image_filename = "data/cat.png"
image = imread(image_filename)
image = image[..., :3]
image.shape
stackview.imshow(image)

In [None]:
# Alternate between describing the current picture and regenerating it from that
# description, saving every generated picture under a timestamped filename.
# NOTE: `image` is rebound on every round, so re-running this cell continues from
# the most recently generated picture rather than from the one loaded from disk.
N_ROUNDS = 20
OUTPUT_DIR = "output"
os.makedirs(OUTPUT_DIR, exist_ok=True)  # make sure the target folder exists before saving

for i in range(N_ROUNDS):
    description = prompt_chatGPT(image, model="gpt-4-vision-preview")
    pprint(description)

    image = prompt_dall_e(description, model="dall-e-3")
    imsave(f"{OUTPUT_DIR}/cat_{get_time()}.png", image)
    stackview.imshow(image)