# Lab 9: Working with Image Models

In [None]:
# Load variables from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
# NOTE(review): presumably this supplies the API key picked up by OpenAI()
# in later cells - confirm against the project's .env template.
from dotenv import load_dotenv
load_dotenv(override=True)

### Describe an image

In [None]:
from openai import OpenAI

# Client object; later cells in this notebook call it as `client`.
client = OpenAI()

# Remote image handed to the model by URL rather than as inline data.
boardwalk_photo_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

# A single user message made of two parts: the question and the image.
question_part = {"type": "text", "text": "What's in this image?"}
photo_part = {"type": "image_url", "image_url": {"url": boardwalk_photo_url}}

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{"role": "user", "content": [question_part, photo_part]}],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

### Describe a plot/chart

In [None]:
# Describe a chart stored on disk by sending it inline as a base64 data URL.
import base64

# Load the image file in binary mode
with open("./assets/graph.jpeg", "rb") as image_file:
    image_data = image_file.read()

# Base64-encode the raw bytes so they can be embedded in a text data URL.
base64_image = base64.b64encode(image_data).decode('utf-8')

# The media type in the data URL should describe the payload. The file has a
# .jpeg extension, so it is labelled image/jpeg (it was previously image/png).
image_data_url = "data:image/jpeg;base64," + base64_image

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe the following chart?"},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ],
    }],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

### Extract values from an image

In [None]:
# Ask the model to extract the contents of ./assets/KTP1.png as JSON.
import base64

# Read the raw bytes, then base64-encode them for embedding in a data URL
with open("./assets/KTP1.png", "rb") as image_file:
    image_data = image_file.read()
base64_image = base64.b64encode(image_data).decode('utf-8')

# Two-part user content: the instruction text followed by the inline image.
extraction_request = [
    {
        "type": "text",
        "text": "Extract the information in the following image and present the result in JSON format.",
    },
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{base64_image}"},
    },
]

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{"role": "user", "content": extraction_request}],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

In [None]:
# Ask the model to extract the contents of ./assets/KTP2.jpg as JSON.
import base64

# Load the image file in binary mode
with open("./assets/KTP2.jpg", "rb") as image_file:
    image_data = image_file.read()

# Base64-encode the raw bytes so they can be embedded in a text data URL.
base64_image = base64.b64encode(image_data).decode('utf-8')

# The media type in the data URL should describe the payload. The file has a
# .jpg extension, so it is labelled image/jpeg (it was previously image/png).
image_data_url = "data:image/jpeg;base64," + base64_image

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract the information in the following image and present the result in JSON format."},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ],
    }],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

### Handwriting Recognition

In [None]:
# Transcribe the handwritten letter in ./assets/handwriting1.jpg.
import base64

# Load the image file in binary mode
with open("./assets/handwriting1.jpg", "rb") as image_file:
    image_data = image_file.read()

# Base64-encode the raw bytes so they can be embedded in a text data URL.
base64_image = base64.b64encode(image_data).decode('utf-8')

# The media type in the data URL should describe the payload. The file has a
# .jpg extension, so it is labelled image/jpeg (it was previously image/png).
image_data_url = "data:image/jpeg;base64," + base64_image

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Read the following handwritten letter. Write the message by keeping the format."},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ],
    }],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

In [None]:
# Extract the address from the handwritten form in ./assets/handwritten_form.jpg.
import base64

# Load the image file in binary mode
with open("./assets/handwritten_form.jpg", "rb") as image_file:
    image_data = image_file.read()

# Base64-encode the raw bytes so they can be embedded in a text data URL.
base64_image = base64.b64encode(image_data).decode('utf-8')

# The media type in the data URL should describe the payload. The file has a
# .jpg extension, so it is labelled image/jpeg (it was previously image/png).
image_data_url = "data:image/jpeg;base64," + base64_image

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract the address written on the following form."},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ],
    }],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

In [None]:
# Extract the filled fields from the form in ./assets/handwritten_form1.webp.
import base64

# Load the image file in binary mode
with open("./assets/handwritten_form1.webp", "rb") as image_file:
    image_data = image_file.read()

# Base64-encode the raw bytes so they can be embedded in a text data URL.
base64_image = base64.b64encode(image_data).decode('utf-8')

# The media type in the data URL should describe the payload. The file has a
# .webp extension, so it is labelled image/webp (it was previously image/png).
image_data_url = "data:image/webp;base64," + base64_image

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract all filled fields from the following form."},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ],
    }],
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

## GENERATION

### Image Generation

In [None]:
# Imports for the image-generation section of the notebook.
import base64
import os
from openai import OpenAI
from PIL import Image
from io import BytesIO
# `display` is called by the later cell that shows the generated image.
from IPython.display import Image as IPImage, display

# Rebinds `client` to a new OpenAI client (an earlier cell also creates one).
client = OpenAI()

In [None]:
# Generate the image
# Path string for this character's image; not used within this cell.
img_path1 = "imgs/glorptak.jpg"

# Character specification sent to the image model as the prompt.
prompt1 = """
Render a realistic image of this character:
Blobby Alien Character Spec Name: Glorptak (or nickname: "Glorp")
Visual Appearance Body Shape: Amorphous and gelatinous. Overall silhouette resembles a teardrop or melting marshmallow, shifting slightly over time. Can squish and elongate when emotional or startled.
Material Texture: Semi-translucent, bio-luminescent goo with a jelly-like wobble. Surface occasionally ripples when communicating or moving quickly.
Color Palette:
- Base: Iridescent lavender or seafoam green
- Accents: Subsurface glowing veins of neon pink, electric blue, or golden yellow
- Mood-based color shifts (anger = dark red, joy = bright aqua, fear = pale gray)
Facial Features:
- Eyes: 3-5 asymmetrical floating orbs inside the blob that rotate or blink independently
- Mouth: Optional—appears as a rippling crescent on the surface when speaking or emoting
- No visible nose or ears; uses vibration-sensitive receptors embedded in goo
"""

# Collect the request arguments first, then issue a single generate call.
generation_request = {
    "model": "dall-e-2",
    "prompt": prompt1,
    "size": "256x256",
}
result1 = client.images.generate(**generation_request)

In [None]:
# Show the result
from io import BytesIO

import requests
from PIL import Image

# Download the generated image from the URL in the generation result.
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors (e.g. an expired link) directly
# instead of letting PIL fail later on a non-image error body.
response = requests.get(result1.data[0].url, timeout=30)
response.raise_for_status()
image = Image.open(BytesIO(response.content))

# Display the image inline
display(image)


# END