In [59]:
import base64
import io
import requests
from requests import Response
from PIL import Image
from openai import OpenAI
from pprint import pprint
import google.generativeai as genai
import google.ai.generativelanguage as glm
from dotenv import load_dotenv
import os
import pathlib

# Load variables from a local .env file into the process environment.
load_dotenv()
# Configure the Gemini client with the key stored in the "GOOGLE" variable.
# os.getenv returns None when that variable is not set.
genai.configure(api_key=os.getenv("GOOGLE"))



In [60]:
def img2base64(image: "Image.Image", fmt: str = "WEBP") -> str:
    """Encode an image in memory and return the bytes as base64 text.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=...)`` method.
        fmt: Encoder name passed to ``image.save``. Defaults to ``"WEBP"``.
            Callers that embed the result in a data URL must declare the
            matching MIME type (``image/webp`` for the default).

    Returns:
        The base64 representation of the encoded image, as ``str``.
    """
    # The context manager releases the buffer as soon as the bytes are read.
    with io.BytesIO() as buffer:
        image.save(buffer, format=fmt)
        return base64.b64encode(buffer.getvalue()).decode("ascii")

In [61]:
def clasificar_chatgpt(image: "Image.Image", timeout: float = 60) -> Response:
    """Ask the OpenAI chat-completions endpoint to classify an image.

    The image is downscaled to 64x64, base64-encoded with ``img2base64`` and
    sent inline as a data URL together with the classification prompt.

    Args:
        image: PIL image to classify.
        timeout: Seconds ``requests`` waits for the server before raising.
            Without a timeout a stalled connection would block forever.

    Returns:
        The raw ``requests.Response``; the caller is responsible for checking
        the status code and decoding the JSON body.
    """
    image_base64 = img2base64(image.resize((64, 64)))

    # NOTE(review): API_KEY is not defined in any visible cell; it must exist
    # in the kernel namespace before this function is called.
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "system",
                "content": "Your task is to classify the image into one of four categories: urban, crop, mountain, or forest. Your output should be in JSON format as follows: {'category': 'the category of the image'}.",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What category does this image belong to?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # img2base64 produces WEBP bytes by default, so
                            # the declared MIME type has to be image/webp.
                            "url": f"data:image/webp;base64,{image_base64}"
                        },
                    },
                ],
            },
        ],
        "max_tokens": 100,
    }

    return requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

In [62]:
# Shared Gemini vision model handle; clasificar_gemini uses it as the default
# value of its `model` parameter.
model = genai.GenerativeModel('gemini-pro-vision')


In [162]:
def dividir_imagen(
    imagen: "Image.Image",
    tamaño_trozo: int = 128,
    directorio: str = "./chunks",
) -> None:
    """Cut an image into square tiles and save each tile as a ``.jpg`` file.

    Tiles are written to ``directorio`` as ``{column}-{row}.jpg``, where
    column and row are zero-based tile indices counted from the top-left
    corner of the image.

    Args:
        imagen: PIL image to split.
        tamaño_trozo: Side length of every tile, in pixels.
        directorio: Folder that receives the tiles; created when missing.

    Raises:
        ValueError: If ``tamaño_trozo`` is not positive, or if the image width
            or height is not an exact multiple of it.
    """
    if tamaño_trozo <= 0:
        raise ValueError(f"tile size must be positive, got {tamaño_trozo}")

    ancho, alto = imagen.size
    if ancho % tamaño_trozo or alto % tamaño_trozo:
        raise ValueError(
            f"image size {ancho}x{alto} is not a multiple of the "
            f"{tamaño_trozo}px tile size"
        )

    # Saving would fail with FileNotFoundError on a fresh checkout otherwise.
    os.makedirs(directorio, exist_ok=True)

    for fila, y in enumerate(range(0, alto, tamaño_trozo)):
        for columna, x in enumerate(range(0, ancho, tamaño_trozo)):
            trozo = imagen.crop((x, y, x + tamaño_trozo, y + tamaño_trozo))
            trozo.save(os.path.join(directorio, f"{columna}-{fila}.jpg"))

In [176]:

def clasificar_gemini(image: "Image.Image", model=model):
    """Ask a Gemini vision model to classify an image.

    The image is downscaled to 64x64, encoded as JPEG and sent inline together
    with a prompt listing the categories URBAN, CROP, DRY LAND and FOREST.

    Args:
        image: PIL image to classify.
        model: Object exposing ``generate_content``; defaults to the
            module-level ``model``.

    Returns:
        The object returned by ``model.generate_content`` after ``resolve()``
        has been called on it; callers read the answer from ``.text``.
    """
    prompt = "Your task is to classify the image into one of four categories: URBAN, CROP, DRY LAND, FOREST. Your output should be in JSON format as follows: {'category': 'the category of the image'}."

    image = image.resize((64, 64))
    # JPEG has no alpha channel or palette, so saving e.g. an RGBA or P image
    # raises OSError; convert those to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    with io.BytesIO() as buffer:
        image.save(buffer, format='JPEG')
        bytes_de_imagen = buffer.getvalue()

    response = model.generate_content(
        glm.Content(
            parts=[
                glm.Part(text=prompt),
                glm.Part(
                    inline_data=glm.Blob(
                        mime_type='image/jpeg',
                        data=bytes_de_imagen,
                    )
                ),
            ],
        ),
        stream=True,
    )

    # The request is streamed, so resolve() is called before returning.
    response.resolve()
    return response

In [177]:
# Source image that is later cut into tiles by dividir_imagen.
todo = Image.open("./chunk_rgb_640x640.jpg")

In [178]:
# Write the tiles of the source image to ./chunks.
dividir_imagen(todo)

In [180]:
# Solid RGB colour painted for each category the model may name. The order
# matters: the first label found in the answer wins.
COLOR_POR_CATEGORIA = {
    "URBAN": (0, 0, 255),
    "CROP": (255, 255, 0),
    "DRY LAND": (255, 0, 0),
    "FOREST": (0, 255, 0),
}

# Saving would fail with FileNotFoundError if the output folder were missing.
os.makedirs("./chunks_process", exist_ok=True)

# Only tile files are processed; stray entries (e.g. .DS_Store) would make
# Image.open fail. Sorting keeps the processing order deterministic.
archivos = sorted(f for f in os.listdir("./chunks") if f.lower().endswith(".jpg"))
print(len(archivos))
for i, path in enumerate(archivos):
    # The image is resized and encoded inside clasificar_gemini, so the file
    # can be closed as soon as the call returns.
    with Image.open(f"./chunks/{path}") as image:
        res = clasificar_gemini(image)
    text = res.text

    color = next(
        (rgb for etiqueta, rgb in COLOR_POR_CATEGORIA.items() if etiqueta in text),
        None,
    )
    if color is None:
        # No tile is written in this case, so report it instead of silently
        # leaving a hole in the reassembled image.
        print(f"{path}: no known category in response {text!r}")
    else:
        Image.new("RGB", (128, 128), color).save(f"./chunks_process/{path}")

    print(i)



25
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


1 min 23 s para 25 imágenes de 128x128

In [181]:
def _posicion(nombre):
    """Return the (column, row) indices encoded in a ``{column}-{row}.jpg`` name."""
    columna, fila = os.path.splitext(nombre)[0].split("-")
    return int(columna), int(fila)


# Only tile files are considered; any other entry would break the int() parse.
archivos = sorted(
    (f for f in os.listdir("./chunks_process") if f.endswith(".jpg")),
    key=_posicion,
)
# The mosaic has the same size as the image the tiles were cut from.
ancho, alto = todo.size
tamaño_trozo = 128
new_image = Image.new('RGB', (ancho, alto))

for path in archivos:
    pos_x, pos_y = _posicion(path)
    print(pos_x * tamaño_trozo, pos_y * tamaño_trozo)
    # paste() copies the pixels, so the tile file can be closed right after.
    with Image.open(f"./chunks_process/{path}") as image:
        new_image.paste(image, (tamaño_trozo * pos_x, tamaño_trozo * pos_y))
new_image.save("result.jpg")


0 0
0 128
0 256
0 384
0 512
128 0
128 128
128 256
128 384
128 512
256 0
256 128
256 256
256 384
256 512
384 0
384 128
384 256
384 384
384 512
512 0
512 128
512 256
512 384
512 512


In [182]:
# Number of tile files listed from ./chunks_process.
len(archivos)

25