In [3]:
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration
from google.colab import files

# 1. Upload your local screenshot
uploaded = files.upload()
image_path = list(uploaded.keys())[0]
print("Uploaded:", image_path)

# 2. Load the LLaVA model & processor
model_id = "llava-hf/llava-1.5-7b-hf"

model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)

# 3. Prepare a conversation for "Screenshot → Code Generation"
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {
                "type": "text",
                "text": (
                    "Develop HTML that reflects the structure of the webpage illustrated in the provided image."
                )
            },
        ],
    }
]

prompt_text = processor.apply_chat_template(
    conversation,
    add_generation_prompt=True
)

# 4. Open the uploaded screenshot locally and process
screenshot_image = Image.open(image_path).convert("RGB")

inputs = processor(
    images=screenshot_image,
    text=prompt_text,
    return_tensors="pt",
).to(model.device, torch.float16)

# 5. Generate HTML/CSS from the screenshot
with torch.inference_mode():
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=2048,
        temperature=0.7,
        top_p=0.9
    )

# 6. Decode the result
result_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
web2code_output = result_texts[0]

print("=== HTML/CSS Code ===")
print(web2code_output)


Saving image_0_1.png to image_0_1 (2).png
Uploaded: image_0_1 (2).png


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



=== HTML/CSS Code ===
USER:  
Develop HTML that reflects the structure of the webpage illustrated in the provided image. ASSISTANT: To create a webpage with a similar structure to the one in the image, you can use HTML elements such as `<div>`, `<p>`, and `<img>`. Here's an example of how you can structure the webpage:
```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>World Wide Webpage</title>
</head>
<body>
    <div class="header">
        <p>World Wide Webpage</p>
        <img src="your-image-source.jpg" alt="Your Image">
    </div>
    <div class="content">
        <p>Welcome to the World Wide Webpage!</p>
        <p>This is a sample webpage that demonstrates how to structure a website using HTML.</p>
    </div>
</body>
</html>
```
In this example, the `<div>` element is used to create a header with a title and an image. The `<p>` element is used to display text content. Y