Skip to content

Commit 33199b2

Browse files
authored
1 parent 1390c40 commit 33199b2

File tree

1 file changed

+142
-0
lines changed

1 file changed

+142
-0
lines changed

gemini-bbox-tool.html

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>Gemini API Image Processor with Bounding Box Visualization</title>
7+
<script type="module">
8+
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai";
9+
import { marked } from "https://esm.run/marked";
10+
11+
/**
 * Return the Gemini API key, asking the user for it when none is stored.
 *
 * The key is cached in localStorage under "GEMINI_API_KEY". Surrounding
 * whitespace is stripped from both the stored and the typed value: keys are
 * usually pasted, and a stray space or newline would otherwise be persisted
 * and silently break every later request.
 *
 * @returns {string|null} The key; "" when the user entered nothing usable,
 *   or null when the prompt was cancelled.
 */
function getApiKey() {
  const storageKey = "GEMINI_API_KEY";

  const stored = localStorage.getItem(storageKey);
  const storedKey = typeof stored === "string" ? stored.trim() : stored;
  if (storedKey) {
    return storedKey;
  }

  // prompt() yields null when the dialog is cancelled, so only trim strings.
  const entered = prompt("Please enter your Gemini API key:");
  const apiKey = typeof entered === "string" ? entered.trim() : entered;
  if (apiKey) {
    // Persist only non-empty keys so a blank answer triggers a new prompt next time.
    localStorage.setItem(storageKey, apiKey);
  }
  return apiKey;
}
21+
22+
/**
 * Build a Gemini model handle configured with the user's API key.
 *
 * @param {object} params - Passed through unchanged to
 *   `GoogleGenerativeAI#getGenerativeModel` (e.g. `{ model: "gemini-1.5-pro" }`).
 * @returns {Promise<object>} Resolves to the model object returned by the SDK.
 * @throws {Error} When no API key is available (prompt cancelled or left blank).
 */
async function getGenerativeModel(params) {
  const apiKey = getApiKey();
  if (!apiKey) {
    // Fail here with a clear message instead of handing null/"" to the SDK
    // and surfacing an unrelated error once the request is made.
    throw new Error("A Gemini API key is required to process images.");
  }
  const genAI = new GoogleGenerativeAI(apiKey);
  return genAI.getGenerativeModel(params);
}
27+
28+
/**
 * Read a file and wrap its base64 content in the `inlineData` part shape that
 * is passed to `generateContent`.
 *
 * @param {File|Blob} file - The file to encode; its `type` becomes `mimeType`.
 * @returns {Promise<{inlineData: {data: string, mimeType: string}}>} Resolves
 *   once the file has been read successfully.
 * @throws {Error} Rejects when the read fails or is aborted, so callers never
 *   wait forever on an unreadable file.
 */
async function fileToGenerativePart(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    // Resolve on `load` only: `loadend` also fires after a failed read, where
    // `reader.result` is null and splitting it would throw inside the handler.
    reader.onload = () => {
      resolve({
        inlineData: {
          // A data URL is "data:<mime>;base64,<payload>"; keep just the payload.
          data: reader.result.split(",")[1],
          mimeType: file.type,
        },
      });
    };

    const fail = () => {
      reject(reader.error || new Error("Could not read the selected file."));
    };
    reader.onerror = fail;
    reader.onabort = fail;

    reader.readAsDataURL(file);
  });
}
40+
41+
/**
 * Click handler: send the selected image and the prompt to Gemini, render the
 * reply into #result, and draw any bounding boxes found in the reply.
 *
 * Reads #imageInput, #promptInput and #result from the document. Failures are
 * shown inside #result rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function processImageAndPrompt() {
  const fileInput = document.getElementById('imageInput');
  const promptInput = document.getElementById('promptInput');
  const resultDiv = document.getElementById('result');

  // Capture both inputs up front: the selection can change while the request
  // is in flight, and the boxes must be drawn on the image that was analysed.
  const file = fileInput.files[0];
  const promptText = promptInput.value;

  if (!file || !promptText.trim()) {
    alert('Please select an image and enter a prompt.');
    return;
  }

  resultDiv.textContent = 'Processing...';

  try {
    const model = await getGenerativeModel({ model: "gemini-1.5-pro" });
    const imagePart = await fileToGenerativePart(file);

    const result = await model.generateContent([promptText, imagePart]);
    const response = await result.response;
    const text = response.text();

    // NOTE(review): the model's markdown is rendered as HTML without
    // sanitising. Consider running the output through an HTML sanitiser
    // before assigning it to innerHTML.
    resultDiv.innerHTML = marked.parse(text);

    // Extract coordinates from the response
    const coordinates = extractCoordinates(text);
    if (coordinates.length > 0) {
      displayImageWithBoundingBoxes(file, coordinates);
    }
  } catch (error) {
    // textContent, not innerHTML: error messages are plain text and may
    // contain characters that would otherwise be parsed as markup.
    resultDiv.textContent = `Error: ${error.message}`;
  }
}
72+
73+
/**
 * Find every four-integer list of the form "[a, b, c, d]" in a text.
 *
 * @param {string} text - Free-form text, e.g. a model response.
 * @returns {number[][]} One four-number array per match, in order of
 *   appearance; empty when there is no match or `text` is null/undefined.
 */
function extractCoordinates(text) {
  const boxPattern = /\[\s*\d+\s*,\s*\d+\s*,\s*\d+\s*,\s*\d+\s*\]/g;
  const source = text == null ? "" : String(text);
  const matches = source.match(boxPattern) || [];

  // Convert the digit runs directly instead of using JSON.parse: the pattern
  // accepts zero-padded values such as "007", which are not valid JSON numbers
  // and would make JSON.parse throw.
  return matches.map((box) => box.match(/\d+/g).map((digits) => Number(digits)));
}
78+
79+
/**
 * Draw an image file onto #canvas and outline each bounding box on top of it.
 *
 * @param {File|Blob} file - Image to display; it is read as a data URL.
 * @param {number[][]} coordinates - Boxes as [ymin, xmin, ymax, xmax]. Each
 *   value is on a 0–1000 scale relative to the image, measured from the
 *   top-left corner (the format Gemini returns).
 * @returns {void} Drawing happens asynchronously once the image has loaded.
 */
function displayImageWithBoundingBoxes(file, coordinates) {
  const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
  const reader = new FileReader();

  reader.onload = (event) => {
    const image = new Image();

    image.onload = () => {
      // Size the canvas to the image so box coordinates map 1:1 onto pixels.
      const canvas = document.getElementById('canvas');
      canvas.width = image.width;
      canvas.height = image.height;
      const ctx = canvas.getContext('2d');
      ctx.drawImage(image, 0, 0);

      coordinates.forEach((box, index) => {
        // Scale 0–1000 values to 0–1 fractions. Dividing directly keeps the
        // top-left origin; subtracting from 1000 first would mirror each box
        // through the centre of the image.
        const [ymin, xmin, ymax, xmax] = box.map((coord) => coord / 1000);
        const width = (xmax - xmin) * image.width;
        const height = (ymax - ymin) * image.height;

        // Cycle through the palette so neighbouring boxes are distinguishable.
        ctx.strokeStyle = colors[index % colors.length];
        ctx.lineWidth = 2;
        ctx.strokeRect(xmin * image.width, ymin * image.height, width, height);
      });
    };

    image.src = event.target.result;
  };

  reader.readAsDataURL(file);
}
105+
106+
// Run the image + prompt pipeline whenever the "Process" button is clicked.
const submitButton = document.getElementById('submitBtn');
submitButton.addEventListener('click', processImageAndPrompt);
108+
</script>
109+
<style>
110+
/* Page shell: a centred, fixed-maximum-width column. */
body {
  font-family: Arial, sans-serif;
  max-width: 800px;
  margin: 0 auto;
  padding: 20px;
}
/* Prompt box. border-box makes "width: 100%" include the textarea's own
   padding and border, so it lines up with the column instead of overflowing. */
textarea {
  box-sizing: border-box;
  width: 100%;
  height: 100px;
}
/* Output panels: model response and annotated image. */
#result, #imageContainer {
  margin-top: 20px;
  border: 1px solid #ccc;
  padding: 10px;
}
/* Scale large images down to the panel width while keeping their aspect ratio. */
#canvas {
  max-width: 100%;
  height: auto;
}
129+
</style>
130+
</head>
131+
<body>
<h1>Gemini API Image Processor with Bounding Box Visualization</h1>
<!-- Inputs: aria-labels give the unlabeled controls an accessible name without changing the layout. -->
<input type="file" id="imageInput" accept="image/*" aria-label="Image to analyze">
<textarea id="promptInput" aria-label="Prompt">Return bounding boxes as [ymin, xmin, ymax, xmax]
</textarea>
<button id="submitBtn" type="button">Process</button>
<!-- Model response; aria-live lets assistive technology announce status and results as they are written here. -->
<div id="result" aria-live="polite"></div>
<!-- The uploaded image with its bounding boxes is drawn onto this canvas. -->
<div id="imageContainer">
<canvas id="canvas"></canvas>
</div>
</body>
142+
</html>

0 commit comments

Comments
 (0)