|
3 | 3 | <head> |
4 | 4 | <meta charset="UTF-8"> |
5 | 5 | <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
6 | | - <title>Gemini API Image Processor with Bounding Box Visualization</title> |
| 6 | + <title>Gemini API Image Bounding Box Visualization</title> |
7 | 7 | <script type="module"> |
8 | 8 | import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai"; |
9 | 9 | import { marked } from "https://esm.run/marked"; |
|
38 | 38 | }); |
39 | 39 | } |
40 | 40 |
|
/**
 * Downscale an image file and re-encode it as a JPEG.
 *
 * The file is read as a data URL, decoded into an Image, drawn onto an
 * off-screen canvas no wider than `maxWidth` (aspect ratio preserved) and
 * exported with the given JPEG quality.
 *
 * @param {File|Blob} file - Image selected by the user.
 * @param {number} [maxWidth=1000] - Widest allowed output, in pixels; narrower images keep their size.
 * @param {number} [quality=0.7] - JPEG quality passed to canvas.toBlob (0..1).
 * @returns {Promise<File>} Resolves with a File named "compressed_image.jpg" of type image/jpeg.
 *   Rejects with an Error if the file cannot be read, decoded or encoded, so
 *   callers awaiting it inside try/catch can surface the failure instead of hanging.
 */
function resizeAndCompressImage(file, maxWidth = 1000, quality = 0.7) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onerror = () => {
      reject(reader.error || new Error('Failed to read the image file'));
    };

    reader.onload = (event) => {
      const img = new Image();

      // Without this handler a non-image file would leave the promise pending forever.
      img.onerror = () => {
        reject(new Error('The selected file could not be decoded as an image'));
      };

      img.onload = () => {
        let width = img.width;
        let height = img.height;

        if (width > maxWidth) {
          // Keep at least one pixel of height for extremely wide, thin images.
          height = Math.max(1, Math.round((height * maxWidth) / width));
          width = maxWidth;
        }

        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;

        const ctx = canvas.getContext('2d');
        if (!ctx) {
          reject(new Error('Could not get a 2D canvas context'));
          return;
        }
        ctx.drawImage(img, 0, 0, width, height);

        canvas.toBlob((blob) => {
          // toBlob passes null when the image cannot be encoded.
          if (!blob) {
            reject(new Error('Failed to encode the resized image'));
            return;
          }
          resolve(new File([blob], "compressed_image.jpg", { type: "image/jpeg" }));
        }, 'image/jpeg', quality);
      };

      img.src = event.target.result;
    };

    reader.readAsDataURL(file);
  });
}
| 72 | + |
41 | 73 | async function processImageAndPrompt() { |
42 | 74 | const fileInput = document.getElementById('imageInput'); |
43 | 75 | const promptInput = document.getElementById('promptInput'); |
|
52 | 84 |
|
53 | 85 | try { |
54 | 86 | const model = await getGenerativeModel({ model: "gemini-1.5-pro" }); |
55 | | - const imagePart = await fileToGenerativePart(fileInput.files[0]); |
| 87 | + const compressedImage = await resizeAndCompressImage(fileInput.files[0]); |
| 88 | + const imagePart = await fileToGenerativePart(compressedImage); |
56 | 89 |
|
57 | 90 | const result = await model.generateContent([promptInput.value, imagePart]); |
58 | 91 | const response = await result.response; |
|
63 | 96 | // Extract coordinates from the response |
64 | 97 | const coordinates = extractCoordinates(text); |
65 | 98 | if (coordinates.length > 0) { |
66 | | - displayImageWithBoundingBoxes(fileInput.files[0], coordinates); |
| 99 | + displayImageWithBoundingBoxes(compressedImage, coordinates); |
67 | 100 | } |
68 | 101 | } catch (error) { |
69 | 102 | resultDiv.innerHTML = `Error: ${error.message}`; |
|
82 | 115 | const image = new Image(); |
// Once the image has loaded, draw it on #canvas inside a labelled 0-1000
// coordinate frame and overlay the bounding boxes from `coordinates`.
// Each box is [ymin, xmin, ymax, xmax] on a 0-1000 scale measured from the
// image's top-left corner, so both axes are labelled from that corner too.
image.onload = function() {
  // Margins around the image that leave room for axis ticks and labels.
  const marginLeft = 80;
  const marginTop = 20;
  const marginRight = 40;   // wide enough that the centred "1000" X label is not clipped
  const marginBottom = 80;

  const SCALE_MAX = 1000;   // coordinates are normalised to 0..1000
  const SCALE_STEP = 100;   // spacing of grid lines, ticks and labels

  const canvas = document.getElementById('canvas');
  canvas.width = marginLeft + image.width + marginRight;
  canvas.height = marginTop + image.height + marginBottom;
  const ctx = canvas.getContext('2d');

  const imageRight = marginLeft + image.width;
  const imageBottom = marginTop + image.height;

  // Map a 0..1000 coordinate to a canvas pixel position.
  const toCanvasX = (value) => marginLeft + (value / SCALE_MAX) * image.width;
  const toCanvasY = (value) => marginTop + (value / SCALE_MAX) * image.height;

  // Stroke a single straight segment using the current stroke settings.
  const strokeLine = (x1, y1, x2, y2) => {
    ctx.beginPath();
    ctx.moveTo(x1, y1);
    ctx.lineTo(x2, y2);
    ctx.stroke();
  };

  // Draw the image
  ctx.drawImage(image, marginLeft, marginTop);

  // Draw grid lines
  ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; // Red with 50% opacity
  ctx.lineWidth = 1;
  for (let value = 0; value <= SCALE_MAX; value += SCALE_STEP) {
    const x = toCanvasX(value);
    const y = toCanvasY(value);
    strokeLine(x, marginTop, x, imageBottom);   // vertical
    strokeLine(marginLeft, y, imageRight, y);   // horizontal
  }

  // Draw bounding boxes, cycling through the palette
  const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];
  coordinates.forEach((box, index) => {
    const [ymin, xmin, ymax, xmax] = box;

    ctx.strokeStyle = colors[index % colors.length];
    ctx.lineWidth = 5;
    ctx.strokeRect(
      toCanvasX(xmin),
      toCanvasY(ymin),
      ((xmax - xmin) / SCALE_MAX) * image.width,
      ((ymax - ymin) / SCALE_MAX) * image.height
    );
  });

  // Draw axes and labels
  ctx.strokeStyle = '#000000';
  ctx.lineWidth = 1;
  ctx.font = '26px Arial';

  // Y-axis with ticks and labels; 0 is at the top so the labels agree with
  // where the boxes are drawn.
  ctx.textAlign = 'right';
  strokeLine(marginLeft, marginTop, marginLeft, imageBottom);
  for (let value = 0; value <= SCALE_MAX; value += SCALE_STEP) {
    const y = toCanvasY(value);
    ctx.fillText(value.toString(), marginLeft - 5, y + 5);
    strokeLine(marginLeft - 5, y, marginLeft, y);
  }

  // X-axis with ticks and labels
  ctx.textAlign = 'center';
  strokeLine(marginLeft, imageBottom, imageRight, imageBottom);
  for (let value = 0; value <= SCALE_MAX; value += SCALE_STEP) {
    const x = toCanvasX(value);
    ctx.fillText(value.toString(), x, imageBottom + 20);
    strokeLine(x, imageBottom, x, imageBottom + 5);
  }
};
101 | 199 | image.src = event.target.result; |
102 | 200 | }; |
|
129 | 227 | </style> |
130 | 228 | </head> |
131 | 229 | <body> |
132 | | - <h1>Gemini API Image Processor with Bounding Box Visualization</h1> |
| 230 | + <h1>Optimized Gemini API Image Processor with Bounding Box Visualization</h1> |
133 | 231 | <input type="file" id="imageInput" accept="image/*"> |
134 | | - <textarea id="promptInput">Return bounding boxes as [ymin, xmin, ymax, xmax] |
| 232 | + <textarea id="promptInput">Return bounding boxes as JSON arrays [ymin, xmin, ymax, xmax] |
135 | 233 | </textarea> |
136 | 234 | <button id="submitBtn">Process</button> |
137 | 235 | <div id="result"></div> |
|
0 commit comments