Skip to content

Commit f3410f9

Browse files
committed
A ton of UI tweaks
- Shows full document textarea at top, but only if there are multiple pages
- Prevents user from selecting a new file while a file is being processed
- Better indications of progress, including a "Done x of y" message
1 parent 16a97ab commit f3410f9

File tree

1 file changed

+133
-61
lines changed

1 file changed

+133
-61
lines changed

pdf-ocr.html

+133-61
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
<!DOCTYPE html>
22
<html>
33
<head>
4-
<title>OCR a PDF</title>
4+
<title>OCR PDFs and images directly in your browser</title>
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
56
<script defer data-domain="tools.simonwillison.net" src="https://plausible.io/js/script.js"></script>
67
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.9.359/pdf.min.js"></script>
78
<script src="https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js"></script>
89
<style>
910
body {
10-
padding: 2em;
11+
padding: 1em;
1112
font-family: helvetica, sans-serif;
13+
line-height: 1.3;
1214
}
1315
.dropzone {
16+
box-sizing: border-box;
1417
width: 100%;
1518
height: 10em;
1619
border: 2px dashed #ccc;
@@ -19,42 +22,77 @@
1922
align-items: center;
2023
font-size: 24px;
2124
cursor: pointer;
25+
padding: 1em;
26+
}
27+
.dropzone.disabled {
28+
cursor: not-allowed;
2229
}
2330
.dropzone.drag-over {
2431
background-color: pink;
2532
}
2633
.image-container img {
2734
margin-bottom: 10px;
35+
max-width: 100%;
2836
}
29-
.textarea-alt {
37+
textarea {
3038
width: 100%;
3139
height: 10em;
3240
margin-bottom: 20px;
41+
box-sizing: border-box;
3342
}
34-
.full-document {
35-
width: 100%;
36-
height: 30em;
37-
margin-top: 20px;
43+
.full-document-section {
44+
display: none;
45+
margin-bottom: 20px;
3846
}
3947
</style>
4048
</head>
4149
<body>
42-
<h1>OCR a PDF</h1>
50+
<h1>OCR PDFs and images directly in your browser</h1>
4351
<p>This tool runs entirely in your browser. No files are uploaded to a server.</p>
44-
<input type="file" id="fileInput" accept=".pdf" style="display: none;" />
52+
<p>It uses <a href="https://tesseract.projectnaptha.com/">Tesseract.js</a> for OCR and <a href="https://mozilla.github.io/pdf.js/">PDF.js</a> to convert PDFs into images.</p>
53+
<input type="file" id="fileInput" accept=".pdf,.jpg,.jpeg,.png,.gif" style="display: none;" />
4554
<div class="dropzone" id="dropzone">
46-
Drag and drop PDF file here or click to select a file
55+
Drag and drop a PDF, JPG, PNG, or GIF file here or click to select a file
56+
</div>
57+
<div class="full-document-section" id="fullDocumentSection">
58+
<h2>Full document</h2>
59+
<textarea class="full-document" id="fullDocument"></textarea>
60+
<h2>Pages</h2>
4761
</div>
4862
<div class="image-container"></div>
49-
<h2>Full document</h2>
50-
<textarea class="full-document" id="fullDocument"></textarea>
5163

5264
<script>
5365
const desiredWidth = 800;
5466
const dropzone = document.getElementById('dropzone');
5567
const fileInput = document.getElementById('fileInput');
5668
const imageContainer = document.querySelector('.image-container');
5769
const fullDocumentTextarea = document.getElementById('fullDocument');
70+
const fullDocumentSection = document.getElementById('fullDocumentSection');
71+
72+
let fileSelectionAllowed = true;
73+
74+
/**
 * Rebuild the combined "Full document" textarea from the per-page textareas.
 *
 * The section is shown only when more than one textarea inside
 * `.image-container` holds non-whitespace text; otherwise the combined
 * textarea is cleared and the section is hidden.
 */
function showFullDocument() {
  // Trim once per textarea, then drop the ones that are empty.
  const pageTexts = Array.from(
    document.querySelectorAll('.image-container textarea'),
    (ta) => ta.value.trim()
  ).filter((text) => text.length > 0);

  if (pageTexts.length > 1) {
    fullDocumentTextarea.value = pageTexts.join("\n\n");
    fullDocumentSection.style.display = 'block';
  } else {
    fullDocumentTextarea.value = '';
    fullDocumentSection.style.display = 'none';
  }
}
87+
88+
/**
 * Put text into a textarea and resize the textarea to fit it.
 *
 * @param {HTMLTextAreaElement} ta - Textarea to fill.
 * @param {string} text - Text to show; surrounding whitespace is trimmed.
 *   A null/undefined value is treated as an empty string.
 */
function setTextarea(ta, text) {
  ta.value = (text == null ? '' : String(text)).trim();
  // Reset the height first so scrollHeight is measured against the content
  // rather than the previous explicit height.
  ta.style.height = 'auto';
  ta.style.height = `${ta.scrollHeight + 5}px`;
}
94+
95+
const worker = Tesseract.createWorker();
5896

5997
dropzone.addEventListener('dragover', handleDragOver);
6098
dropzone.addEventListener('dragleave', handleDragLeave);
@@ -65,24 +103,32 @@ <h2>Full document</h2>
65103

66104
/**
 * `dragover` handler for the dropzone.
 *
 * Always cancels the event's default action; adds the `drag-over` highlight
 * class only while file selection is allowed.
 *
 * @param {DragEvent} event
 */
async function handleDragOver(event) {
  event.preventDefault();
  if (!fileSelectionAllowed) {
    return;
  }
  dropzone.classList.add('drag-over');
}
70110

71111
/**
 * `dragleave` handler for the dropzone.
 *
 * Cancels the event's default action and removes the `drag-over` highlight.
 * The class is removed unconditionally: removing an absent class is a no-op,
 * and this keeps a highlight from sticking if selection was disabled while
 * the pointer was still over the dropzone.
 *
 * @param {DragEvent} event
 */
async function handleDragLeave(event) {
  event.preventDefault();
  dropzone.classList.remove('drag-over');
}
75117

76118
/**
 * `drop` handler for the dropzone.
 *
 * Cancels the event's default action and clears the `drag-over` highlight.
 * When file selection is allowed and the drop carries at least one file, the
 * dropped file list is mirrored onto the hidden file input and the first
 * file is passed to `processFile`.
 *
 * @param {DragEvent} event
 */
async function handleDrop(event) {
  event.preventDefault();
  dropzone.classList.remove('drag-over');
  if (!fileSelectionAllowed) {
    return;
  }

  const { files } = event.dataTransfer;
  // A drop of text or a link carries no files; there is nothing to process.
  if (!files || files.length === 0) {
    return;
  }

  fileInput.files = files;
  await processFile(files[0]);
}
83127

84128
/**
 * Click handler for the dropzone: opens the hidden file input's picker,
 * but only while file selection is allowed.
 */
async function handleClick() {
  if (!fileSelectionAllowed) {
    return;
  }
  fileInput.click();
}
87133

88134
fileInput.addEventListener('change', (event) => {
@@ -91,65 +137,91 @@ <h2>Full document</h2>
91137
});
92138

93139
// Cached promise so the shared Tesseract worker is loaded and initialised
// only once, however many files are processed.
let workerReadyPromise = null;

/** Load the shared `worker` and its English language data (first call only). */
function ensureWorkerReady() {
  if (workerReadyPromise === null) {
    workerReadyPromise = (async () => {
      await worker.load();
      await worker.loadLanguage("eng");
      await worker.initialize("eng");
    })().catch((error) => {
      // Forget the failed attempt so the next file can retry initialisation.
      workerReadyPromise = null;
      throw error;
    });
  }
  return workerReadyPromise;
}

/** Add one image plus textarea to the page, OCR it, and refresh the full-document view. */
async function ocrImageIntoPage(imageURL) {
  const ta = await processImage(imageURL);
  const { text } = await ocrImage(imageURL);
  setTextarea(ta, text);
  showFullDocument();
}

/**
 * OCR a user-selected file and render the results into the page.
 *
 * Clears any previous output, disables the dropzone while work is in
 * progress, and shows progress text in it. A file whose type is
 * `application/pdf` is converted page by page via `convertPDFToImages`;
 * any other file is treated as a single image.
 *
 * The shared worker is intentionally NOT terminated afterwards: it is
 * created once for the page, so terminating it would leave nothing usable
 * for the next file.
 *
 * @param {File} file - File to process. A missing file (for example a
 *   cancelled file picker) is ignored, as is a call made while another file
 *   is still being processed.
 * @returns {Promise<void>} Rejects with the underlying error if loading or
 *   OCR fails; the dropzone is re-enabled either way.
 */
async function processFile(file) {
  if (!file || !fileSelectionAllowed) {
    return;
  }

  fullDocumentTextarea.value = '';
  fullDocumentSection.style.display = 'none';
  imageContainer.innerHTML = '';
  const originalText = dropzone.innerText;
  dropzone.innerText = 'Processing file...';
  dropzone.classList.add('disabled');
  fileSelectionAllowed = false;

  try {
    await ensureWorkerReady();

    if (file.type === 'application/pdf') {
      const { numPages, imageIterator } = await convertPDFToImages(file);
      let done = 0;
      dropzone.innerText = `Processing ${numPages} page${numPages === 1 ? '' : 's'}`;
      for await (const { imageURL } of imageIterator) {
        await ocrImageIntoPage(imageURL);
        done += 1;
        dropzone.innerText = `Done ${done} of ${numPages}`;
      }
    } else {
      await ocrImageIntoPage(URL.createObjectURL(file));
    }
  } finally {
    // Always restore the dropzone, even when OCR or PDF loading throws,
    // so the user is never left with a permanently disabled page.
    dropzone.innerText = originalText;
    dropzone.classList.remove('disabled');
    fileSelectionAllowed = true;
  }
}
114177

115-
async function* convertPDFToImages(file) {
116-
try {
117-
const pdf = await pdfjsLib.getDocument(URL.createObjectURL(file)).promise;
118-
const numPages = pdf.numPages;
178+
/**
 * Append an image and an empty placeholder textarea to the image container.
 *
 * @param {string} imageURL - URL assigned to the new `<img>` element's `src`.
 * @returns {Promise<HTMLTextAreaElement>} The newly added textarea.
 */
async function processImage(imageURL) {
  const img = document.createElement('img');
  img.src = imageURL;
  imageContainer.appendChild(img);

  const textarea = document.createElement('textarea');
  textarea.classList.add('textarea-alt');
  textarea.placeholder = 'OCRing image...';
  imageContainer.appendChild(textarea);

  return textarea;
}
190+
191+
/**
 * Open a PDF file and expose its pages as rendered JPEG data URLs.
 *
 * The file is read into memory and handed to PDF.js as bytes, so no blob
 * URL is created that would later need revoking.
 *
 * @param {Blob} file - The PDF file to open.
 * @returns {Promise<{numPages: number, imageIterator: AsyncGenerator<{imageURL: string}>}>}
 *   `numPages` is the page count reported by PDF.js. `imageIterator` yields
 *   one `{ imageURL }` per page that renders successfully, in page order,
 *   each rendered `desiredWidth` pixels wide. A page that fails to render is
 *   logged and skipped, so fewer than `numPages` items may be yielded.
 */
async function convertPDFToImages(file) {
  const data = new Uint8Array(await file.arrayBuffer());
  const pdf = await pdfjsLib.getDocument({ data }).promise;
  const { numPages } = pdf;

  async function* images() {
    for (let pageNumber = 1; pageNumber <= numPages; pageNumber++) {
      let imageURL;
      try {
        const page = await pdf.getPage(pageNumber);
        const baseViewport = page.getViewport({ scale: 1 });
        // Scale so the rendered page is exactly desiredWidth pixels wide.
        const scale = desiredWidth / baseViewport.width;
        const canvas = document.createElement('canvas');
        canvas.width = desiredWidth;
        canvas.height = scale * baseViewport.height;
        await page.render({
          canvasContext: canvas.getContext('2d'),
          viewport: page.getViewport({ scale }),
        }).promise;
        imageURL = canvas.toDataURL('image/jpeg', 0.8);
      } catch (error) {
        console.error(`Error rendering page ${pageNumber}:`, error);
        continue;
      }
      yield { imageURL };
    }
  }

  return { numPages, imageIterator: images() };
}
139218

140219
/**
 * Run OCR on one image using the shared `worker`.
 *
 * @param {string} imageUrl - URL of the image, passed to `worker.recognize`.
 * @returns {Promise<{text: string}>} The recognised text.
 */
async function ocrImage(imageUrl) {
  const result = await worker.recognize(imageUrl);
  return { text: result.data.text };
}
153225
</script>
154226
</body>
155-
</html>
227+
</html>

0 commit comments

Comments
 (0)