Skip to content

Commit b3fcfe7

Browse files
authored
1 parent 61f38cb commit b3fcfe7

File tree

2 files changed

+201
-0
lines changed

2 files changed

+201
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
- [Render Markdown](https://tools.simonwillison.net/render-markdown) renders Markdown to HTML using the GitHub Markdown API
66
- [Annotated presentation creator](https://til.simonwillison.net/tools/annotated-presentations) to help turn slides into an annotated presentation
77
- [Arena animated](https://tools.simonwillison.net/arena-animated) animates the progression of the LMSYS Chatbot Arena, inspired by [this visualization](https://public.flourish.studio/visualisation/17992181/) by [Peter Gostev](https://www.linkedin.com/posts/peter-gostev_how-companies-llms-compare-over-the-course-activity-7196899934615257090-zilk) (via [Time-Winter-4319 on Reddit](https://www.reddit.com/r/LocalLLaMA/comments/1bp4j19/gpt4_is_no_longer_the_top_dog_timelapse_of/))
8+
- [Compare PDFs](https://tools.simonwillison.net/compare-pdfs) provides a visual comparison of the pages of two PDF files
89

910
On Observable:
1011

compare-pdfs.html

+200
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>Compare PDFs</title>
7+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.min.js"></script>
8+
<style>
9+
html { font-family: verdana; }
10+
#dropZone {
11+
border: 2px dashed #ccc;
12+
border-radius: 20px;
13+
width: 480px;
14+
padding: 20px;
15+
text-align: center;
16+
margin: 20px auto;
17+
}
18+
#dropZone.dragover {
19+
background-color: #e1f5fe;
20+
border-color: #03a9f4;
21+
}
22+
.page-container {
23+
display: flex;
24+
margin-bottom: 20px;
25+
}
26+
.page-container > div {
27+
margin-right: 10px;
28+
}
29+
#error {
30+
color: red;
31+
text-align: center;
32+
}
33+
</style>
34+
</head>
35+
<body>
36+
<h1>Compare PDFs</h1>
37+
<div id="dropZone">
38+
<p>Drag and drop two PDF files here, or click to select files</p>
39+
<input type="file" id="fileInput" multiple accept=".pdf" style="display: none;">
40+
</div>
41+
<div id="error"></div>
42+
<div id="pdfComparison"></div>
43+
44+
<script>
45+
// Tell PDF.js where to load its worker script from (same 3.4.120 release as
// the pdf.min.js included in <head>).
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.worker.min.js';

// Page elements shared by the functions below.
const dropZone = document.getElementById('dropZone');
const fileInput = document.getElementById('fileInput');
const errorDiv = document.getElementById('error');
const pdfComparison = document.getElementById('pdfComparison');

// The two files currently selected for comparison; assigned by handleFiles().
let pdf1;
let pdf2;

// The file input is hidden, so a click on the drop zone opens the picker.
dropZone.addEventListener('click', () => {
  fileInput.click();
});

// Highlight the drop zone while a drag is over it. Cancelling dragover is
// what lets the element receive the following drop event.
dropZone.addEventListener('dragover', (event) => {
  event.preventDefault();
  dropZone.classList.toggle('dragover', true);
});

dropZone.addEventListener('dragleave', () => {
  dropZone.classList.toggle('dragover', false);
});

// Dropped files and picker selections go through the same handler.
dropZone.addEventListener('drop', handleFiles);
fileInput.addEventListener('change', handleFiles);
68+
69+
/**
 * Handle a drop on the drop zone or a selection made in the file picker.
 *
 * The event is accepted only when it carries exactly two files that both
 * look like PDFs. Accepted files are stored in pdf1/pdf2 and rendering is
 * started; otherwise a validation message is shown and the previously
 * selected files are left untouched.
 *
 * @param {Event} e - A `drop` event (files read from e.dataTransfer) or a
 *   `change` event from the file input (files read from e.target).
 */
function handleFiles(e) {
  e.preventDefault();
  dropZone.classList.remove('dragover');

  const fileList = e.dataTransfer ? e.dataTransfer.files : e.target.files;
  const files = Array.from(fileList);

  // The picker's accept=".pdf" filter does not apply to drag-and-drop, so
  // the check has to be repeated here. Either the MIME type or the file
  // extension is enough, because some platforms report an empty type.
  const looksLikePdf = (file) =>
    file.type === 'application/pdf' || /\.pdf$/i.test(file.name);

  if (files.length !== 2 || !files.every(looksLikePdf)) {
    errorDiv.textContent = 'Please select exactly two PDF files.';
    return;
  }

  [pdf1, pdf2] = files;
  errorDiv.textContent = '';
  // renderPDFs() reports its own failures, so the promise is not awaited.
  renderPDFs();
}
83+
84+
/**
 * Load the two selected PDFs, render them page by page and append one
 * comparison row per page (PDF 1, PDF 2, differences) to #pdfComparison.
 *
 * When the documents have different page counts, the shorter one is padded
 * with empty placeholder canvases. Failures are logged to the console and
 * reported in the error area instead of being thrown.
 *
 * @returns {Promise<void>} Settles once every page has been displayed or the
 *   error message has been shown.
 */
async function renderPDFs() {
  // Object URLs keep their Blob alive until revoked, so remember each one
  // and release it when this run is finished.
  const objectUrls = [];

  try {
    const [doc1, doc2] = await Promise.all(
      [pdf1, pdf2].map((file) => {
        const url = URL.createObjectURL(file);
        objectUrls.push(url);
        return pdfjsLib.getDocument(url).promise;
      })
    );

    const pageCount = Math.max(doc1.numPages, doc2.numPages);
    pdfComparison.innerHTML = '';

    for (let i = 1; i <= pageCount; i++) {
      // A document that has run out of pages contributes null here and an
      // empty canvas below.
      const [page1, page2] = await Promise.all([
        i <= doc1.numPages ? doc1.getPage(i) : null,
        i <= doc2.numPages ? doc2.getPage(i) : null
      ]);

      const [canvas1, canvas2] = await Promise.all([
        page1 ? renderPageToCanvas(page1) : createEmptyCanvas(),
        page2 ? renderPageToCanvas(page2) : createEmptyCanvas()
      ]);

      const diffCanvas = compareCanvases(canvas1, canvas2);

      displayPage(i, canvas1, canvas2, diffCanvas);
    }
  } catch (error) {
    console.error('Error rendering PDFs:', error);
    errorDiv.textContent = 'Error rendering PDFs. Please try again with valid PDF files.';
  } finally {
    // Revoke only now: PDF.js may still read from the URLs while pages are
    // being fetched, so releasing them earlier could break rendering.
    objectUrls.forEach((url) => URL.revokeObjectURL(url));
  }
}
114+
115+
/**
 * Render one PDF.js page into a new, detached <canvas> element.
 *
 * @param {object} page - PDF.js page object; must provide getViewport() and
 *   render().
 * @param {number} [scale=1.5] - Zoom factor handed to page.getViewport().
 * @returns {Promise<HTMLCanvasElement>} The canvas, resolved once the page's
 *   render task has completed.
 */
async function renderPageToCanvas(page, scale = 1.5) {
  const viewport = page.getViewport({ scale });

  // Size the canvas to the viewport so the page is drawn without cropping.
  const canvas = document.createElement('canvas');
  canvas.width = viewport.width;
  canvas.height = viewport.height;

  const context = canvas.getContext('2d');
  await page.render({ canvasContext: context, viewport }).promise;
  return canvas;
}
126+
127+
/**
 * Create a blank placeholder canvas, used where one PDF has no page to show.
 *
 * @param {number} [width=1] - Canvas width in pixels.
 * @param {number} [height=1] - Canvas height in pixels.
 * @returns {HTMLCanvasElement} A new, detached canvas with nothing drawn on it.
 */
function createEmptyCanvas(width = 1, height = 1) {
  const canvas = document.createElement('canvas');
  canvas.width = width;
  canvas.height = height;
  return canvas;
}
133+
134+
/**
 * Build a canvas that marks every pixel where two rendered pages differ.
 *
 * The result is as large as the larger of the two inputs in each dimension.
 * Pixels that fall outside one of the inputs, or that are fully transparent
 * (for example in an empty placeholder canvas), are compared as white.
 * Differing pixels are painted semi-transparent red; matching pixels stay
 * fully transparent.
 *
 * @param {HTMLCanvasElement} canvas1 - First rendered page.
 * @param {HTMLCanvasElement} canvas2 - Second rendered page.
 * @returns {HTMLCanvasElement} New canvas holding the difference overlay.
 */
function compareCanvases(canvas1, canvas2) {
  const BLANK = 255; // channel value used for pixels that have no content

  const width = Math.max(canvas1.width, canvas2.width);
  const height = Math.max(canvas1.height, canvas2.height);

  const diffCanvas = document.createElement('canvas');
  diffCanvas.width = width;
  diffCanvas.height = height;
  const ctx = diffCanvas.getContext('2d');

  const img1 = canvas1.getContext('2d').getImageData(0, 0, canvas1.width, canvas1.height);
  const img2 = canvas2.getContext('2d').getImageData(0, 0, canvas2.width, canvas2.height);

  const diff = ctx.createImageData(width, height);

  // Byte offset of pixel (x, y) inside `img`, or -1 when the pixel has to be
  // read as blank. Each image is indexed with its OWN row stride: using the
  // diff canvas's width would address the wrong pixels whenever the two
  // pages differ in width.
  const visibleOffset = (img, x, y) => {
    if (x >= img.width || y >= img.height) {
      return -1;
    }
    const offset = (y * img.width + x) * 4;
    return img.data[offset + 3] === 0 ? -1 : offset;
  };

  for (let y = 0; y < height; y++) {
    for (let x = 0; x < width; x++) {
      const offset1 = visibleOffset(img1, x, y);
      const offset2 = visibleOffset(img2, x, y);

      // Compare R, G and B. A channel value of 0 is real data (black) and
      // must not be replaced by the blank value.
      let differs = false;
      for (let channel = 0; channel < 3 && !differs; channel++) {
        const value1 = offset1 < 0 ? BLANK : img1.data[offset1 + channel];
        const value2 = offset2 < 0 ? BLANK : img2.data[offset2 + channel];
        differs = value1 !== value2;
      }

      if (differs) {
        const i = (y * width + x) * 4;
        diff.data[i] = 255;
        diff.data[i + 1] = 0;
        diff.data[i + 2] = 0;
        diff.data[i + 3] = 128;
      }
    }
  }

  ctx.putImageData(diff, 0, 0);
  return diffCanvas;
}
170+
171+
/**
 * Append one comparison row to #pdfComparison: the page from PDF 1, the page
 * from PDF 2 and the difference overlay, each under its own heading.
 *
 * @param {number} pageNumber - Page number shown in the headings.
 * @param {HTMLCanvasElement} canvas1 - Canvas for the first PDF's page.
 * @param {HTMLCanvasElement} canvas2 - Canvas for the second PDF's page.
 * @param {HTMLCanvasElement} diffCanvas - Canvas highlighting the differences.
 */
function displayPage(pageNumber, canvas1, canvas2, diffCanvas) {
  const row = document.createElement('div');
  row.className = 'page-container';

  // Left-to-right order of the columns in the row.
  const columns = [
    [`PDF 1 - Page ${pageNumber}`, canvas1],
    [`PDF 2 - Page ${pageNumber}`, canvas2],
    [`Differences - Page ${pageNumber}`, diffCanvas],
  ];

  for (const [title, canvas] of columns) {
    const cell = document.createElement('div');
    const heading = document.createElement('h3');
    heading.textContent = title;
    cell.appendChild(heading);
    cell.appendChild(canvas);
    row.appendChild(cell);
  }

  pdfComparison.appendChild(row);
}
190+
</script>
191+
192+
<h3>Claude 3.5 Sonnet prompts used:</h3>
193+
194+
<ol>
195+
<li><code>Build a tool where I can drag and drop on two PDF files and it uses PDF.js to turn each of their pages into canvas elements and then displays those pages side by side with a third image that highlights any differences between them, if any differences exist</code></li>
196+
<li><code>rewrite that code to not use React at all</code></li>
197+
</ol>
198+
199+
</body>
200+
</html>

0 commit comments

Comments
 (0)