Skip to content

Commit 986ee00

Browse files
committed
feat(api): add automated cloudflare challenge solver
1 parent 997e89b commit 986ee00

File tree

3 files changed: 152 additions and 7 deletions.

apps/api/src/controllers/StreamingScrapeController.ts

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ export class StreamingScrapeController {
5050
// Helper to send SSE event
5151
// Helper to send SSE event
5252
const sendEvent = (event: string, data: any) => {
53+
if (res.writableEnded || res.destroyed) {
54+
return;
55+
}
56+
5357
// Optimization: Don't stringify large payloads for logging to prevent OOM
5458
let logPreview = '';
5559
if (data && (event === 'result' || event === 'progress')) {
@@ -76,14 +80,47 @@ export class StreamingScrapeController {
7680
// Add this response as a listener for the job
7781
jobManager.addListener(job.id, sendEvent);
7882

// Guard so the listener teardown below runs at most once, no matter how many
// request/response lifecycle events end up firing.
let listenersReleased = false;

// Detaches this response from the job and unhooks every lifecycle handler
// registered further down.
const cleanupStream = () => {
  if (!listenersReleased) {
    listenersReleased = true;
    jobManager.removeListener(job.id, sendEvent);
    req.removeListener('aborted', onRequestAborted);
    res.removeListener('close', onResponseClosed);
    res.removeListener('finish', onResponseFinished);
  }
};

// Cancels the job only while it is still pending or running.
const cancelIfStillRunning = () => {
  const currentJob = jobManager.getJob(job.id);
  if (!currentJob) {
    return;
  }

  const stillActive = currentJob.status === 'pending' || currentJob.status === 'running';
  if (stillActive) {
    jobManager.cancelJob(job.id);
  }
};

// The request was aborted: release the listeners, then stop the work.
const onRequestAborted = () => {
  cleanupStream();
  cancelIfStillRunning();
};

// `close` fires after `finish` on normal completion. Only treat it as a disconnect
// when the response did not finish writing the SSE stream.
const onResponseClosed = () => {
  cleanupStream();

  const endedNormally = res.writableEnded;
  if (!endedNormally) {
    cancelIfStillRunning();
  }
};

// The stream was written completely: nothing to cancel, just release the listeners.
const onResponseFinished = () => {
  cleanupStream();
};

req.on('aborted', onRequestAborted);
res.on('close', onResponseClosed);
res.on('finish', onResponseFinished);
87124

88125
// Progress callback - stores in job and broadcasts
89126
const onProgress = (progress: StreamProgress) => {
@@ -139,7 +176,7 @@ export class StreamingScrapeController {
139176
}).catch(err => console.error('Log failed:', err));
140177

141178
// Remove listener and end response
142-
jobManager.removeListener(job.id, sendEvent);
179+
cleanupStream();
143180
res.end();
144181

145182
} catch (error) {
@@ -165,9 +202,11 @@ export class StreamingScrapeController {
165202
}
166203
});
167204
} else {
168-
res.write(`event: error\n`);
169-
res.write(`data: ${JSON.stringify({ error: (error as Error).message })}\n\n`);
170-
res.end();
205+
if (!res.writableEnded && !res.destroyed) {
206+
res.write(`event: error\n`);
207+
res.write(`data: ${JSON.stringify({ error: (error as Error).message })}\n\n`);
208+
res.end();
209+
}
171210
}
172211
}
173212
}

apps/api/src/services/CloudflareChallengeService.ts

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,99 @@ class CloudflareChallengeService {
202202
await page.waitForTimeout(900);
203203
return this.detect(page);
204204
}
205+
206+
/**
 * Tries to clear a Cloudflare Turnstile challenge on `page` by clicking its checkbox.
 *
 * Sequence: wait for the page to settle, confirm a challenge is present (the page
 * title contains "just a moment", or a `cf-turnstile-response` input is attached),
 * find the bounding box of the `challenges.cloudflare.com` iframe, click near its
 * left edge, wait, then re-read the page title.
 *
 * @param page - Page that is suspected of showing the challenge.
 * @returns `true` when, after the click, the page title no longer contains
 *   "just a moment"; `false` in every other case. Exceptions are logged and
 *   result in `false`; they are never propagated.
 *
 * NOTE(review): success is judged by the title alone, so a page that was recognised
 * only through the Turnstile input (its title never contained the interstitial text)
 * reports `true` after any click. Callers should re-run detection before trusting it.
 */
public async solveChallenge(page: Page): Promise<boolean> {
  type Bounds = { x: number; y: number; width: number; height: number };

  // Shared formatter for the two bounding-box log lines.
  const describeBounds = (b: Bounds): string =>
    `x=${Math.round(b.x)} y=${Math.round(b.y)} w=${Math.round(b.width)} h=${Math.round(b.height)}`;

  // A failed title read is treated as an empty title.
  const titleShowsInterstitial = async (): Promise<boolean> => {
    const currentTitle = await page.title().catch(() => '');
    return currentTitle.toLowerCase().includes('just a moment');
  };

  console.log('CF: Attempting to solve Cloudflare Challenge...');

  try {
    console.log('CF: Waiting 5 seconds for initial load...');
    await page.waitForTimeout(5000);

    if (!(await titleShowsInterstitial())) {
      // No interstitial title: continue only if a Turnstile response input gets
      // attached to the DOM within 5 seconds.
      const responseInput = page.locator('[name="cf-turnstile-response"]').first();
      try {
        await responseInput.waitFor({ state: 'attached', timeout: 5000 });
      } catch {
        console.log('CF: No Cloudflare Challenge input detected. Proceeding.');
        return false;
      }
    }

    console.log('CF: Cloudflare challenge page detected! Waiting 3s...');
    await page.waitForTimeout(3000);

    const widgetIframe = page.locator("iframe[src*='challenges.cloudflare.com']").first();
    let bounds: Bounds | null = null;

    // First choice: the iframe element matched by its `src` attribute.
    try {
      await widgetIframe.waitFor({ state: 'attached', timeout: 8000 });
      bounds = await widgetIframe.boundingBox();
      if (bounds) {
        console.log(`CF: Turnstile IFRAME bounds: ${describeBounds(bounds)}`);
      }
    } catch (lookupError) {
      console.log(`CF: Could not get iframe bounding box: ${(lookupError as Error).message}`);
    }

    // Fallback: walk the page's frames and use the first matching frame whose
    // owning element yields a bounding box.
    if (!(bounds && bounds.width !== 0 && bounds.height !== 0)) {
      console.log("CF: Iframe element has no visible bounds. Trying frame URL matching...");
      for (const candidate of page.frames()) {
        const frameUrl = candidate.url();
        if (!frameUrl.includes("challenges.cloudflare.com")) {
          continue;
        }

        console.log(`CF: Found CF frame: ${frameUrl.substring(0, 100)}`);
        try {
          const owner = await candidate.frameElement();
          bounds = await owner.boundingBox();
          if (bounds) {
            console.log(`CF: Frame element bounds: ${describeBounds(bounds)}`);
            break;
          }
        } catch (fallbackError) {
          console.log(`CF: frameElement() fallback failed: ${(fallbackError as Error).message}`);
        }
      }
    }

    if (!(bounds && bounds.width > 0 && bounds.height > 0)) {
      console.log("CF: FAILED - Could not find any clickable Turnstile iframe.");
    } else {
      // Aim 30px in from the iframe's left edge at its vertical centre, with a few
      // pixels of random jitter on both axes.
      const clickX = bounds.x + 30 + (Math.random() * 10 - 5);
      const clickY = bounds.y + (bounds.height / 2) + (Math.random() * 6 - 3);

      console.log(`CF: Clicking checkbox at (${clickX.toFixed(1)}, ${clickY.toFixed(1)})...`);

      const moveSteps = Math.floor(Math.random() * 9) + 10; // 10 to 18 steps
      await page.mouse.move(clickX, clickY, { steps: moveSteps });

      const pauseBeforePress = Math.random() * 250 + 150; // 150 to 400ms
      await page.waitForTimeout(pauseBeforePress);

      await page.mouse.down();
      const pressDuration = Math.floor(Math.random() * 81) + 50; // 50 to 130ms
      await page.waitForTimeout(pressDuration);
      await page.mouse.up();

      console.log("CF: Click executed. Waiting 8s for verification...");
      await page.waitForTimeout(8000);

      if (!(await titleShowsInterstitial())) {
        console.log("CF: Challenge appears solved! Page title changed.");
        return true;
      }

      console.log("CF: Page title still shows challenge. May need retry.");
    }
  } catch (failure) {
    console.error(`CF: Exception during bypass:`, failure);
  }

  console.log("CF: Proceeding without successful bypass or bypass failed.");
  return false;
}
205298
}
206299

207300
export const cloudflareChallengeService = CloudflareChallengeService.getInstance();

apps/api/src/services/ScraperService.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,24 @@ class ScraperService {
8888
}
8989

9090
/**
 * Verifies that `page` is not stuck behind a Cloudflare challenge.
 *
 * Runs detection after navigation; when a challenge is detected it makes one
 * automated solve attempt and, if that attempt reports success, runs detection
 * again before accepting the page.
 *
 * @param page - Page that has just been navigated.
 * @param jsEnabled - Forwarded to the detector as its `recheck` option.
 * @throws CloudflareChallengeError carrying the most recent detection result when
 *   the challenge is still present after the solve attempt.
 */
private async ensureNoCloudflareChallenge(page: Page, jsEnabled = false) {
  const initialCheck = await cloudflareChallengeService.detectAfterNavigation(page, {
    recheck: jsEnabled
  });

  if (!initialCheck.detected) {
    return;
  }

  console.log('🛡️ Cloudflare protection detected. Attempting automated bypass...');

  // Detection result reported to the caller if the page is still blocked.
  let blocking = initialCheck;

  if (await cloudflareChallengeService.solveChallenge(page)) {
    console.log('✅ Cloudflare protection automated bypass successful!');

    // Re-verify that the page is actually clear after the solve attempt
    blocking = await cloudflareChallengeService.detect(page);
    if (!blocking.detected) {
      return; // Successfully bypassed
    }
  }

  console.log('❌ Cloudflare protection automated bypass failed.');
  throw new CloudflareChallengeError(blocking);
}

0 commit comments

Comments
 (0)