New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Xray(...).abort is not a function #270
Comments
I couldn't figure out how the abort function works, but in the meantime I came up with my own solution. It's not really straightforward, but at least it works :-) const rp = require('request-promise');
const Xray = require('x-ray');
// Shared x-ray instance. The `correctURI` filter prefixes string values with
// the amazon.de origin and passes every non-string value through untouched.
const x = Xray({
  filters: {
    correctURI(value) {
      if (typeof value !== 'string') {
        return value;
      }
      return `https://www.amazon.de${value}`;
    }
  }
});
const MAX_PAGES = 3;
/**
 * Downloads `url` with `rp` and scrapes the returned HTML with `x`.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<{pagnResult: *, pagnNextLink: *}>} Settles with the
 *   object handed to the x-ray callback, or rejects with the error the
 *   callback (or the download) reports.
 */
const getPage = url =>
  rp({ url }).then(
    html =>
      // x-ray reports through a node-style callback, so adapt it to a Promise.
      new Promise((resolve, reject) => {
        const selectors = {
          pagnResult: x(html, '.s-result-item.celwidget', [{ asin: '@data-asin' }]),
          pagnNextLink: '#pagnNextLink@href | correctURI'
        };
        const runScrape = x(html, selectors);
        runScrape((err, scraped) => (err ? reject(err) : resolve(scraped)));
      })
  );
/**
 * Tells the pagination loop whether it should stop early.
 * Placeholder implementation: it never requests a stop.
 *
 * @returns {boolean} Always `false`.
 */
function abort() {
  // Swap this constant for a useful condition.
  const shouldStop = false;
  return shouldStop;
}
/**
 * Scrapes up to MAX_PAGES consecutive amazon.de search-result pages for
 * `book`, following each page's "next" link.
 *
 * Pagination stops as soon as one of these holds:
 *   - MAX_PAGES pages have been fetched,
 *   - a page exposes no next link (last page reached),
 *   - fetching a page fails (the next URL is unknown, so continuing would
 *     only request `undefined`),
 *   - `abort()` returns true.
 *
 * @param {string} book - Search keywords; URI-encoded before use.
 * @returns {Promise<Array>} The `pagnResult` lists of every fetched page,
 *   concatenated into one array. Never rejects because of a failed page;
 *   the error is logged and the pages collected so far are returned.
 */
async function scrapeNpages(book) {
  const results = [];
  let nextUrl = `https://www.amazon.de/s?&field-keywords=${encodeURIComponent(book)}`;

  // `nextUrl` is falsy when the previous page had no "next" link.
  for (let page = 0; page < MAX_PAGES && nextUrl; ++page) {
    try {
      const result = await getPage(nextUrl);
      results.push(result.pagnResult);
      nextUrl = result.pagnNextLink;
      if (abort()) break;
    } catch (err) {
      console.log(err);
      // Without this page there is no link to the following one.
      break;
    }
  }

  return [].concat(...results);
}
scrapeNpages('harry potter').then(res => console.log(res)); |
In case anyone else comes up against this problem (I just came back to this repo after many months out of the scraping world and the "abort" method was my work), here's a working sample from my implementation of abort: var xray = require('x-ray'),
x = xray(),
moment = require('moment');
/**
 * Builds an x-ray query against `data.url` that collects every `.review`
 * element, follows the pagination link, and registers an abort callback.
 *
 * NOTE(review): the chain built here is neither invoked nor returned inside
 * this function — confirm how the scrape is actually started and how its
 * results are consumed.
 *
 * @param {Object} data - Only `data.url` is read here.
 */
function scrape(data) {
// One object per `.review` element, with the fields listed below.
x(data.url, '.review', [{
title: '.review-title',
content: '.review-text',
id: '@id',
rating: '.review-rating',
date: '.review-date',
reviewer: {
name: '.author',
id: '.author@href'
}
}])
// Selector for the href of the next page to visit.
.paginate('.a-pagination .a-last a@href')
// Abort callback: returns true as soon as any entry in `result` is dated
// before the start of the current day, false otherwise. `url` is unused.
.abort((result, url) => {
for(let i = 0; i < result.length; i++) {
// Drops the first 'on ' from the text — presumably a leading "on " prefix
// in the scraped date; verify against the page markup.
let dateStr = result[i].date.replace('on ', '');
let date = moment(dateStr, 'MMMM D, YYYY');
if(date.isBefore(moment().startOf('day'))) {
return true;
}
}
return false;
});
} If this doesn't work, make sure you have the correct version of x-ray installed. :) |
Subject of the issue
The documentation says that the abort method accepts a callback function with two arguments. I've tried a minimal example but it didn't work. I'm not sure how to use the "abort" function.
Could someone provide a minimal example of how to use this function?
I've tried the following code to understand the abort method, but I'm getting an error.
Error:
My environment
The text was updated successfully, but these errors were encountered: