Permalink
Browse files

add button uniqueness type to element click selector. closes #56

1 parent b0eca02 commit 038731969e5a5a858c7fc3758ba871239b966b07 @martinsbalodis committed Sep 16, 2014
@@ -18,6 +18,8 @@ events triggered by the button.
be clicked to load more elements.
* click type - how the selector determines that no new elements will
appear and clicking should stop.
+ * click element uniqueness - how the selector decides which buttons have
+ already been clicked.
* multiple - multiple records are being extracted (almost always should be
checked). Multiple option for child selectors usually should not be checked.
* delay - delay before element selection and delay between clicking. This
@@ -42,6 +44,18 @@ Click More type makes the selector click on given buttons multiple times
until there are no new elements appearing. A new element is considered an
element that has unique text content.
+### Click element uniqueness
+
+When using *Click Once*, only unique buttons will be clicked. When using
+*Click More*, this helps to ignore buttons that don't generate more elements.
+
+ * Unique Text - buttons with identical text content are considered equal
+ * Unique HTML+Text - buttons with identical HTML and text content are
+ considered equal
+ * Unique HTML - buttons with identical HTML once all text content has been
+ stripped out are considered equal
+ * Unique CSS Selector - buttons with identical CSS Selector are considered equal
+
## Use cases
#### Navigate pagination using "Click once" selector type
@@ -89,6 +89,19 @@
</div>
</div>
+ <!-- clickElementUniquenessType -->
+ <div class="form-group feature feature-clickElementUniquenessType">
+ <label for="clickElementUniquenessType" class="col-lg-1 control-label">Click element uniqueness</label>
+ <div class="input-group col-lg-10">
+ <select class="form-control" id="clickElementUniquenessType" name="clickElementUniquenessType">
+ <option value="uniqueText">Unique Text</option>
+ <option value="uniqueHTMLText">Unique HTML+Text</option>
+ <option value="uniqueHTML">Unique HTML</option>
+ <option value="uniqueCSSSelector">Unique CSS Selector</option>
+ </select>
+ </div>
+ </div>
+
<div class="form-group feature feature-multiple">
<div class="col-lg-offset-1 col-lg-10">
<div class="checkbox">
@@ -788,6 +788,10 @@ SitemapController.prototype = {
if(selector.clickType) {
$editSelectorForm.find("[name=clickType]").val(selector.clickType);
}
+ // set clickElementUniquenessType
+ if(selector.clickElementUniquenessType) {
+ $editSelectorForm.find("[name=clickElementUniquenessType]").val(selector.clickElementUniquenessType);
+ }
// handle selects separately
$editSelectorForm.find("[name=type]").val(selector.type);
@@ -851,6 +855,7 @@ SitemapController.prototype = {
var tableHeaderRowSelector = $("#edit-selector [name=tableHeaderRowSelector]").val();
var clickElementSelector = $("#edit-selector [name=clickElementSelector]").val();
var type = $("#edit-selector [name=type]").val();
+ var clickElementUniquenessType = $("#edit-selector [name=clickElementUniquenessType]").val();
var clickType = $("#edit-selector [name=clickType]").val();
var discardInitialElements = $("#edit-selector [name=discardInitialElements]").is(":checked");
var multiple = $("#edit-selector [name=multiple]").is(":checked");
@@ -882,6 +887,7 @@ SitemapController.prototype = {
tableHeaderRowSelector: tableHeaderRowSelector,
tableDataRowSelector: tableDataRowSelector,
clickElementSelector: clickElementSelector,
+ clickElementUniquenessType: clickElementUniquenessType,
clickType: clickType,
discardInitialElements: discardInitialElements,
type: type,
@@ -95,6 +95,16 @@ var SelectorElementClick = {
}
},
+ getClickElementUniquenessType: function() {
+
+ if(this.clickElementUniquenessType === undefined) {
+ return 'uniqueText';
+ }
+ else {
+ return this.clickElementUniquenessType;
+ }
+ },
+
getDataClickOnce: function(parentElement) {
var delay = parseInt(this.delay) || 0;
@@ -105,12 +115,12 @@ var SelectorElementClick = {
var deferredResultCalls = [];
// each distinct click button is clicked once; what counts as distinct is decided by the click element uniqueness type
- var clickedButtons = {};
+ var doneClickingElements = new UniqueElementList(this.getClickElementUniquenessType());
+
var extractElementsAfterUniqueButtonClick = function(button) {
- var buttonText = $(button).text().trim();
- if(!(buttonText in clickedButtons)) {
- clickedButtons[buttonText] = true;
+ if(!doneClickingElements.isAdded(button)) {
+ doneClickingElements.push(button);
deferredResultCalls.push(function() {
@@ -136,7 +146,7 @@ var SelectorElementClick = {
var deferredResponse = $.Deferred();
$.whenCallSequentially(deferredResultCalls).done(function(results) {
- var dataElements = [];
+ var dataElements = new UniqueElementList("uniqueText");
// elements that we got after clicking
results.forEach(function(elements) {
@@ -161,23 +171,23 @@ var SelectorElementClick = {
var delay = parseInt(this.delay) || 0;
var deferredResponse = $.Deferred();
- var foundElements = new UniqueElementList();
+ var foundElements = new UniqueElementList('uniqueText');
var clickElements = this.getClickElements(parentElement);
- var doneClickingElements = new UniqueElementList();
+ var doneClickingElements = new UniqueElementList(this.getClickElementUniquenessType());
// add elements that are available before clicking
var elements = this.getDataElements(parentElement);
elements.forEach(foundElements.push.bind(foundElements));
// discard initial elements
if(this.discardInitialElements) {
- foundElements = new UniqueElementList();
+ foundElements = new UniqueElementList('uniqueText');
}
// no elements to click at the beginning
if(clickElements.length === 0) {
deferredResponse.resolve(foundElements);
- return;
+ return deferredResponse.promise();
}
// initial click and wait
@@ -239,6 +249,6 @@ var SelectorElementClick = {
},
getFeatures: function () {
- return ['multiple', 'delay', 'clickElementSelector', 'clickType', 'discardInitialElements']
+ return ['multiple', 'delay', 'clickElementSelector', 'clickType', 'discardInitialElements', 'clickElementUniquenessType']
}
};
@@ -1,8 +1,9 @@
/**
- * Only Elements with unique text will be added to this array
+ * Only elements that are unique under the given uniqueness type will be added to this array
* @constructor
*/
-UniqueElementList = function() {
+UniqueElementList = function(clickElementUniquenessType) {
+ this.clickElementUniquenessType = clickElementUniquenessType;
this.addedElements = {};
};
@@ -14,22 +15,60 @@ UniqueElementList.prototype.push = function(element) {
return false;
}
else {
- var elementTxt = this.getElementText(element);
- this.addedElements[elementTxt] = true;
+ var elementUniqueId = this.getElementUniqueId(element);
+ this.addedElements[elementUniqueId] = true;
Array.prototype.push.call(this, $(element).clone(true)[0]);
return true;
}
};
-UniqueElementList.prototype.getElementText = function(element) {
+UniqueElementList.prototype.getElementUniqueId = function(element) {
- var elementTxt = $(element).text().trim();
- return elementTxt;
+ if(this.clickElementUniquenessType === 'uniqueText') {
+ var elementText = $(element).text().trim();
+ return elementText;
+ }
+ else if(this.clickElementUniquenessType === 'uniqueHTMLText') {
+
+ var elementHTML = $("<div class='-web-scraper-should-not-be-visible'>").append($(element).eq(0).clone()).html();
+ return elementHTML;
+ }
+ else if(this.clickElementUniquenessType === 'uniqueHTML') {
+
+ // get element without text
+ var $element = $(element).eq(0).clone();
+
+ var removeText = function($element) {
+ $element.contents()
+ .filter(function() {
+ if(this.nodeType !== 3) {
+ removeText($(this));
+ }
+ return this.nodeType == 3; //Node.TEXT_NODE
+ }).remove();
+ };
+ removeText($element);
+
+ var elementHTML = $("<div class='-web-scraper-should-not-be-visible'>").append($element).html();
+ return elementHTML;
+ }
+ else if(this.clickElementUniquenessType === 'uniqueCSSSelector') {
+ var cs = new CssSelector({
+ enableSmartTableSelector: false,
+ parent: $("body")[0],
+ enableResultStripping:false
+ });
+ var CSSSelector = cs.getCssSelector([element]);
+ return CSSSelector;
+ }
+ else {
+ throw "Invalid clickElementUniquenessType "+this.clickElementUniquenessType;
+ }
};
UniqueElementList.prototype.isAdded = function(element) {
- var elementTxt = this.getElementText(element);
- var isAdded = elementTxt in this.addedElements;
+ var elementUniqueId = this.getElementUniqueId(element);
+ var isAdded = elementUniqueId in this.addedElements;
return isAdded;
};
@@ -13,7 +13,63 @@ describe("UniqueElementList", function () {
$el.html("<a>1</a><a>2</a>");
- var list = new UniqueElementList();
+ var list = new UniqueElementList('uniqueText');
+ expect(list.length).toEqual(0);
+
+ var $a = $el.find("a");
+ list.push($a[0]);
+ expect(list.length).toEqual(1);
+ list.push($a[0]);
+ expect(list.length).toEqual(1);
+ list.push($a[1]);
+ expect(list.length).toEqual(2);
+ list.push($a[1]);
+ expect(list.length).toEqual(2);
+ });
+
+ it("it should add only unique elements when using uniqueHTMLText type", function () {
+
+ $el.html("<a id='1'>a</a><a id='2'>a</a>");
+
+ var list = new UniqueElementList('uniqueHTMLText');
+ expect(list.length).toEqual(0);
+
+ var $a = $el.find("a");
+ list.push($a[0]);
+ expect(list.length).toEqual(1);
+ list.push($a[0]);
+ expect(list.length).toEqual(1);
+ list.push($a[1]);
+ expect(list.length).toEqual(2);
+ list.push($a[1]);
+ expect(list.length).toEqual(2);
+ });
+
+ it("it should add only unique elements when using uniqueHTML type", function () {
+
+ $el.html("<a class='1'>a<span>a</span></a><a class='2'>a<span>b</span></a><a class='1'>c<span>c</span></a>");
+
+ var list = new UniqueElementList('uniqueHTML');
+ expect(list.length).toEqual(0);
+
+ var $a = $el.find("a");
+ list.push($a[0]);
+ expect(list.length).toEqual(1);
+ list.push($a[0]);
+ expect(list.length).toEqual(1);
+ list.push($a[1]);
+ expect(list.length).toEqual(2);
+ list.push($a[1]);
+ expect(list.length).toEqual(2);
+ list.push($a[2]);
+ expect(list.length).toEqual(2);
+ });
+
+ it("it should add only unique elements when using uniqueCSSSelector type", function () {
+
+ $el.html("<a></a><a></a>");
+
+ var list = new UniqueElementList('uniqueCSSSelector');
expect(list.length).toEqual(0);
var $a = $el.find("a");

0 comments on commit 0387319

Please sign in to comment.