Skip to content

Commit

Permalink
Fixed the RipMeApp#1777 issue with Xhamster support.
Browse files Browse the repository at this point in the history
Just improving the bug-fix. TO-DO: Need to check the test cases.
  • Loading branch information
mukulj77 committed Nov 20, 2020
1 parent 2cb978f commit c1f98a1
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 12 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,4 @@ history.json
.classpath
*.txt
bin/
/bin/
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@
<artifactId>jsoup</artifactId>
<version>1.8.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.11.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.11.3</version>
</dependency>
<dependency>
<groupId>org.graalvm.js</groupId>
<artifactId>js</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,23 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.core.type.TypeReference;

// WARNING
// This ripper changes all requests to use the MOBILE version of the site
Expand Down Expand Up @@ -138,18 +145,70 @@ private boolean isVideoUrl(URL url) {
}

@Override
public Document getNextPage(Document doc) throws IOException {
if (doc.select("a[rel=next]").first() != null) {
String nextPageUrl = doc.select("a[rel=next]").first().attr("href");
if (nextPageUrl.startsWith("http")) {
nextPageUrl = nextPageUrl.replaceAll("https?://\\w?\\w?\\.?xhamster\\.", "https://m.xhamster.");
nextPageUrl = nextPageUrl.replaceAll("https?://xhamster2\\.", "https://m.xhamster2.");
return Http.url(nextPageUrl).get();
}
}
throw new IOException("No more pages");
/**
 * Loads the page that follows {@code doc}.
 *
 * The next-page link is not read from the HTML; it is derived from the
 * pagination data inside the JavaScript of the {@code #initials-script}
 * element (a script whose text starts with {@code window.initials}).
 *
 * @param doc the page currently being ripped
 * @return the next page, requested through the mobile ("m.") host
 * @throws IOException with message "No more pages" when no next-page URL
 *         could be determined, or when fetching the next page fails
 */
public Document getNextPage(Document doc) throws IOException {
    String nextPageUrl = "";
    for (Element script : doc.select("#initials-script")) {
        for (DataNode node : script.dataNodes()) {
            String scriptText = node.getWholeData();
            if (scriptText == null || !scriptText.startsWith("window.initials")) {
                continue;
            }
            // The script assigns a JSON object literal; keep only the text from the
            // first '{' to the last '}' (inclusive) so it can be parsed as JSON.
            int jsonStart = scriptText.indexOf('{');
            int jsonEnd = scriptText.lastIndexOf('}');
            if (jsonStart < 0 || jsonEnd < jsonStart) {
                // No object literal present: nothing to parse, so don't call substring
                // with invalid bounds.
                continue;
            }
            nextPageUrl = getNextPageUrl(scriptText.substring(jsonStart, jsonEnd + 1));
        }
    }

    if (nextPageUrl.startsWith("http")) {
        // Rewrite the host so the request goes to the mobile version of the site.
        nextPageUrl = nextPageUrl.replaceAll("https?://\\w?\\w?\\.?xhamster\\.", "https://m.xhamster.");
        nextPageUrl = nextPageUrl.replaceAll("https?://xhamster2\\.", "https://m.xhamster2.");
        return Http.url(nextPageUrl).get();
    }
    throw new IOException("No more pages");
}
}

/**
 * Derives the URL of the next page from the page's embedded JSON state.
 *
 * Reads the {@code pagination} object and, when the {@code active} page is
 * below {@code maxPages} or {@code maxPage}, substitutes the {@code next}
 * page number for the <code>{#}</code> placeholder in {@code pageLinkTemplate}.
 *
 * @param jsonStr text of the JSON object extracted from the page script
 * @return the next page URL, or an empty string when there is no further
 *         page or the pagination data is missing or malformed
 */
private String getNextPageUrl(String jsonStr) {
    String nextPageUrl = "";
    if (jsonStr == null || jsonStr.isEmpty()) {
        return nextPageUrl;
    }
    try {
        ObjectMapper mapper = new ObjectMapper();
        Map<String, Object> jsonMap = mapper.readValue(jsonStr, new TypeReference<Map<String, Object>>(){}); // converts JSON to Map
        Object paginationNode = jsonMap.get("pagination");
        if (paginationNode instanceof Map<?, ?>) {
            // Values may be numbers or strings depending on the JSON, so keep them
            // untyped and go through String.valueOf before parsing.
            Map<?, ?> pagination = (Map<?, ?>) paginationNode;
            if (pagination.containsKey("active") && pagination.containsKey("next") && pagination.containsKey("pageLinkTemplate")) {
                int active = Integer.parseInt(String.valueOf(pagination.get("active")));
                int next = Integer.parseInt(String.valueOf(pagination.get("next")));

                // Either "maxPages" or "maxPage" may carry the page count; a missing
                // key is simply ignored instead of being parsed as the text "null".
                boolean hasMorePages = false;
                for (String limitKey : new String[] {"maxPages", "maxPage"}) {
                    Object limit = pagination.get(limitKey);
                    if (limit != null && active < Integer.parseInt(String.valueOf(limit))) {
                        hasMorePages = true;
                    }
                }

                if (hasMorePages) {
                    nextPageUrl = String.valueOf(pagination.get("pageLinkTemplate")).replace("{#}", String.valueOf(next));
                }
            }
        }
    } catch (IOException | NumberFormatException e) {
        // Best effort: unparsable JSON or non-numeric page values mean "no next page".
        e.printStackTrace();
        nextPageUrl = "";
    }
    return nextPageUrl;
}

@Override
public List<String> getURLsFromPage(Document doc) {
Expand Down Expand Up @@ -193,7 +252,7 @@ private void downloadFile(String url) {
LOGGER.error("The url \"" + url + "\" is malformed");
}
}

@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
try {
Expand Down

0 comments on commit c1f98a1

Please sign in to comment.