Skip to content

Commit

Permalink
Fix timestamp links in Youtube video descriptions
Browse files Browse the repository at this point in the history
For some reason, comments were handled in NewPipeExtractor as text,
sent to NewPipe, and timestamps converted to hyperlinks using Linkify:
TeamNewPipe/NewPipe#2168

On the other hand,
video descriptions were handled in NewPipeExtractor as HTML.
There, timestamp links were previously mangled (and now properly parsed),
before being sent to NewPipe as HTML (where it's converted to Spanned).
  • Loading branch information
nyanpasu64 committed Aug 18, 2019
1 parent 430da57 commit 69ea009
Showing 1 changed file with 44 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
* Created by Christian Schabesberger on 06.08.15.
Expand Down Expand Up @@ -162,14 +164,54 @@ public String getDescription() throws ParsingException {
}
}

// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
// :00 is NOT recognized as a timestamp in description or comments.
// 0:00 is recognized in both description and comments.
// https://www.youtube.com/watch?v=4cccfDXu1vA
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
"seekTo\\("
+ "(?:(\\d+)\\*3600\\+)?" // hours?
+ "(\\d+)\\*60\\+" // minutes?
+ "(\\d+)" // seconds
+ "\\)");

@SafeVarargs
private static <T> T coalesce(T... args) {
for (T arg : args) {
if (arg != null) return arg;
}
throw new IllegalArgumentException("all arguments to coalesce() were null");
}

private String parseHtmlAndGetFullLinks(String descriptionHtml)
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
final Document description = Jsoup.parse(descriptionHtml, getUrl());
for(Element a : description.select("a")) {
final String rawUrl = a.attr("abs:href");
final URL redirectLink = new URL(rawUrl);
final String queryString = redirectLink.getQuery();
if(queryString != null) {

final Matcher onClickTimestamp;
final String queryString;
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
.find()) {
a.removeAttr("onclick");

String hours = coalesce(onClickTimestamp.group(1), "0");
String minutes = onClickTimestamp.group(2);
String seconds = onClickTimestamp.group(3);

int timestamp = 0;
timestamp += Integer.parseInt(hours) * 3600;
timestamp += Integer.parseInt(minutes) * 60;
timestamp += Integer.parseInt(seconds);

String setTimestamp = "&t=" + timestamp;

// Even after clicking https://youtu.be/...?t=6,
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
a.attr("href", getUrl() + setTimestamp);

} else if((queryString = redirectLink.getQuery()) != null) {
// if the query string is null we are not dealing with a redirect link,
// so we don't need to override it.
final String link =
Expand Down

0 comments on commit 69ea009

Please sign in to comment.