Skip to content

Commit

Permalink
feat(bookmarklet): extract youtube track names from page (#285)
Browse files Browse the repository at this point in the history
Contributes to #262. Follow up of #281 and #282.

## What does this PR do / solve?

After YouTube reduced our API quota, we resorted to specifying "(YouTube track)" as the title of tracks extracted from YouTube pages using the bookmarklet, in order to save some quota.

This PR intends to determine the actual name of the track by extracting text from the DOM, and therefore to get rid of those "(YouTube track)" titles on newly added tracks.

## Overview of changes

- completely get rid of forced "(YouTube track)" titles 🥳
- add feat: "return a track with metadata from a YouTube page that lists that track as a link"
- add feat: "return a track with the expected name when that track was found as a link from a YouTube page" (i.e. remove noise from title)
- add feat: "return the page's track with metadata from a YouTube page when the same track is also listed in the page with less metadata"
- rename `detectPlayemStreams()` --> `detectPlayableStreams()` + integrate `YOUTUBE_PLAYER` (partially imported from PlayemJS, without the API query part) in the bookmarklet, and use it also from tests => first step in progressively getting rid of Playem for track detection
- normalize the way text is extracted from DOM
- cleaning up logs from bookmarklet
- simplify logic and make code a little bit more readable overall

## How to test this PR?

```sh
$ nvm use
$ node_modules/.bin/mocha test/unit/bookmarklet-tests.js
$ docker-compose up --build -d
$ npm run docker:seed && node_modules/.bin/wdio wdio.conf.js
``` 

Also, you can test the bookmarklet manually, from your web browser, by following the instructions provided in the unit test file.
  • Loading branch information
adrienjoly committed Mar 13, 2020
1 parent eb4d8e3 commit 9493f06
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 98 deletions.
179 changes: 94 additions & 85 deletions public/js/bookmarklet.js
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ function bookmarklet(window) {
}

function getNodeText(node) {
return node.innerText || node.textContent;
return (node.innerText || node.textContent || '').trim().split('\n')[0]; // keep just the first line of text (useful for suggested YouTube links that include stats on following lines)
}

function unwrapFacebookLink(src) {
Expand Down Expand Up @@ -327,18 +327,39 @@ function bookmarklet(window) {
};
}

/**
 * Lightweight YouTube "player" exposing the two methods used for track
 * detection: getEid() and fetchMetadata(). It derives every field from the
 * video identifier found in the URL, without querying any API.
 */
var YOUTUBE_PLAYER = {
  /**
   * Extracts the YouTube video identifier from a URL or from a "/yt/<id>" path.
   * @param {string} url - URL (or "/yt/..." identifier) to inspect.
   * @returns {string|undefined} the video identifier, or undefined if `url`
   *   does not match any of the supported YouTube URL patterns.
   */
  getEid: function(url) {
    // code imported from playem-all
    // Each pattern captures the video identifier in its first (and only)
    // capturing group, so that we don't depend on the deprecated
    // RegExp.lastParen static property (shared, mutable global state).
    var patterns = [
      /(?:youtube\.com\/(?:v\/|embed\/|(?:.*)?[\?\&]v=)|youtu\.be\/)([a-zA-Z0-9_\-]+)/,
      /^\/yt\/([a-zA-Z0-9_\-]+)/,
      /youtube\.com\/attribution_link\?.*v\%3D([^ \%]+)/,
      /youtube\.googleapis\.com\/v\/([a-zA-Z0-9_\-]+)/ // dots are escaped, to match that hostname only
    ];
    for (var i = 0; i < patterns.length; ++i) {
      var match = patterns[i].exec(url);
      if (match) return match[1];
    }
    // no pattern matched => implicitly returns undefined
  },
  /**
   * Builds track metadata from the video identifier found in `url`.
   * @param {string} url - URL (or "/yt/..." identifier) of the YouTube video.
   * @param {function} callback - called synchronously with
   *   {id, eId, img, url, playerLabel}, or with no argument if `url` does not
   *   contain a YouTube video identifier.
   */
  fetchMetadata: function(url, callback) {
    var id = this.getEid(url);
    // don't build a track (and URLs) around an "undefined" identifier
    if (!id) return callback();
    callback({
      id: id,
      eId: '/yt/' + id,
      img: 'https://i.ytimg.com/vi/' + id + '/default.jpg',
      url: 'https://www.youtube.com/watch?v=' + id,
      playerLabel: 'Youtube'
    });
  }
};

function initPlayemPlayers() {
window.SOUNDCLOUD_CLIENT_ID = 'eb257e698774349c22b0b727df0238ad';
window.DEEZER_APP_ID = 190482;
window.DEEZER_CHANNEL_URL = urlPrefix + '/html/deezer.channel.html';
window.JAMENDO_CLIENT_ID = 'c9cb2a0a';
window.YOUTUBE_API_KEY = ''; // see https://github.com/openwhyd/openwhyd/issues/262
return (window._whydPlayers = window._whydPlayers || {
yt: YOUTUBE_PLAYER, // instead of new YoutubePlayer(...), to save API quota (see #262)
// playem-all.js must be loaded at that point
yt: new YoutubePlayer(
{},
{ playerContainer: window.document.getElementById('videocontainer') }
),
sc: new SoundCloudPlayer({}),
vi: new VimeoPlayer({}),
dm: new DailymotionPlayer({}),
Expand All @@ -349,7 +370,7 @@ function bookmarklet(window) {
}

// players = { playerId -> { getEid(), fetchMetadata() } }
// returns detectPlayemStreams(url, cb)
// returns detectPlayableStreams(url, callback, element)
function makeStreamDetector(players) {
var eidSet = {}; // to prevent duplicates
function getPlayerId(url) {
Expand All @@ -359,46 +380,34 @@ function bookmarklet(window) {
if (eId) return i;
}
}
function detect(url, cb) {

// an urlDetector must callback with a track Object (with fields: {id, eId, title, img}) as parameter, if detected
return function detectPlayableStreams(url, cb, element) {
// 1. find the matching player and track identifier
var playerId = getPlayerId(url);
var player = playerId && players[playerId];
cb(player && '/' + playerId + '/' + player.getEid(url), player, playerId);
}
return function detectPlayemStreams(url, cb) {
detect(url, function(eid, player, playerId) {
if (!eid || eidSet[eid]) return cb();
var parts = eid.split('#');
var streamUrl = /^https?\:\/\//.test(parts[1] || '') && parts[1];
if (eidSet[parts[0]] && !streamUrl)
// i.e. store if new, overwrite if new occurence contains a streamUrl
return cb();
eidSet[parts[0]] = true;
eidSet[eid] = true;
if (!player || !player.fetchMetadata) return cb({ eId: eid });
else if (playerId === 'yt') {
// we don't fetch metadata from youtube, to save quota (see see https://github.com/openwhyd/openwhyd/issues/262)
var id = parts[0].replace('/yt/', '');
cb({
id: id,
eId: '/yt/' + id,
img: 'https://i.ytimg.com/vi/' + id + '/default.jpg',
url: 'https://www.youtube.com/watch?v=' + id,
title: '(YouTube track)',
sourceId: playerId,
sourceLabel: player.label
});
} else
player.fetchMetadata(url, function(track) {
if (track) {
track = track || {};
track.eId = track.eId || eid.substr(0, 4) + track.id; // || eid;
track.sourceId = playerId;
track.sourceLabel = player.label;
cb(track);
} else {
cb();
}
});
var eid = player && '/' + playerId + '/' + player.getEid(url);
if (!eid || eidSet[eid]) return cb();

// 2. extract the (optional) stream URL from the identifier
var parts = eid.split('#');
var streamUrl = /^https?\:\/\//.test(parts[1] || '') && parts[1];
if (eidSet[parts[0]] && !streamUrl) return cb(); // i.e. store if new, overwrite if new occurence contains a streamUrl

// 3. store the identifier, with and without stream URL, to prevent duplicates
eidSet[parts[0]] = true;
eidSet[eid] = true;
if (!player || !player.fetchMetadata) return cb({ eId: eid }); // quit if we can't enrich the metadata

// 4. try to return the track with enriched metadata
player.fetchMetadata(url, function(track) {
if (!track) return cb();
element = element || {};
track.title = track.title || element.name; // i.e. element.name could have been extracted from the page by one of the DETECTORS
track.eId = track.eId || eid.substr(0, 4) + track.id; // || eid;
track.sourceId = playerId;
track.sourceLabel = player.label;
cb(track);
});
};
}
Expand Down Expand Up @@ -468,7 +477,6 @@ function bookmarklet(window) {
var bcPrefix = '/bc/' + bc.url.split('//')[1].split('.')[0] + '/';
toDetect = bc.trackinfo.map(function(tr) {
if (tr.file) {
//console.log("-------------FILE! =>", tr.file);
var streamUrl = tr.file[Object.keys(tr.file)[0]];
return {
href: streamUrl,
Expand Down Expand Up @@ -519,49 +527,42 @@ function bookmarklet(window) {
];

function detectTracks({ window, ui, urlDetectors }) {
// an url-based detector must callback with a track Object (with fields: {id, eId, title, img}) as parameter, if detected
// an urlDetector must callback with a track Object (with fields: {id, eId, title, img}) as parameter, if detected

function detectTrack(url, cb, element) {
function detectTrack(url, element, cb) {
var remainingUrlDetectors = urlDetectors.slice();
(function processNext() {
if (!remainingUrlDetectors.length) {
cb();
} else {
//console.log('- trying detector ' + (urlDetectors.length-1));
remainingUrlDetectors.shift()(
url,
function(track) {
//console.log(' => ' + typeof track + ' ' + JSON.stringify(track))
if (track && track.id) cb(track);
else processNext();
},
element
);
}
if (!remainingUrlDetectors.length) return cb();
remainingUrlDetectors.shift()(
url,
function(track) {
if (track && track.id) cb(track);
else processNext();
},
element
);
})();
}

function detectEmbed(e, cb) {
var url = e.eId || unwrapFacebookLink(e.href || e.src || e.data || '');
//console.log(url);
if (!url) return cb && cb();
detectTrack(
url,
function(track) {
if (track && track.title) {
track.url = url;
//track.title = track.title || e.textNode || e.title || e.alt || track.eId || url; // || p.label;
if (track.sourceLabel)
track.sourceLogo =
urlPrefix +
'/images/icon-' +
track.sourceLabel.split(' ')[0].toLowerCase() +
'.png';
}
cb(track);
},
e
);
function detectEmbed(element, cb) {
var url =
element.eId ||
unwrapFacebookLink(element.href || element.src || element.data || '');
if (!url) return cb();
detectTrack(url, element, function(track) {
if (track) {
track.url = url;
track.title =
track.title || getNodeText(element) || element.title || element.alt; // || track.eId || url || p.label;
if (track.sourceLabel)
track.sourceLogo =
urlPrefix +
'/images/icon-' +
track.sourceLabel.split(' ')[0].toLowerCase() +
'.png';
}
cb(track);
});
}

function whenDone(searchThumbs) {
Expand All @@ -584,6 +585,9 @@ function bookmarklet(window) {
return undefined;
}
}
function size(elt) {
return (elt.name || getNodeText(elt) || '').length;
}
this.has = function(url) {
var normalized = normalize(url);
return normalized && !!set[normalized];
Expand All @@ -594,7 +598,11 @@ function bookmarklet(window) {
normalize(
elt.eId || unwrapFacebookLink(elt.href || elt.src || elt.data || '')
);
if (url) set[url] = elt;
if (!url) return;
var existingElt = set[url];
if (!existingElt || size(elt) > size(existingElt)) {
set[url] = elt;
}
};
this.getSortedArray = function() {
var eIds = [],
Expand All @@ -610,7 +618,7 @@ function bookmarklet(window) {

DETECTORS.map(function(detectFct) {
var results = detectFct(window) || [];
console.info('-----' + detectFct.name, '=>', results);
console.info('-----' + detectFct.name, '=>', results.length);
results.map(function(result) {
toDetect.push(result);
});
Expand Down Expand Up @@ -654,6 +662,7 @@ function bookmarklet(window) {
});
} else {
return {
YOUTUBE_PLAYER,
detectTracks,
makeFileDetector,
makeStreamDetector
Expand Down

0 comments on commit 9493f06

Please sign in to comment.