Skip to content

Commit

Permalink
fix: parsing logic
Browse files Browse the repository at this point in the history
  • Loading branch information
lovegaoshi committed Sep 7, 2023
1 parent 60f1324 commit a64058b
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 42 deletions.
3 changes: 1 addition & 2 deletions lib/info-extras.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const utils = require('./utils');
//const qs = require('querystring');
import qs from 'query-string';
const qs = require('querystring');
const { parseTimestamp } = require('./__REACT_NATIVE_YTDL_CUSTOM_MODULES__/m3u8stream');


Expand Down
3 changes: 1 addition & 2 deletions lib/info.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//const querystring = require('querystring');
import querystring from 'query-string';
const querystring = require('querystring');
const sax = require('./__REACT_NATIVE_YTDL_CUSTOM_MODULES__/sax');
const miniget = require('./__REACT_NATIVE_YTDL_CUSTOM_MODULES__/miniget');
const utils = require('./utils');
Expand Down
122 changes: 84 additions & 38 deletions lib/sig.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
/* eslint-disable no-new-func */
//const querystring = require('querystring');
import querystring from 'query-string';
const querystring = require('querystring');
const Cache = require('./cache');
const utils = require('./utils');

let nTransformWarning = false;

// A shared cache to keep track of html5player js functions.
exports.cache = new Cache();

Expand All @@ -24,6 +25,48 @@ exports.getFunctions = (html5playerfile, options) => exports.cache.getOrSet(html
return functions;
});

// eslint-disable-next-line max-len
// https://github.com/TeamNewPipe/NewPipeExtractor/blob/41c8dce452aad278420715c00810b1fed0109adf/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java#L816
const DECIPHER_REGEXPS = [
'(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)' +
'\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*""\\s*\\)',
'\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)',
'\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)',
'([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(""\\)\\s*;',
'\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(""\\)\\s*;',
'\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(',
];

const DECIPHER_ARGUMENT = 'sig';
const N_ARGUMENT = 'ncode';

const matchGroup1 = (regex, str) => {
const match = str.match(new RegExp(regex));
if (!match) throw new Error(`Could not match ${regex}`);
return match[1];
};

const getFuncName = (body, regexps) => {
try {
let fn;
for (const regex of regexps) {
try {
fn = matchGroup1(regex, body);
const idx = fn.indexOf('[0]');
if (idx > -1) fn = matchGroup1(`${fn.slice(0, 3)}=\\[([a-zA-Z0-9$\\[\\]]{2,})\\]`, body);
} catch (err) {
continue;
}
}
if (!fn || fn.includes('[')) throw Error("Couldn't find fn name");
return fn;
} catch (e) {
throw Error(`Please open an issue on ytdl-core GitHub: ${e.message}`);
}
};

const getDecipherFuncName = body => getFuncName(body, DECIPHER_REGEXPS);

/**
* Extracts the actions that should be taken to decipher a signature
* and tranform the n parameter
Expand All @@ -32,44 +75,45 @@ exports.getFunctions = (html5playerfile, options) => exports.cache.getOrSet(html
* @returns {Array.<string>}
*/
exports.extractFunctions = body => {
body = body.replace(/\n|\r/g, '');
const functions = [];
const extractManipulations = caller => {
const functionName = utils.between(caller, `a=a.split("");`, `.`);
if (!functionName) return '';
const functionStart = `var ${functionName}={`;
const ndx = body.indexOf(functionStart);
if (ndx < 0) return '';
const subBody = body.slice(ndx + functionStart.length - 1);
return `var ${functionName}=${utils.cutAfterJS(subBody)}`;
};
// This is required function, so we can't continue if it's not found.
const extractDecipher = () => {
const functionName = utils.between(body, `a.set("alr","yes");c&&(c=`, `(decodeURIC`);
if (functionName && functionName.length) {
const functionStart = `${functionName}=function(a)`;
const ndx = body.indexOf(functionStart);
if (ndx >= 0) {
const subBody = body.slice(ndx + functionStart.length);
let functionBody = `var ${functionStart}${utils.cutAfterJS(subBody)}`;
functionBody = `${extractManipulations(functionBody)};${functionBody};${functionName}(sig);`;
functions.push(functionBody);
}
const decipherFuncName = getDecipherFuncName(body);
try {
const functionPattern = `(${decipherFuncName.replace(/\$/g, '\\$')}=function\\([a-zA-Z0-9_]+\\)\\{.+?\\})`;
const decipherFunction = `var ${matchGroup1(functionPattern, body)};`;
const helperObjectName = matchGroup1(';([A-Za-z0-9_\\$]{2,})\\.\\w+\\(', decipherFunction)
.replace(/\$/g, '\\$');
const helperPattern = `(var ${helperObjectName}=\\{[\\s\\S]+?\\}\\};)`;
const helperObject = matchGroup1(helperPattern, body);
const callerFunction = `${decipherFuncName}(${DECIPHER_ARGUMENT});`;
const resultFunction = helperObject + decipherFunction + callerFunction;
functions.push(resultFunction);
} catch (err) {
throw Error(`Could not parse decipher function: ${err}`);
}
};
const extractNCode = () => {
let functionName = utils.between(body, `&&(b=a.get("n"))&&(b=`, `(b)`);
if (functionName.includes('[')) functionName = utils.between(body, `var ${functionName.split('[')[0]}=[`, `]`);
if (functionName && functionName.length) {
const functionStart = `${functionName}=function(a)`;
const ndx = body.indexOf(functionStart);
if (ndx >= 0) {
const subBody = body.slice(ndx + functionStart.length);
const functionBody = `var ${functionStart}${utils.cutAfterJS(subBody)};${functionName}(ncode);`;
functions.push(functionBody);
// This is optional, so we can continue if it's not found, but it will bottleneck the download.
const extractNTransform = () => {
let nFuncName = utils.between(body, `(b=a.get("n"))&&(b=`, `(b)`);
if (nFuncName.includes('[')) nFuncName = utils.between(body, `${nFuncName.split('[')[0]}=[`, `]`);
if (nFuncName && nFuncName.length) {
const nBegin = `${nFuncName}=function(a)`;
const nEnd = '.join("")};';
const nFunction = utils.between(body, nBegin, nEnd);
if (nFunction) {
const callerFunction = `${nFuncName}(${N_ARGUMENT});`;
const resultFunction = nBegin + nFunction + nEnd + callerFunction;
functions.push(resultFunction);
} else if (!nTransformWarning) {
console.warn('Could not parse n transform function, please report it on @distube/ytdl-core GitHub.');
nTransformWarning = true;
}
}
};
extractDecipher();
extractNCode();
extractNTransform();
return functions;
};

Expand All @@ -83,23 +127,25 @@ exports.extractFunctions = body => {
exports.setDownloadURL = (format, decipherScript, nTransformScript) => {
const decipher = url => {
const args = querystring.parse(url);
if (!args.s || !decipherScript) return args.url;
if (!args.s) return args.url;
const components = new URL(decodeURIComponent(args.url));
// console.log(decipherScript(decodeURIComponent(args.s)))
components.searchParams.set(args.sp ? args.sp : 'signature',
decipherScript(decodeURIComponent(args.s)));
const context = {};
context[DECIPHER_ARGUMENT] = decodeURIComponent(args.s);
components.searchParams.set(args.sp || 'sig', decipherScript(args.s));
return components.toString();
};
const ncode = url => {
const nTransform = url => {
const components = new URL(decodeURIComponent(url));
const n = components.searchParams.get('n');
if (!n || !nTransformScript) return url;
const context = {};
context[N_ARGUMENT] = n;
components.searchParams.set('n', nTransformScript(n));
return components.toString();
};
const cipher = !format.url;
const url = format.url || format.signatureCipher || format.cipher;
format.url = cipher ? ncode(decipher(url)) : ncode(url);
format.url = cipher ? nTransform(decipher(url)) : nTransform(url);
delete format.signatureCipher;
delete format.cipher;
};
Expand Down

0 comments on commit a64058b

Please sign in to comment.