Skip to content

Commit

Permalink
fix: Actualiser le scraper de TikTok.
Browse files Browse the repository at this point in the history
  • Loading branch information
regseb committed Jan 29, 2024
1 parent 9bee8b7 commit 80b0556
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 25 deletions.
9 changes: 5 additions & 4 deletions src/core/scraper/tiktok.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@ import { matchPattern } from "../tools/matchpattern.js";
*/
const action = async function (_url, metadata) {
    // Look for the JSON state embedded in the page and return the playback
    // address of the video it describes, or undefined when none is found.
    const doc = await metadata.html();

    // Only the "__UNIVERSAL_DATA_FOR_REHYDRATION__" script is queried; the
    // former "SIGI_STATE" script / "ItemModule" lookup is dropped.
    const script = doc.querySelector(
        "script#__UNIVERSAL_DATA_FOR_REHYDRATION__",
    );
    if (null === script) {
        return undefined;
    }

    const json = JSON.parse(script.text);
    // The embedded JSON is external data: any level of the path may be
    // missing (e.g. a page that is not a video), so each step is guarded and
    // an absent video yields undefined instead of throwing a TypeError.
    return json["__DEFAULT_SCOPE__"]?.["webapp.video-detail"]?.itemInfo
        ?.itemStruct?.video?.playAddr;
};
// Public entry point of this scraper: `action` wrapped by matchPattern() with
// the TikTok URL pattern (presumably so that it only runs for matching URLs —
// confirm against ../tools/matchpattern.js).
export const extract = matchPattern(action, "*://www.tiktok.com/*");
19 changes: 13 additions & 6 deletions test/unit/core/scraper/lemonde.js
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,20 @@ describe("core/scraper/lemonde.js", function () {
const stub = sinon.stub(globalThis, "fetch").resolves(
new Response(
`<html><body>
<script id="SIGI_STATE">${JSON.stringify({
AppContext: {},
ItemModule: [
{
video: { playAddr: "http://foo.io/bar.mp4" },
<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__"
>${JSON.stringify({
__DEFAULT_SCOPE__: {
"webapp.video-detail": {
itemInfo: {
itemStruct: {
video: {
playAddr:
"http://foo.io/bar.mp4",
},
},
},
},
],
},
})}</script>
</body></html>`,
{ headers: { "Content-Type": "text/html" } },
Expand Down
36 changes: 21 additions & 15 deletions test/unit/core/scraper/tiktok.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,9 @@ describe("core/scraper/tiktok.js", function () {
Promise.resolve(
new DOMParser().parseFromString(
`<html><body>
<script id="SIGI_STATE">${JSON.stringify({
AppContext: {},
<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__"
>${JSON.stringify({
__DEFAULT_SCOPE__: {},
})}</script>
</body></html>`,
"text/html",
Expand All @@ -58,19 +59,24 @@ describe("core/scraper/tiktok.js", function () {
html: () =>
Promise.resolve(
new DOMParser().parseFromString(
`
<html><body>
<script id="SIGI_STATE">${JSON.stringify({
AppContext: {},
ItemModule: [
{
video: {
playAddr: "https://bar.com/baz.mp4",
},
},
],
})}</script>
</body></html>`,
`<html><body>
<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__"
>${JSON.stringify({
__DEFAULT_SCOPE__: {
"webapp.video-detail": {
itemInfo: {
itemStruct: {
video: {
playAddr:
"https://bar.com" +
"/baz.mp4",
},
},
},
},
},
})}</script>
</body></html>`,
"text/html",
),
),
Expand Down

0 comments on commit 80b0556

Please sign in to comment.