Skip to content
This repository was archived by the owner on Aug 15, 2023. It is now read-only.

Commit c19a3b9

Browse files
committed
fix(tktrex): downloader metadata format, trimmed verbosity, execution duplication
1 parent d311c04 commit c19a3b9

File tree

4 files changed

+23
-17
lines changed

4 files changed

+23
-17
lines changed

platforms/tktrex/backend/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"tto:start": "DEBUG=\"*,-express:*,-body-parser:*,-send\" ts-node bin/observatory",
1212
"watch": "key=fuffa DEBUG=\"*,-express:*,-body-parser:*,-send\" ts-node-dev -r tsconfig-paths/register --respawn --transpile-only bin/server",
1313
"start": "DEBUG=\"*,-body-parser:*,-express:*,-lib:cache,-send\" ts-node bin/server",
14-
"parserv": "DEBUG=\"*\" ts-node bin/parser",
14+
"parserv": "DEBUG=\"*,-@trex:htmls:debug\" ts-node bin/parser",
1515
"parserv:watch": "DEBUG=\"*\" ts-node-dev -r tsconfig-paths/register bin/parser"
1616
},
1717
"author": "https://github.com/tracking-exposed/tktrex/graphs/contributors",

platforms/tktrex/backend/parsers/description.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ function videoDescriptionGuess(envelop) {
1313
});
1414
let retval = null;
1515
if (uno) {
16-
debug("first condition happened in this 'video'");
16+
debug('format kind (1): %s', uno.textContent);
1717
retval = { description: uno.textContent };
1818
} else if (due) {
19-
debug("second condition happened in this 'video'");
19+
debug('format kind (2): %s', due.textContent);
2020
retval = { description: due.textContent };
2121
} else if (tre && treTopSize.length) {
2222
debug(
23-
"third condition happened in this 'video', picking the first of %j",
23+
'format kind (3), picking the first of %j',
2424
_.compact(
2525
_.map(treTopSize, function (o) {
2626
return o.getAttribute('alt');
@@ -29,7 +29,7 @@ function videoDescriptionGuess(envelop) {
2929
);
3030
retval = { description: treTopSize[0].getAttribute('alt') };
3131
} else {
32-
debug("Failure condition in this 'video'!");
32+
debug('All the extraction approaches failed');
3333
return null;
3434
}
3535
return retval;

platforms/tktrex/backend/parsers/downloader.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import { ParserFn } from '@shared/providers/parser.provider';
2+
import _ from 'lodash';
23
import D from 'debug';
34
import { HTMLSource } from '../lib/parser';
45
import { getUUID, download } from './shared';
56
import fs from 'fs';
67
import path from 'path';
78

8-
const debug = D('parser:native');
9+
const debug = D('parser:downloader');
910

1011
// eslint-disable-next-line @typescript-eslint/explicit-function-return-type
1112
async function processLink({ link, linkType }: { link: any; linkType: any }) {
@@ -75,12 +76,21 @@ const downloader: ParserFn<HTMLSource, any> = async (envelop, findings) => {
7576
const retval = [];
7677
for (const img of imageNodes) {
7778
const url = img.getAttribute('src');
79+
const alt = img.getAttribute('alt');
80+
7881
const info = await processLink({ link: url, linkType: 'thumbnail' });
82+
info.url = url;
83+
if (alt?.length) info.alt = alt;
84+
7985
retval.push(info);
8086
}
8187

82-
debug('processing native video entry %O %O', envelop, findings);
83-
return retval;
88+
debug(
89+
'reported as downloaded %d links (%O)',
90+
retval.length,
91+
_.countBy(retval, 'reason')
92+
);
93+
return { downloader: retval };
8494
};
8595

8696
/*

platforms/tktrex/backend/parsers/native.ts

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
import { ParserFn } from '@shared/providers/parser.provider';
22
import D from 'debug';
3-
import _ from 'lodash';
43
import { HTMLSource } from '../lib/parser';
54
import parseAuthor from './author';
6-
import parseDescription from './description';
75
import parseHashtags from './hashtags';
86
import parseMusic from './music';
97
import parseMetrics from './numbers';
@@ -14,23 +12,21 @@ const parseNativeVideo: ParserFn<HTMLSource, any> = async (
1412
envelop,
1513
findings
1614
) => {
17-
debug(
18-
'processing native video entry %O %O',
19-
_.omit(envelop, ['html.html', 'jsdom']),
20-
findings
21-
);
22-
2315
if (envelop.html.type !== 'native') {
2416
debug('entry is not "native" (%s)', envelop.html.type);
2517
return null;
2618
}
19+
debug('processing native video entry: %s', envelop.html.href);
2720

2821
const music = await parseMusic(envelop, findings);
2922
const author = await parseAuthor(envelop, findings);
30-
const description = await parseDescription(envelop, findings);
3123
const metrics = await parseMetrics(envelop, findings);
3224
const hashtags = await parseHashtags(envelop, findings);
3325

26+
// description is already available at this point!
27+
// const description = await parseDescription(envelop, findings);
28+
const description = findings.description;
29+
3430
return {
3531
nature: { type: 'native' },
3632
type: 'native',

0 commit comments

Comments
 (0)