Permalink
Browse files

feat: add content format output options (#256)

  • Loading branch information...
adampash committed Feb 8, 2019
1 parent a57f29e commit 9b0664bc9109442de088cda8d61e8054e894c8bd
Showing with 1,904 additions and 23,749 deletions.
  1. +19 −6 cli.js
  2. +1,641 −2,651 dist/mercury.js
  3. +1 −20,972 dist/mercury.web.js
  4. +3 −1 package.json
  5. +1 −1 preview
  6. +2 −2 scripts/comment-on-pr.js
  7. +2 −0 scripts/find-and-replace.sh
  8. +1 −1 scripts/generate-custom-parser.js
  9. +1 −1 scripts/templates/custom-extractor-test.js
  10. +1 −1 scripts/update-fixtures.js
  11. +1 −1 src/extractors/custom/247sports.com/index.test.js
  12. +1 −1 src/extractors/custom/abcnews.go.com/index.test.js
  13. +1 −1 src/extractors/custom/deadspin.com/index.test.js
  14. +1 −1 src/extractors/custom/fandom.wikia.com/index.test.js
  15. +1 −1 src/extractors/custom/fortune.com/index.test.js
  16. +1 −1 src/extractors/custom/forward.com/index.test.js
  17. +1 −1 src/extractors/custom/fusion.net/index.test.js
  18. +1 −1 src/extractors/custom/gothamist.com/index.test.js
  19. +1 −1 src/extractors/custom/hellogiggles.com/index.test.js
  20. +1 −1 src/extractors/custom/ici.radio-canada.ca/index.test.js
  21. +1 −1 src/extractors/custom/mashable.com/index.test.js
  22. +2 −2 src/extractors/custom/medium.com/index.test.js
  23. +1 −1 src/extractors/custom/money.cnn.com/index.test.js
  24. +1 −1 src/extractors/custom/newrepublic.com/index.test.js
  25. +1 −1 src/extractors/custom/news.nationalgeographic.com/index.test.js
  26. +4 −4 src/extractors/custom/obamawhitehouse.archives.gov/index.test.js
  27. +1 −1 src/extractors/custom/observer.com/index.test.js
  28. +1 −1 src/extractors/custom/pagesix.com/index.test.js
  29. +1 −1 src/extractors/custom/people.com/index.test.js
  30. +1 −1 src/extractors/custom/qz.com/index.test.js
  31. +1 −1 src/extractors/custom/sciencefly.com/index.test.js
  32. +1 −1 src/extractors/custom/thefederalistpapers.org/index.test.js
  33. +1 −1 src/extractors/custom/thoughtcatalog.com/index.test.js
  34. +3 −1 src/extractors/custom/twitter.com/index.test.js
  35. +1 −1 src/extractors/custom/uproxx.com/index.test.js
  36. +1 −1 src/extractors/custom/www.al.com/index.test.js
  37. +1 −1 src/extractors/custom/www.americanow.com/index.test.js
  38. +1 −1 src/extractors/custom/www.androidcentral.com/index.test.js
  39. +1 −1 src/extractors/custom/www.aol.com/index.test.js
  40. +1 −1 src/extractors/custom/www.apartmenttherapy.com/index.test.js
  41. +3 −3 src/extractors/custom/www.bloomberg.com/index.test.js
  42. +1 −1 src/extractors/custom/www.broadwayworld.com/index.test.js
  43. +1 −1 src/extractors/custom/www.bustle.com/index.test.js
  44. +2 −2 src/extractors/custom/www.buzzfeed.com/index.test.js
  45. +1 −1 src/extractors/custom/www.cbssports.com/index.test.js
  46. +1 −1 src/extractors/custom/www.chicagotribune.com/index.test.js
  47. +1 −1 src/extractors/custom/www.cinemablend.com/index.test.js
  48. +1 −1 src/extractors/custom/www.cnbc.com/index.test.js
  49. +1 −1 src/extractors/custom/www.cnet.com/index.test.js
  50. +1 −1 src/extractors/custom/www.cnn.com/index.test.js
  51. +1 −1 src/extractors/custom/www.dmagazine.com/index.test.js
  52. +1 −1 src/extractors/custom/www.eonline.com/index.test.js
  53. +1 −1 src/extractors/custom/www.fastcompany.com/index.test.js
  54. +1 −1 src/extractors/custom/www.fool.com/index.test.js
  55. +1 −1 src/extractors/custom/www.fortinet.com/index.test.js
  56. +1 −1 src/extractors/custom/www.howtogeek.com/index.test.js
  57. +1 −1 src/extractors/custom/www.huffingtonpost.com/index.test.js
  58. +1 −1 src/extractors/custom/www.inquisitr.com/index.test.js
  59. +1 −1 src/extractors/custom/www.latimes.com/index.test.js
  60. +1 −1 src/extractors/custom/www.linkedin.com/index.test.js
  61. +1 −1 src/extractors/custom/www.littlethings.com/index.test.js
  62. +1 −1 src/extractors/custom/www.macrumors.com/index.test.js
  63. +1 −1 src/extractors/custom/www.mentalfloss.com/index.test.js
  64. +1 −1 src/extractors/custom/www.miamiherald.com/index.test.js
  65. +1 −1 src/extractors/custom/www.msn.com/index.test.js
  66. +1 −1 src/extractors/custom/www.msnbc.com/index.test.js
  67. +1 −1 src/extractors/custom/www.nationalgeographic.com/index.test.js
  68. +1 −1 src/extractors/custom/www.nbcnews.com/index.test.js
  69. +2 −2 src/extractors/custom/www.newyorker.com/index.test.js
  70. +1 −1 src/extractors/custom/www.nj.com/index.test.js
  71. +1 −1 src/extractors/custom/www.npr.org/index.test.js
  72. +1 −1 src/extractors/custom/www.nydailynews.com/index.test.js
  73. +3 −3 src/extractors/custom/www.nytimes.com/index.test.js
  74. +1 −1 src/extractors/custom/www.opposingviews.com/index.test.js
  75. +1 −1 src/extractors/custom/www.politico.com/index.test.js
  76. +1 −1 src/extractors/custom/www.popsugar.com/index.test.js
  77. +1 −1 src/extractors/custom/www.prospectmagazine.co.uk/index.test.js
  78. +1 −1 src/extractors/custom/www.qdaily.com/index.test.js
  79. +1 −1 src/extractors/custom/www.rawstory.com/index.test.js
  80. +1 −1 src/extractors/custom/www.recode.net/index.test.js
  81. +1 −1 src/extractors/custom/www.refinery29.com/index.test.js
  82. +1 −1 src/extractors/custom/www.reuters.com/index.test.js
  83. +1 −1 src/extractors/custom/www.rollingstone.com/index.test.js
  84. +1 −1 src/extractors/custom/www.sbnation.com/index.test.js
  85. +1 −1 src/extractors/custom/www.si.com/index.test.js
  86. +1 −1 src/extractors/custom/www.slate.com/index.test.js
  87. +1 −1 src/extractors/custom/www.theatlantic.com/index.test.js
  88. +1 −1 src/extractors/custom/www.theguardian.com/index.test.js
  89. +1 −1 src/extractors/custom/www.thepennyhoarder.com/index.test.js
  90. +1 −1 src/extractors/custom/www.thepoliticalinsider.com/index.test.js
  91. +1 −1 src/extractors/custom/www.theverge.com/index.test.js
  92. +1 −1 src/extractors/custom/www.tmz.com/index.test.js
  93. +1 −1 src/extractors/custom/www.today.com/index.test.js
  94. +7 −4 src/extractors/custom/www.usmagazine.com/index.test.js
  95. +1 −1 src/extractors/custom/www.vox.com/index.test.js
  96. +1 −1 src/extractors/custom/www.washingtonpost.com/index.test.js
  97. +1 −1 src/extractors/custom/www.westernjournalism.com/index.test.js
  98. +1 −1 src/extractors/custom/www.wired.com/index.test.js
  99. +1 −1 src/extractors/custom/www.yahoo.com/index.test.js
  100. +1 −1 src/extractors/custom/www.youtube.com/index.test.js
  101. +14 −2 src/extractors/generic/index.js
  102. +20 −3 src/extractors/root-extractor.js
  103. +67 −0 src/extractors/root-extractor.test.js
  104. +7 −2 src/mercury.js
  105. +1 −1 src/mercury.test.js
  106. +15 −2 yarn.lock
25 cli.js
@@ -2,25 +2,38 @@
/* eslint-disable */

const Mercury = require('./dist/mercury');
const argv = require('yargs-parser')(process.argv.slice(2));

const [, , url] = process.argv;

(async urlToParse => {
const {
_: [url],
format,
f,
} = argv;
(async (urlToParse, contentType) => {
if (!urlToParse) {
console.log(
'\n\
mercury-parser\n\n\
The Mercury Parser extracts semantic content from any url\n\n\
Usage:\n\
\n\
mercury-parser [url-to-parse]\n\
$ mercury-parser url-to-parse [--format=html|text|markdown]\n\
\n\
'
);
return;
}
try {
const result = await Mercury.parse(urlToParse);
const contentTypeMap = {
html: 'html',
markdown: 'markdown',
md: 'markdown',
text: 'text',
txt: 'text',
};
const result = await Mercury.parse(urlToParse, null, {
contentType: contentTypeMap[contentType],
});
console.log(JSON.stringify(result, null, 2));
} catch (e) {
if (e.message === 'ETIMEDOUT' && false) {
@@ -38,4 +51,4 @@ Usage:\n\
console.error(`\n${reportBug}\n`);
process.exit(1);
}
})(url);
})(url, format || f);
Oops, something went wrong.

0 comments on commit 9b0664b

Please sign in to comment.