fix(various): fix formatting and lint issues (trailing commas)

sc10ntech · Apr 10, 2024 · 23b44c4
1 parent b63aa89
commit 23b44c4
Show file tree

Hide file tree

Showing 3 changed files with 33 additions and 33 deletions.
diff --git a/src/cleaner.ts b/src/cleaner.ts
@@ -64,7 +64,7 @@ function cleanErrantLineBreaks(doc: cheerio.Root) {
       const contentsNode = doc(cElement);
       if (contentsNode && contentsNode[0] && contentsNode[0].type === "text") {
         contentsNode.replaceWith(
-          contentsNode.text().replace(/([^\n])\n([^\n])/g, "$1 $2")
+          contentsNode.text().replace(/([^\n])\n([^\n])/g, "$1 $2"),
         );
       }
     });
@@ -135,7 +135,7 @@ function divToPara(doc: cheerio.Root, domType: string) {
 
 function getReplacementNodes(
   doc: cheerio.Root,
-  div: cheerio.Cheerio
+  div: cheerio.Cheerio,
 ): cheerio.Cheerio[] {
   let replacementText: string[] = [];
   const nodesToReturn: cheerio.Cheerio[] = [];
@@ -269,7 +269,7 @@ function removeScriptsStyles(doc: cheerio.Root): cheerio.Cheerio {
 
 function replaceWithPara(
   doc: cheerio.Root,
-  div: cheerio.Element
+  div: cheerio.Element,
 ): cheerio.Root {
   const divContent = doc(div).html();
   doc(div).replaceWith(`<p>${divContent}</p>`);

diff --git a/src/extractor.ts b/src/extractor.ts
@@ -33,7 +33,7 @@ export interface Extractor {
   links: (
     doc: cheerio.Root,
     topNode: cheerio.Cheerio,
-    lang: string
+    lang: string,
   ) => LinkObj[];
   locale: (doc: cheerio.Root) => string;
   publisher: (doc: cheerio.Root) => string;
@@ -49,7 +49,7 @@ export interface Extractor {
 function addSiblings(
   doc: cheerio.Root,
   topNode: cheerio.Cheerio,
-  lang: string
+  lang: string,
 ): cheerio.Cheerio {
   const baselineScoreSiblingsPara = getSiblingsScore(doc, topNode, lang);
   const sibs = topNode.prevAll();
@@ -60,7 +60,7 @@ function addSiblings(
       doc,
       lang,
       currentNode,
-      baselineScoreSiblingsPara
+      baselineScoreSiblingsPara,
     );
 
     if (ps) {
@@ -124,7 +124,7 @@ function cleanTitle(title: string, delimiters: string[]): string {
 
 function doesNodeListContainNode(
   list: cheerio.Cheerio[],
-  node: cheerio.Cheerio
+  node: cheerio.Cheerio,
 ): boolean {
   let contains = false;
   for (let i = 0; i < list.length; i++) {
@@ -151,7 +151,7 @@ function getSiblingsContent(
   doc: cheerio.Root,
   lang: string,
   currentSibling: cheerio.Cheerio,
-  baselineScoreSiblingsPara: number
+  baselineScoreSiblingsPara: number,
 ) {
   if (
     currentSibling.get(0).tagName === "p" &&
@@ -188,7 +188,7 @@ function getSiblingsContent(
 function getSiblingsScore(
   doc: cheerio.Root,
   topNode: cheerio.Cheerio,
-  lang: string
+  lang: string,
 ): number {
   const nodesToCheck = topNode.find("p");
   let base = 100000;
@@ -249,7 +249,7 @@ function isAbsoluteUrl(url: string): boolean {
 function isBoostable(
   doc: cheerio.Root,
   node: cheerio.Cheerio,
-  lang: string
+  lang: string,
 ): boolean {
   const minimumStopWordCount = 5;
   const maxStepsAwayFromNode = 3;
@@ -312,14 +312,14 @@ function isHighLinkDensity(doc: cheerio.Root, node: cheerio.Cheerio): boolean {
 function isNodeScoreThresholdMet(
   _doc: cheerio.Root,
   node: cheerio.Cheerio,
-  e: cheerio.Cheerio
+  e: cheerio.Cheerio,
 ): boolean {
   const topNodeScore = getScore(node);
   const currentNodeScore = getScore(e);
   const thresholdScore = topNodeScore * 0.08;
 
   const elIsTdUlOlOrBlockQ = ["td", "ul", "ol", "blockquote"].includes(
-    e.get(0).tagName
+    e.get(0).tagName,
   );
   if (currentNodeScore < thresholdScore && !elIsTdUlOlOrBlockQ) {
     return false;
@@ -356,7 +356,7 @@ function isValidDate(d: string): boolean {
 function postCleanup(
   doc: cheerio.Root,
   targetNode: cheerio.Cheerio,
-  lang: string
+  lang: string,
 ): cheerio.Cheerio {
   const node = addSiblings(doc, targetNode, lang);
 
@@ -423,7 +423,7 @@ function updateScore(node: cheerio.Cheerio, addToScore: number): void {
 const extractor: Extractor = {
   author: (doc: cheerio.Root): string[] => {
     const authorCandidates = doc(
-      "meta[property='article:author'], meta[property='og:article:author'], meta[name='author'], meta[name='dcterms.creator'], meta[name='DC.creator'], meta[name='DC.Creator'], meta[name='dc.creator'], meta[name='creator']"
+      "meta[property='article:author'], meta[property='og:article:author'], meta[name='author'], meta[name='dcterms.creator'], meta[name='DC.creator'], meta[name='DC.Creator'], meta[name='dc.creator'], meta[name='creator']",
     );
 
     const authorList = [];
@@ -552,7 +552,7 @@ const extractor: Extractor = {
   // if it gets to the end without one of these links or meta tags, return the original url as canonical
   canonicalLink: (doc: cheerio.Root, resourceUrl: string): string => {
     const canonicalLinkTag = doc(
-      "link[rel='canonical'], meta[property='og:url']"
+      "link[rel='canonical'], meta[property='og:url']",
     );
     if (canonicalLinkTag) {
       const resourceUrlObj = new URL(resourceUrl);
@@ -562,7 +562,7 @@ const extractor: Extractor = {
         canonicalLinkTag.get(0).tagName === "link"
       ) {
         const cleanedCanonicalLink = cleanNull(
-          canonicalLinkTag.first().attr("href")
+          canonicalLinkTag.first().attr("href"),
         );
         // check if link is a relative url, if so, append origin
         if (!isAbsoluteUrl(cleanedCanonicalLink)) {
@@ -579,7 +579,7 @@ const extractor: Extractor = {
         if (urlProtocol === "https:") {
           cleanedCanonicalMeta = cleanedCanonicalMeta.replace(
             /^http:\/\//i,
-            "https://"
+            "https://",
           );
           return cleanedCanonicalMeta;
         }
@@ -591,7 +591,7 @@ const extractor: Extractor = {
   },
   copyright: (doc: cheerio.Root): string => {
     const copyrightCandidates = doc(
-      "p[class*='copyright'], div[class*='copyright'], span[class*='copyright'], li[class*='copyright'], p[id*='copyright'], div[id*='copyright'], span[id*='copyright'], li[id*='copyright']"
+      "p[class*='copyright'], div[class*='copyright'], span[class*='copyright'], li[class*='copyright'], p[id*='copyright'], div[id*='copyright'], span[id*='copyright'], li[id*='copyright']",
     );
     let text = copyrightCandidates?.first()?.text();
     if (!text) {
@@ -632,17 +632,17 @@ const extractor: Extractor = {
     time, \
     span[class*='date'], \
     p[class*='date'], \
-    div[class*='date']"
+    div[class*='date']",
     );
 
     let dateToReturn = "";
 
     if (dateCandidates) {
       const dateContentCandidate = cleanNull(
-        dateCandidates.first().attr("content")
+        dateCandidates.first().attr("content"),
       );
       const dateTimeCandidate = cleanNull(
-        dateCandidates.first().attr("datetime")
+        dateCandidates.first().attr("datetime"),
       );
       const dateTextCandidate = cleanText(dateCandidates.first().text());
 
@@ -669,11 +669,11 @@ const extractor: Extractor = {
   },
   description: (doc: cheerio.Root): string => {
     const descriptionTag = doc(
-      "meta[name=description], meta[property='og:description']"
+      "meta[name=description], meta[property='og:description']",
     );
     if (descriptionTag) {
       const cleanedDescription = cleanNull(
-        descriptionTag.first().attr("content")
+        descriptionTag.first().attr("content"),
       );
       if (cleanedDescription) {
         return replaceCharacters(cleanedDescription.trim(), false, true);
@@ -685,7 +685,7 @@ const extractor: Extractor = {
     const tag = doc("link").filter(
       (_index, el) =>
         doc(el).attr("rel")?.toLowerCase() === "shortcut icon" ||
-        doc(el).attr("rel")?.toLowerCase() === "icon"
+        doc(el).attr("rel")?.toLowerCase() === "icon",
     );
     const faviconLink = tag.attr("href") || "";
     // ensure the url returned from favicon is absolute url
@@ -697,7 +697,7 @@ const extractor: Extractor = {
   },
   image: (doc: cheerio.Root): string => {
     const images = doc(
-      "meta[property='og:image'], meta[property='og:image:url'], meta[itemprop=image], meta[name='twitter:image:src'], meta[name='twitter:image'], meta[name='twitter:image0']"
+      "meta[property='og:image'], meta[property='og:image:url'], meta[itemprop=image], meta[name='twitter:image:src'], meta[name='twitter:image'], meta[name='twitter:image0']",
     );
 
     if (images.length > 0 && cleanNull(images.first().attr("content"))) {
@@ -753,7 +753,7 @@ const extractor: Extractor = {
   links: (
     doc: cheerio.Root,
     topNode: cheerio.Cheerio,
-    lang: string
+    lang: string,
   ): LinkObj[] => {
     const links: LinkObj[] = [];
 
@@ -789,11 +789,11 @@ const extractor: Extractor = {
   },
   publisher: (doc: cheerio.Root): string => {
     const publisherCandidates = doc(
-      "meta[property='og:site_name'], meta[itemprop=name], meta[name='dc.publisher'], meta[name='DC.publisher'], meta[name='DC.Publisher']"
+      "meta[property='og:site_name'], meta[itemprop=name], meta[name='dc.publisher'], meta[name='DC.publisher'], meta[name='DC.Publisher']",
     );
     if (publisherCandidates) {
       const cleanedPublisher = cleanNull(
-        publisherCandidates.first().attr("content")
+        publisherCandidates.first().attr("content"),
       );
       if (cleanedPublisher) {
         return cleanedPublisher.trim();
@@ -803,7 +803,7 @@ const extractor: Extractor = {
   },
   siteName: (doc: cheerio.Root): string => {
     const siteNameTag = doc(
-      "meta[property='og:site_name'], meta[itemprop=name]"
+      "meta[property='og:site_name'], meta[itemprop=name]",
     );
     if (siteNameTag) {
       const cleanedSiteName = cleanNull(siteNameTag.first().attr("content"));
@@ -822,7 +822,7 @@ const extractor: Extractor = {
     let elements = doc("a[rel='tag']");
     if (elements.length === 0) {
       elements = doc(
-        "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']"
+        "a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']",
       );
       if (elements.length === 0) {
         return [];

diff --git a/src/index.ts b/src/index.ts
@@ -58,7 +58,7 @@ export interface LazyExtractor {
 const siteMetadataExtractor = (
   markup: string,
   resourceUrl: string,
-  lang = "en"
+  lang = "en",
 ): PageData => {
   const resourceUrlObj = new URL(resourceUrl);
   const doc = cheerio.load(markup, { xmlMode: true });
@@ -114,7 +114,7 @@ export default siteMetadataExtractor;
 export const lazy = (
   html: string,
   resourceUrl: string,
-  language = "en"
+  language = "en",
 ): LazyExtractor => {
   const resourceUrlObj = new URL(resourceUrl);
   global.lazyPageData = global.lazyPageData || {};
@@ -129,7 +129,7 @@ export const lazy = (
       const doc = getParsedDoc.call(global, html);
       global.lazyPageData.canonicalLink = extractor.canonicalLink(
         doc,
-        resourceUrl
+        resourceUrl,
       );
       return global.lazyPageData.canonicalLink;
     },