Skip to content

Commit

Permalink
fix(cambridge): fix parser
Browse files Browse the repository at this point in the history
  • Loading branch information
tonytonyjan committed Jul 20, 2020
1 parent e6de2db commit 25a13db
Showing 1 changed file with 91 additions and 55 deletions.
146 changes: 91 additions & 55 deletions src/dictionaries/cambridgeCore.js
Original file line number Diff line number Diff line change
@@ -1,63 +1,99 @@
import React, { Fragment } from "react";
import React from "react";

/**
 * Default export: resolves a fetch-style promise and turns the response body
 * into React elements, one per dictionary entry.
 *
 * NOTE(review): this span appears to contain two bodies interleaved — one that
 * queries the parsed DOM directly and returns `<Fragment>` elements, and one
 * that delegates to `parse(body)` and returns `<div>` elements. `entries` is
 * declared twice and the JSX tags do not balance as written, which looks like
 * removed and added diff lines shown together without +/- markers. Confirm
 * against the repository before editing the code itself.
 *
 * @param {Promise} fetchPromise - awaited to obtain an object exposing `ok`
 *   and `text()` (presumably a `fetch()` Response — verify against callers).
 * @returns {Promise<Array|null>} `null` when no entries are found, otherwise
 *   an array of keyed React elements.
 * @throws {Error} with message "not ok" when `response.ok` is falsy.
 */
export default async (fetchPromise) => {
const response = await fetchPromise;
// Any response whose `ok` flag is falsy is rejected before the body is read.
if (!response.ok) throw new Error("not ok");

// The body is consumed as text; it is parsed as "text/html" below.
const body = await response.text();
const dom = new DOMParser().parseFromString(body, "text/html");
const entries = dom.querySelectorAll(".entry-body__el");
if (entries.length === 0) return null;
return Array.from(entries).map((element, i) => {
const title = element.querySelector(".hw.dhw")?.textContent;
const partOfSpeech = element.querySelector(".posgram")?.textContent;
const pronunciations = Array.from(
element.querySelectorAll(".pron.dpron")
).map((i) => i.textContent);
const blocks = Array.from(element.querySelectorAll(".def-block")).map(
(i) => ({
badge: i.querySelector(".def-info")?.textContent.trim(),
definition: i.querySelector(".def")?.textContent,
trans: i.querySelector(".trans:first-child")?.textContent,
examples: Array.from(i.querySelectorAll(".examp")).map(
(i) => i.textContent
),
})
);
return (
<Fragment key={i}>
{title && <div className="lead">{title}</div>}
{partOfSpeech && <div className="text-second">{partOfSpeech}</div>}
{pronunciations.length > 0 && (
<ul className="list-inline">
{pronunciations.map((i, index) => (
<li key={index} className="list-inline-item text-secondary">
{i}
</li>
))}
</ul>
)}
{blocks.map(({ badge, definition, trans, examples }, index) => (
<ul key={index} className="list-unstyled">
<li>
{badge && (
<span className="badge badge-secondary mr-1">{badge}</span>
)}
{definition && <span>{definition}</span>}
{trans && <div>{trans}</div>}
{examples.length > 0 && (
<ul>
{examples.map((i, index) => (
<li key={index} className="text-secondary">
{i}
</li>
))}
</ul>
)}
</li>
</ul>
const entries = parse(body);
if (!entries) return null;
return entries.map((i, index) => (
<div key={index}>
<div>
{i.title} <small className="text-secondary">{i.partOfSpeech}</small>
</div>
<div className="text-secondary">
{i.pronunciations
.filter((i) => i.value)
.map((i) => `${i.region} ${i.value}`)
.join(" ")}
</div>
<div>
{i.senses.map((i, index) => (
<div key={index}>
<div>{i.title}</div>
<div>
{i.blocks.map((i, index) => (
<div key={index}>
<div>
<span className="badge badge-secondary">{i.label}</span>{" "}
<span className="text-secondary">{i.gram}</span>{" "}
<span className="text-secondary">
{i.pronunciations
.filter((i) => i.value)
.map((i) => `${i.region} ${i.value}`)
.join(" ")}
</span>{" "}
<span>{i.inflections.join(" | ")}</span>
</div>
<div>{i.definition}</div>
<div>{i.translation}</div>
<ul>
{i.examples.map(({ from, to }, index) => (
<li key={index}>
{from}
{to && <div className="text-secondary">{to}</div>}
</li>
))}
</ul>
</div>
))}
</div>
</div>
))}
</Fragment>
);
});
</div>
</div>
));
};

/**
 * Parses a dictionary result page (HTML text) into plain data.
 *
 * Shape of each returned entry:
 *   { title, partOfSpeech, pronunciations: [{ region, value }],
 *     senses: [{ title, blocks: [{ label, gram, pronunciations, inflections,
 *                                  definition, translation,
 *                                  examples: [{ from, to }] }] }] }
 *
 * Text fields are `undefined` when the corresponding element is absent.
 *
 * @param {string} html - markup handed to `DOMParser` as "text/html".
 * @returns {Array<object>|null} the parsed entries, or `null` when the page
 *   contains no `.entry-body__el` element.
 */
export const parse = (html) => {
  // Text of the first descendant matching `selector`, or undefined.
  const textOf = (root, selector) => root.querySelector(selector)?.textContent;

  // Region/value pairs for every pronunciation item matching `selector`.
  const pronunciationsOf = (root, selector) =>
    Array.from(root.querySelectorAll(selector), (item) => ({
      region: textOf(item, ".region"),
      value: textOf(item, ".pron.dpron"),
    }));

  // One example sentence together with its (optional) translation.
  const parseExample = (example) => ({
    from: textOf(example, ".eg"),
    to: textOf(example, ".trans"),
  });

  // One definition block inside a sense.
  const parseBlock = (block) => ({
    label: textOf(block, ".epp-xref"),
    gram: textOf(block, ".gram"),
    pronunciations: pronunciationsOf(block, ".dpron-i"),
    inflections: Array.from(
      block.querySelectorAll(".inf.dinf"),
      (inflection) => inflection.textContent
    ),
    definition: textOf(block, ".def"),
    // Example translations are `.trans` elements too (read via `.examp` in
    // parseExample). Skip those, so a definition without its own translation
    // does not silently borrow the first example's translation.
    translation: Array.from(block.querySelectorAll(".trans")).find(
      (node) => !node.closest(".examp")
    )?.textContent,
    examples: Array.from(block.querySelectorAll(".examp"), parseExample),
  });

  // One sense: a heading (whitespace-normalised) plus its definition blocks.
  const parseSense = (sense) => ({
    title: textOf(sense, ".dsense_h")?.trim().replace(/\s+/g, " "),
    blocks: Array.from(sense.querySelectorAll(".def-block"), parseBlock),
  });

  // One top-level entry (headword, part of speech, pronunciations, senses).
  const parseEntry = (entry) => ({
    title: textOf(entry, ".hw.dhw"),
    partOfSpeech: textOf(entry, ".posgram"),
    pronunciations: pronunciationsOf(entry, ".pos-header .dpron-i"),
    senses: Array.from(entry.querySelectorAll(".pr.dsense"), parseSense),
  });

  const dom = new DOMParser().parseFromString(html, "text/html");
  const entries = Array.from(
    dom.querySelectorAll(".entry-body__el"),
    parseEntry
  );
  return entries.length > 0 ? entries : null;
};

0 comments on commit 25a13db

Please sign in to comment.