From 648e1856061460b4fa9a0748f60fe73117fde507 Mon Sep 17 00:00:00 2001 From: l0o0 Date: Thu, 2 Nov 2023 22:53:21 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=9C=A8pr227=E7=9A=84=E5=9F=BA?= =?UTF-8?q?=E7=A1=80=E4=B8=8A=EF=BC=8C=E5=81=9A=E4=BA=86=E9=83=A8=E5=88=86?= =?UTF-8?q?=E4=BF=AE=E6=94=B9.=20closes=20#70,=20closes=20#90,=20closes=20?= =?UTF-8?q?#173?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WeiPu.js | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/WeiPu.js b/WeiPu.js index 3e267419..df5be271 100644 --- a/WeiPu.js +++ b/WeiPu.js @@ -9,7 +9,7 @@ "inRepository": true, "translatorType": 4, "browserSupport": "gcsibv", - "lastUpdated": "2023-10-24 10:25:20" + "lastUpdated": "2023-11-02 14:50:43" } /* @@ -118,7 +118,7 @@ async function scrape(doc, url = doc.location.href) { var newItem = new Z.Item('journalArticle'); let ID = getIDFromUrl(url); let login = !!doc.querySelector('#Logout'); // '#user-nav>li>#Logout' - var debugLog = `scraping ${url}\nlogin statue=${login}\n`; + // var debugLog = `scraping ${url}\nlogin statue=${login}\n`; try { // 以下POST请求需要校验本地cookies,Scaffold不支持,需在浏览器调试 const referText = await requestText( @@ -128,13 +128,13 @@ async function scrape(doc, url = doc.location.href) { body: `ids=${ID}&strType=title_info&type=endnote` } ); - debugLog += `Post result is\n${referText}\n`; + // debugLog += `Post result is\n${referText}\n`; // string -> html var postResult = parser.parseFromString(referText, "text/html"); - debugLog += `transform result to ${typeof (postResult)}\n`; + // debugLog += `transform result to ${typeof (postResult)}\n`; // html -> string postResult = postResult.querySelector('input#xmlContent').value; - debugLog += `get xml:\n${postResult}\n`; + // debugLog += `get xml:\n${postResult}\n`; // string -> xml postResult = parser.parseFromString(postResult, "application/xml"); var data = { @@ -152,7 +152,7 @@ async function scrape(doc, url = doc.location.href) { }; for (const field in FIELDMAP) { const path = FIELDMAP[field]; - debugLog += `in field ${field}, I get ${postResult.querySelector(path).textContent}\n`; + // debugLog += `in field ${field}, I get ${postResult.querySelector(path).textContent}\n`; newItem[field] = data.get(path); } newItem.creators = data.getAll('Creators > Creator > Name').map(element => (matchCreator(element))); @@ -174,15 +174,17 @@ async function scrape(doc, url = doc.location.href) { newItem.tags = Array.from(doc.querySelectorAll('div.subject > span > a')).map(element => ({ tag: element.title })); - newItem.debugLog = debugLog; + // newItem.debugLog = debugLog; } + + newItem.extra = ''; for (const field in TRANSLATION) { const path = TRANSLATION[field]; try { - newItem[field] = doc.querySelector(path).innerText; + newItem.extra += `\n${field}: ${doc.querySelector(path).innerText}`; } catch (error) { - newItem[field] = ''; + Z.debug("this is an error"); } } // 修正维普镜像站中摘要内的英文引号异常 @@ -202,6 +204,7 @@ async function scrape(doc, url = doc.location.href) { }]; } } + if (newItem.date) newItem.date = newItem.date.split("T")[0]; newItem.complete(); }