Skip to content

Commit

Permalink
fix: 在pr227的基础上,做了部分修改. closes #70, closes #90, closes #173
Browse files (browse the repository at this point in the history)
  • Loading branch information
l0o0 committed Nov 2, 2023
1 parent 76efdd2 commit 648e185
Showing 1 changed file with 12 additions and 9 deletions.
21 changes: 12 additions & 9 deletions WeiPu.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2023-10-24 10:25:20"
"lastUpdated": "2023-11-02 14:50:43"
}

/*
Expand Down Expand Up @@ -118,7 +118,7 @@ async function scrape(doc, url = doc.location.href) {
var newItem = new Z.Item('journalArticle');
let ID = getIDFromUrl(url);
let login = !!doc.querySelector('#Logout'); // '#user-nav>li>#Logout'
var debugLog = `scraping ${url}\nlogin statue=${login}\n`;
// var debugLog = `scraping ${url}\nlogin statue=${login}\n`;
try {
// 以下POST请求需要校验本地cookies,Scaffold不支持,需在浏览器调试
const referText = await requestText(
Expand All @@ -128,13 +128,13 @@ async function scrape(doc, url = doc.location.href) {
body: `ids=${ID}&strType=title_info&type=endnote`
}
);
debugLog += `Post result is\n${referText}\n`;
// debugLog += `Post result is\n${referText}\n`;
// string -> html
var postResult = parser.parseFromString(referText, "text/html");
debugLog += `transform result to ${typeof (postResult)}\n`;
// debugLog += `transform result to ${typeof (postResult)}\n`;
// html -> string
postResult = postResult.querySelector('input#xmlContent').value;
debugLog += `get xml:\n${postResult}\n`;
// debugLog += `get xml:\n${postResult}\n`;
// string -> xml
postResult = parser.parseFromString(postResult, "application/xml");
var data = {
Expand All @@ -152,7 +152,7 @@ async function scrape(doc, url = doc.location.href) {
};
for (const field in FIELDMAP) {
const path = FIELDMAP[field];
debugLog += `in field ${field}, I get ${postResult.querySelector(path).textContent}\n`;
// debugLog += `in field ${field}, I get ${postResult.querySelector(path).textContent}\n`;
newItem[field] = data.get(path);
}
newItem.creators = data.getAll('Creators > Creator > Name').map(element => (matchCreator(element)));
Expand All @@ -174,15 +174,17 @@ async function scrape(doc, url = doc.location.href) {
newItem.tags = Array.from(doc.querySelectorAll('div.subject > span > a')).map(element => ({
tag: element.title
}));
newItem.debugLog = debugLog;
// newItem.debugLog = debugLog;
}

newItem.extra = '';
for (const field in TRANSLATION) {
const path = TRANSLATION[field];
try {
newItem[field] = doc.querySelector(path).innerText;
newItem.extra += `\n${field}: ${doc.querySelector(path).innerText}`;
}
catch (error) {
newItem[field] = '';
Z.debug("this is an error");
}
}
// 修正维普镜像站中摘要内的英文引号异常
Expand All @@ -202,6 +204,7 @@ async function scrape(doc, url = doc.location.href) {
}];
}
}
if (newItem.date) newItem.date = newItem.date.split("T")[0];
newItem.complete();
}

Expand Down

0 comments on commit 648e185

Please sign in to comment.