diff --git a/README.md b/README.md index b2fa2fa..fc5bf67 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ yarn add [-D] check-deadlink ## Example ```ts -const checkDeadlink = require('../src/check-deadlink'); +const checkDeadlink = require('check-deadlink'); (async () => { const result = await checkDeadlink('https://example.com', { diff --git a/examples/example.js b/examples/example.js index 1891bc0..c79e088 100644 --- a/examples/example.js +++ b/examples/example.js @@ -2,9 +2,9 @@ require('ts-node/register'); const checkDeadlink = require('../src/check-deadlink'); (async () => { - const result = await checkDeadlink('https://www.geek.co.jp/', { - verbose: true - }); + const result = await checkDeadlink('https://www.geek.co.jp/'); + + debugger; console.log(result); })() diff --git a/src/check-deadlink.ts b/src/check-deadlink.ts index 2439a2d..9debc9a 100644 --- a/src/check-deadlink.ts +++ b/src/check-deadlink.ts @@ -1,10 +1,12 @@ -import {JSDOM} from 'jsdom'; +import * as nodeUrl from 'url'; +// import {JSDOM} from 'jsdom'; +import {Padex} from '/Users/nju33/github/page-line/src/padex'; // tslint:disable-next-line no-unused import {groupBy, uniq, difference, Dictionary} from 'lodash'; import got = require('got'); -import delay = require('delay'); -import * as dom from './helpers/dom'; -import * as ipath from './helpers/ipath'; +// import delay = require('delay'); +// import * as dom from './helpers/dom'; +// import * as ipath from './helpers/ipath'; declare namespace checkDeadlink { export interface Result { @@ -34,141 +36,184 @@ declare namespace checkDeadlink { } } -const defaultConfig = { - deep: 5, - verbose: false -}; - -const initialData: checkDeadlink.Data = { - result: {} -}; +// const defaultConfig = { +// deep: 5, +// verbose: false +// }; +// +// const initialData: checkDeadlink.Data = { +// result: {} +// }; +// +// const checked: (url: string, data: checkDeadlink.Data) => boolean = ( +// url, +// data +// ) => { +// return Object.keys(data.result).includes(url); +// }; + +const checkDeadlink = async (url: string) => { + const padex = new Padex(url, { + head: false, + sleep: 1000, + deep: 2, + validate({url: aa, prevUrl}) { + console.log('prevUrl', prevUrl); + + if (prevUrl === undefined) { + return true; + } -const checked: (url: string, data: checkDeadlink.Data) => boolean = ( - url, - data -) => { - return Object.keys(data.result).includes(url); -}; + // if (prevUrl === undefined) { + // return true; + // } + const hostname = nodeUrl.parse(prevUrl).hostname; + if (hostname === undefined) { + return false; + } -const checkDeadlink = async ( - url: string, - config: checkDeadlink.Config = {...defaultConfig}, - parentUrl?: string, - data: checkDeadlink.Data = initialData, - deep: number = 1 -) => { - const normalizedUrl = ipath.normalize(url); - if (data.baseUrl === undefined) { - data.baseUrl = normalizedUrl; - } + // console.log('hostname', '==============='); + // console.log(hostname, hostname === 'www.geek.co.jp'); - if (config.verbose) { - if (parentUrl === undefined) { - console.log(normalizedUrl); - } else { - console.log(parentUrl, ' -> ', normalizedUrl); + return hostname === 'www.geek.co.jp'; } - } - try { - const res = await got(url, {timeout: 20000}); - data.result[url] = { - url, - parentUrl, - response: res, - get status() { - return (this as checkDeadlink.Result).response.statusCode as number; - } - }; + }); - if (normalizedUrl.startsWith(data.baseUrl)) { - const doc = new JSDOM(res.body).window.document; - const html = doc.body.innerHTML; - const urls = dom - .getLinks(normalizedUrl, html) - .filter(thisUrl => !checked(thisUrl, data)); - - await Promise.all( - urls.map(async (thisUrl, i) => { - const normalizedThisUrl = ipath.normalize(thisUrl); - - if (normalizedUrl === normalizedThisUrl || deep + 1 > config.deep) { - return; - } - - await delay(i * 15); - - if (data.result[normalizedThisUrl] !== undefined) { - return; - } - - /** - * レスポンスが来る前に再度同じURLで実行されない為に - * とりあえず undefined 以外の値を入れる - */ - if (data.result[normalizedThisUrl] === undefined) { - data.result[normalizedThisUrl] = {} as any; - } - - await checkDeadlink( - normalizedThisUrl, - config, - normalizedUrl, - data, - deep + 1 - ); - }) - ); - } - } catch (err) { - const res: got.Response | undefined = err.response; - if (res === undefined) { - data.result[url] = { - url, - parentUrl, - error: err, - get status() { - return -1 as -1; - } - }; - } else { - data.result[url] = { - url, - parentUrl, - error: err, - get status() { - return -1 as -1; - } - }; - } + const result = await padex.process(); - return; - } + debugger; + // const a: any = result.root.children + // .filter(d => d.response) + // .filter(res => (res as any).statusCode) - const groupedByParentUrl = groupBy(data.result, 'parentUrl'); - Object.keys(groupedByParentUrl).forEach(thisUrl => { - const deadlinks = (groupedByParentUrl[ - thisUrl - ] as checkDeadlink.Result[]).filter(result => { - return ( - result.status === -1 || - result.status === 403 || - result.status === 404 || - result.status === 500 || - result.status === 503 - ); - }); - - if (deadlinks.length === 0) { - delete groupedByParentUrl[thisUrl]; - - return; - } + const err = a.documents.filter(document => { + + console.log(document.children); + + const errorDocuments = (document.children || []) + .filter(child => child.error); - groupedByParentUrl[thisUrl] = deadlinks; + return errorDocuments.length > 0; }); - return groupedByParentUrl; + console.log(err) + + + return result; + + // const normalizedUrl = ipath.normalize(url); + // if (data.baseUrl === undefined) { + // data.baseUrl = normalizedUrl; + // } + // + // if (config.verbose) { + // if (parentUrl === undefined) { + // console.log(normalizedUrl); + // } else { + // console.log(parentUrl, ' -> ', normalizedUrl); + // } + // } + // + // try { + // const res = await got(url, {timeout: 20000}); + // data.result[url] = { + // url, + // parentUrl, + // response: res, + // get status() { + // return (this as checkDeadlink.Result).response.statusCode as number; + // } + // }; + // + // if (normalizedUrl.startsWith(data.baseUrl)) { + // const doc = new JSDOM(res.body).window.document; + // const html = doc.body.innerHTML; + // const urls = dom + // .getLinks(normalizedUrl, html) + // .filter(thisUrl => !checked(thisUrl, data)); + // + // await Promise.all( + // urls.map(async (thisUrl, i) => { + // const normalizedThisUrl = ipath.normalize(thisUrl); + // + // if (normalizedUrl === normalizedThisUrl || deep + 1 > config.deep) { + // return; + // } + // + // await delay(i * 15); + // + // if (data.result[normalizedThisUrl] !== undefined) { + // return; + // } + // + // /** + // * レスポンスが来る前に再度同じURLで実行されない為に + // * とりあえず undefined 以外の値を入れる + // */ + // if (data.result[normalizedThisUrl] === undefined) { + // data.result[normalizedThisUrl] = {} as any; + // } + // + // await checkDeadlink( + // normalizedThisUrl, + // config, + // normalizedUrl, + // data, + // deep + 1 + // ); + // }) + // ); + // } + // } catch (err) { + // const res: got.Response | undefined = err.response; + // if (res === undefined) { + // data.result[url] = { + // url, + // parentUrl, + // error: err, + // get status() { + // return -1 as -1; + // } + // }; + // } else { + // data.result[url] = { + // url, + // parentUrl, + // error: err, + // get status() { + // return -1 as -1; + // } + // }; + // } + // + // return; + // } + // + // const groupedByParentUrl = groupBy(data.result, 'parentUrl'); + // Object.keys(groupedByParentUrl).forEach(thisUrl => { + // const deadlinks = (groupedByParentUrl[ + // thisUrl + // ] as checkDeadlink.Result[]).filter(result => { + // return ( + // result.status === -1 || + // result.status === 403 || + // result.status === 404 || + // result.status === 500 || + // result.status === 503 + // ); + // }); + // + // if (deadlinks.length === 0) { + // delete groupedByParentUrl[thisUrl]; + // + // return; + // } + // + // groupedByParentUrl[thisUrl] = deadlinks; + // }); + // + // return groupedByParentUrl; }; export = checkDeadlink;