-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
711 additions
and
433 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<!DOCTYPE html> | ||
<html lang="ja"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<meta http-equiv="X-UA-Compatible" content="ie=edge"> | ||
<title>Document</title> | ||
</head> | ||
<body> | ||
<a href="/success.html">success</a> | ||
<a href="/error.html">error</a> | ||
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<!DOCTYPE html> | ||
<html lang="ja"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<meta http-equiv="X-UA-Compatible" content="ie=edge"> | ||
<title>Document</title> | ||
</head> | ||
<body> | ||
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import * as path from 'path'; | ||
import serve = require('serve'); | ||
import delay = require('delay'); | ||
import checkDeadlink = require('./check-deadlink'); | ||
|
||
// End-to-end suite for checkDeadlink: serves the static `../e2e` directory on | ||
// localhost:5101 and checks which URLs appear as keys of the returned result. | ||
describe('Document', () => { | ||
// Handle returned by `serve`; typed `any`, only its `stop()` method is used here. | ||
// @ts-ignore | ||
let server: any; | ||
|
||
// Starts the static file server once before the tests in this suite run. | ||
beforeAll(async () => { | ||
// Raises the Jest timeout to 100000 ms. | ||
// NOTE(review): this is called inside the hook; whether it also covers this | ||
// hook's own timeout depends on the Jest runner - confirm, or move it to the | ||
// top level of the file. | ||
jest.setTimeout(100000); | ||
server = serve(path.resolve(__dirname, '../e2e'), { | ||
port: 5101 | ||
}); | ||
// Fixed 5000 ms pause after calling `serve` - presumably to let the server | ||
// start listening before the test issues requests; TODO confirm. | ||
await delay(5000); | ||
}); | ||
|
||
// Stops the server after the suite; the guard covers the case where `serve` | ||
// never assigned a handle. | ||
afterAll(async () => { | ||
if (server !== undefined) { | ||
server.stop(); | ||
} | ||
}); | ||
|
||
// Runs checkDeadlink against the local server root and inspects the keys of | ||
// the object it resolves to. | ||
test('check deadlink', async () => { | ||
// tslint:disable-next-line no-http-string | ||
const result = await checkDeadlink('http://localhost:5101'); | ||
const urls = Object.keys(result); | ||
|
||
// The error page URL must be present among the result keys. | ||
expect(urls).toEqual( | ||
// tslint:disable-next-line no-http-string | ||
expect.arrayContaining(['http://localhost:5101/error.html']) | ||
); | ||
|
||
// The success page URL must not be present among the result keys. | ||
expect(urls).not.toEqual( | ||
// tslint:disable-next-line no-http-string | ||
expect.arrayContaining(['http://localhost:5101/success.html']) | ||
); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,219 +1,30 @@ | ||
import * as nodeUrl from 'url'; | ||
// import {JSDOM} from 'jsdom'; | ||
import {Padex} from '/Users/nju33/github/page-line/src/padex'; | ||
// tslint:disable-next-line no-unused | ||
import {groupBy, uniq, difference, Dictionary} from 'lodash'; | ||
import got = require('got'); | ||
// import delay = require('delay'); | ||
// import * as dom from './helpers/dom'; | ||
// import * as ipath from './helpers/ipath'; | ||
import {Padex, Document, PadexOptions} from 'padex'; | ||
|
||
declare namespace checkDeadlink { | ||
export interface Result { | ||
url: string; | ||
parentUrl?: string; | ||
response: got.Response<string>; | ||
readonly status: number; | ||
} | ||
|
||
export interface Error { | ||
url: string; | ||
parentUrl?: string; | ||
error: got.GotError; | ||
readonly status: -1; | ||
} | ||
|
||
export interface Config { | ||
deep: number; | ||
verbose: boolean; | ||
} | ||
|
||
export interface Data { | ||
result: { | ||
[url: string]: Result | Error; | ||
}; | ||
baseUrl?: string; | ||
document: Document; | ||
from: Document[]; | ||
} | ||
} | ||
|
||
// const defaultConfig = { | ||
// deep: 5, | ||
// verbose: false | ||
// }; | ||
// | ||
// const initialData: checkDeadlink.Data = { | ||
// result: {} | ||
// }; | ||
// | ||
// const checked: (url: string, data: checkDeadlink.Data) => boolean = ( | ||
// url, | ||
// data | ||
// ) => { | ||
// return Object.keys(data.result).includes(url); | ||
// }; | ||
|
||
const checkDeadlink = async (url: string) => { | ||
const padex = new Padex(url, { | ||
head: false, | ||
sleep: 1000, | ||
deep: 2, | ||
validate({url: aa, prevUrl}) { | ||
console.log('prevUrl', prevUrl); | ||
|
||
if (prevUrl === undefined) { | ||
return true; | ||
} | ||
|
||
// if (prevUrl === undefined) { | ||
// return true; | ||
// } | ||
const hostname = nodeUrl.parse(prevUrl).hostname; | ||
if (hostname === undefined) { | ||
return false; | ||
} | ||
|
||
// console.log('hostname', '==============='); | ||
// console.log(hostname, hostname === 'www.geek.co.jp'); | ||
|
||
return hostname === 'www.geek.co.jp'; | ||
} | ||
|
||
}); | ||
const checkDeadlink = async (url: string, options: PadexOptions = {}) => { | ||
const padex = new Padex(url, options); | ||
|
||
const result = await padex.process(); | ||
|
||
debugger; | ||
// const a: any = result.root.children | ||
// .filter(d => d.response) | ||
// .filter(res => (res as any).statusCode) | ||
|
||
const err = a.documents.filter(document => { | ||
|
||
console.log(document.children); | ||
|
||
const errorDocuments = (document.children || []) | ||
.filter(child => child.error); | ||
|
||
return errorDocuments.length > 0; | ||
const documentsWithError = result.documents.filter(d => d.isError()); | ||
const parentDocumentsHasErrorChild = documentsWithError.map(documentWithError => { | ||
return result.documents.filter(d => d.hasChild(documentWithError)); | ||
}); | ||
|
||
console.log(err) | ||
|
||
|
||
return result; | ||
return documentsWithError.reduce((acc, document, idx) => { | ||
acc[document.normalizedUrl] = { | ||
document, | ||
from: parentDocumentsHasErrorChild[idx], | ||
}; | ||
|
||
// const normalizedUrl = ipath.normalize(url); | ||
// if (data.baseUrl === undefined) { | ||
// data.baseUrl = normalizedUrl; | ||
// } | ||
// | ||
// if (config.verbose) { | ||
// if (parentUrl === undefined) { | ||
// console.log(normalizedUrl); | ||
// } else { | ||
// console.log(parentUrl, ' -> ', normalizedUrl); | ||
// } | ||
// } | ||
// | ||
// try { | ||
// const res = await got(url, {timeout: 20000}); | ||
// data.result[url] = { | ||
// url, | ||
// parentUrl, | ||
// response: res, | ||
// get status() { | ||
// return (this as checkDeadlink.Result).response.statusCode as number; | ||
// } | ||
// }; | ||
// | ||
// if (normalizedUrl.startsWith(data.baseUrl)) { | ||
// const doc = new JSDOM(res.body).window.document; | ||
// const html = doc.body.innerHTML; | ||
// const urls = dom | ||
// .getLinks(normalizedUrl, html) | ||
// .filter(thisUrl => !checked(thisUrl, data)); | ||
// | ||
// await Promise.all( | ||
// urls.map(async (thisUrl, i) => { | ||
// const normalizedThisUrl = ipath.normalize(thisUrl); | ||
// | ||
// if (normalizedUrl === normalizedThisUrl || deep + 1 > config.deep) { | ||
// return; | ||
// } | ||
// | ||
// await delay(i * 15); | ||
// | ||
// if (data.result[normalizedThisUrl] !== undefined) { | ||
// return; | ||
// } | ||
// | ||
// /** | ||
// * レスポンスが来る前に再度同じURLで実行されない為に | ||
// * とりあえず undefined 以外の値を入れる | ||
// */ | ||
// if (data.result[normalizedThisUrl] === undefined) { | ||
// data.result[normalizedThisUrl] = {} as any; | ||
// } | ||
// | ||
// await checkDeadlink( | ||
// normalizedThisUrl, | ||
// config, | ||
// normalizedUrl, | ||
// data, | ||
// deep + 1 | ||
// ); | ||
// }) | ||
// ); | ||
// } | ||
// } catch (err) { | ||
// const res: got.Response<string> | undefined = err.response; | ||
// if (res === undefined) { | ||
// data.result[url] = { | ||
// url, | ||
// parentUrl, | ||
// error: err, | ||
// get status() { | ||
// return -1 as -1; | ||
// } | ||
// }; | ||
// } else { | ||
// data.result[url] = { | ||
// url, | ||
// parentUrl, | ||
// error: err, | ||
// get status() { | ||
// return -1 as -1; | ||
// } | ||
// }; | ||
// } | ||
// | ||
// return; | ||
// } | ||
// | ||
// const groupedByParentUrl = groupBy(data.result, 'parentUrl'); | ||
// Object.keys(groupedByParentUrl).forEach(thisUrl => { | ||
// const deadlinks = (groupedByParentUrl[ | ||
// thisUrl | ||
// ] as checkDeadlink.Result[]).filter(result => { | ||
// return ( | ||
// result.status === -1 || | ||
// result.status === 403 || | ||
// result.status === 404 || | ||
// result.status === 500 || | ||
// result.status === 503 | ||
// ); | ||
// }); | ||
// | ||
// if (deadlinks.length === 0) { | ||
// delete groupedByParentUrl[thisUrl]; | ||
// | ||
// return; | ||
// } | ||
// | ||
// groupedByParentUrl[thisUrl] = deadlinks; | ||
// }); | ||
// | ||
// return groupedByParentUrl; | ||
return acc; | ||
}, {} as any); | ||
}; | ||
|
||
export = checkDeadlink; |
Oops, something went wrong.