Skip to content

Commit

Permalink
Merge branch 'v0.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
nju33 committed Mar 5, 2018
2 parents 285a2de + 7846c73 commit 1ec0951
Show file tree
Hide file tree
Showing 17 changed files with 711 additions and 433 deletions.
19 changes: 14 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# check-deadlink

Check web pages for dead links

[![npm: check-deadlink](https://img.shields.io/npm/v/check-deadlink.svg)](https://www.npmjs.com/package/check-deadlink)
[![CircleCI](https://circleci.com/gh/nju33/check-deadlink.svg?style=svg&circle-token=a28ff5af8b1e0a0e3f4ec38d619681fc4886f63c)](https://circleci.com/gh/nju33/check-deadlink)
[![Coverage Status](https://coveralls.io/repos/github/nju33/check-deadlink/badge.svg?branch=master)](https://coveralls.io/github/nju33/check-deadlink?branch=master)
Expand All @@ -20,11 +22,18 @@ yarn add [-D] check-deadlink
const checkDeadlink = require('check-deadlink');

(async () => {
const result = await checkDeadlink('https://example.com', {
verbose: true
});

console.log(result);
// `options` are the options accepted by the `padex` package
const result = await checkDeadlink('https://example.com', options);
// result === {
// 'http://error.com': {
// document,
// from: [document, document, ...],
// /* `from` above is an array of
// * `padex` Document objects for the pages
// * that link to this document
// */
// }
// }
})()
.catch(err => {
console.error(err);
Expand Down
14 changes: 14 additions & 0 deletions e2e/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
<!--
  Entry page of the e2e fixture site. It links to two same-origin pages:
  /success.html and /error.html.
  NOTE(review): /error.html is presumably left out of the fixture directory
  on purpose so that it is reported as a dead link - confirm.
-->
<a href="/success.html">success</a>
<a href="/error.html">error</a>

</body>
</html>
12 changes: 12 additions & 0 deletions e2e/success.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
<!-- Intentionally empty page with no outgoing links. -->

</body>
</html>
7 changes: 4 additions & 3 deletions examples/example.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ require('ts-node/register');
const checkDeadlink = require('../src/check-deadlink');

(async () => {
const result = await checkDeadlink('https://www.geek.co.jp/');

debugger;
const result = await checkDeadlink('https://www.geek.co.jp/', {
deep: 1
});

console.log(result);
debugger;
})()
.catch(err => {
console.error(err);
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"@geekcojp/tslint-config": "^0.1.2",
"coveralls": "^3.0.0",
"microbundle": "^0.4.4",
"serve": "^6.5.1",
"ts-node": "^5.0.0",
"tslint": "^5.9.1",
"typescript": "^2.7.2"
Expand All @@ -40,6 +41,7 @@
"jest": "^22.4.2",
"jsdom": "^11.6.2",
"lodash": "^4.17.5",
"padex": "^0.1.3",
"ts-jest": "^22.4.0"
},
"jest": {
Expand Down
39 changes: 39 additions & 0 deletions src/check-deadlink.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import * as path from 'path';
import serve = require('serve');
import delay = require('delay');
import checkDeadlink = require('./check-deadlink');

/**
 * End-to-end check of `checkDeadlink` against the static fixture site in
 * `../e2e`, which is served locally by `serve` for the duration of the suite.
 */
describe('checkDeadlink', () => {
  const port = 5101;
  // tslint:disable-next-line no-http-string
  const origin = `http://localhost:${port}`;

  // @ts-ignore
  let server: any;

  // Raised at suite scope, before any hook starts, so that the limit also
  // covers `beforeAll` itself, which sleeps for five seconds.
  jest.setTimeout(100000);

  beforeAll(async () => {
    server = serve(path.resolve(__dirname, '../e2e'), {port});
    // Fixed pause after starting the server; presumably gives `serve` time to
    // begin listening before the first request is made.
    await delay(5000);
  });

  afterAll(() => {
    // `server` stays undefined if `serve` threw before the assignment.
    if (server !== undefined) {
      server.stop();
    }
  });

  test('check deadlink', async () => {
    const result = await checkDeadlink(origin);
    // The result is keyed by the URL of each document that failed to load.
    const urls = Object.keys(result);

    expect(urls).toContain(`${origin}/error.html`);
    expect(urls).not.toContain(`${origin}/success.html`);
  });
});
219 changes: 15 additions & 204 deletions src/check-deadlink.ts
Original file line number Diff line number Diff line change
@@ -1,219 +1,30 @@
import * as nodeUrl from 'url';
// import {JSDOM} from 'jsdom';
import {Padex} from '/Users/nju33/github/page-line/src/padex';
// tslint:disable-next-line no-unused
import {groupBy, uniq, difference, Dictionary} from 'lodash';
import got = require('got');
// import delay = require('delay');
// import * as dom from './helpers/dom';
// import * as ipath from './helpers/ipath';
import {Padex, Document, PadexOptions} from 'padex';

declare namespace checkDeadlink {
export interface Result {
url: string;
parentUrl?: string;
response: got.Response<string>;
readonly status: number;
}

export interface Error {
url: string;
parentUrl?: string;
error: got.GotError;
readonly status: -1;
}

export interface Config {
deep: number;
verbose: boolean;
}

export interface Data {
result: {
[url: string]: Result | Error;
};
baseUrl?: string;
document: Document;
from: Document[];
}
}

// const defaultConfig = {
// deep: 5,
// verbose: false
// };
//
// const initialData: checkDeadlink.Data = {
// result: {}
// };
//
// const checked: (url: string, data: checkDeadlink.Data) => boolean = (
// url,
// data
// ) => {
// return Object.keys(data.result).includes(url);
// };

const checkDeadlink = async (url: string) => {
const padex = new Padex(url, {
head: false,
sleep: 1000,
deep: 2,
validate({url: aa, prevUrl}) {
console.log('prevUrl', prevUrl);

if (prevUrl === undefined) {
return true;
}

// if (prevUrl === undefined) {
// return true;
// }
const hostname = nodeUrl.parse(prevUrl).hostname;
if (hostname === undefined) {
return false;
}

// console.log('hostname', '===============');
// console.log(hostname, hostname === 'www.geek.co.jp');

return hostname === 'www.geek.co.jp';
}

});
const checkDeadlink = async (url: string, options: PadexOptions = {}) => {
const padex = new Padex(url, options);

const result = await padex.process();

debugger;
// const a: any = result.root.children
// .filter(d => d.response)
// .filter(res => (res as any).statusCode)

const err = a.documents.filter(document => {

console.log(document.children);

const errorDocuments = (document.children || [])
.filter(child => child.error);

return errorDocuments.length > 0;
const documentsWithError = result.documents.filter(d => d.isError());
const parentDocumentsHasErrorChild = documentsWithError.map(documentWithError => {
return result.documents.filter(d => d.hasChild(documentWithError));
});

console.log(err)


return result;
return documentsWithError.reduce((acc, document, idx) => {
acc[document.normalizedUrl] = {
document,
from: parentDocumentsHasErrorChild[idx],
};

// const normalizedUrl = ipath.normalize(url);
// if (data.baseUrl === undefined) {
// data.baseUrl = normalizedUrl;
// }
//
// if (config.verbose) {
// if (parentUrl === undefined) {
// console.log(normalizedUrl);
// } else {
// console.log(parentUrl, ' -> ', normalizedUrl);
// }
// }
//
// try {
// const res = await got(url, {timeout: 20000});
// data.result[url] = {
// url,
// parentUrl,
// response: res,
// get status() {
// return (this as checkDeadlink.Result).response.statusCode as number;
// }
// };
//
// if (normalizedUrl.startsWith(data.baseUrl)) {
// const doc = new JSDOM(res.body).window.document;
// const html = doc.body.innerHTML;
// const urls = dom
// .getLinks(normalizedUrl, html)
// .filter(thisUrl => !checked(thisUrl, data));
//
// await Promise.all(
// urls.map(async (thisUrl, i) => {
// const normalizedThisUrl = ipath.normalize(thisUrl);
//
// if (normalizedUrl === normalizedThisUrl || deep + 1 > config.deep) {
// return;
// }
//
// await delay(i * 15);
//
// if (data.result[normalizedThisUrl] !== undefined) {
// return;
// }
//
// /**
// * To keep the same URL from being processed again before its response
// * arrives, store some value other than undefined for now
// */
// if (data.result[normalizedThisUrl] === undefined) {
// data.result[normalizedThisUrl] = {} as any;
// }
//
// await checkDeadlink(
// normalizedThisUrl,
// config,
// normalizedUrl,
// data,
// deep + 1
// );
// })
// );
// }
// } catch (err) {
// const res: got.Response<string> | undefined = err.response;
// if (res === undefined) {
// data.result[url] = {
// url,
// parentUrl,
// error: err,
// get status() {
// return -1 as -1;
// }
// };
// } else {
// data.result[url] = {
// url,
// parentUrl,
// error: err,
// get status() {
// return -1 as -1;
// }
// };
// }
//
// return;
// }
//
// const groupedByParentUrl = groupBy(data.result, 'parentUrl');
// Object.keys(groupedByParentUrl).forEach(thisUrl => {
// const deadlinks = (groupedByParentUrl[
// thisUrl
// ] as checkDeadlink.Result[]).filter(result => {
// return (
// result.status === -1 ||
// result.status === 403 ||
// result.status === 404 ||
// result.status === 500 ||
// result.status === 503
// );
// });
//
// if (deadlinks.length === 0) {
// delete groupedByParentUrl[thisUrl];
//
// return;
// }
//
// groupedByParentUrl[thisUrl] = deadlinks;
// });
//
// return groupedByParentUrl;
return acc;
}, {} as any);
};

export = checkDeadlink;

0 comments on commit 1ec0951

Please sign in to comment.