forked from syuilo/summaly
-
Notifications
You must be signed in to change notification settings - Fork 10
/
got.ts
154 lines (131 loc) · 3.96 KB
/
got.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { readFileSync } from 'node:fs';
import got, * as Got from 'got';
import * as cheerio from 'cheerio';
import PrivateIp from 'private-ip';
import { StatusError } from './status-error.js';
import { detectEncoding, toUtf8 } from './encoding.js';
// ES modules do not provide __filename/__dirname, so derive this module's
// file path and its containing directory from import.meta.url.
const _filename = fileURLToPath(import.meta.url);
const _dirname = dirname(_filename);
/**
 * Agents handed to every `got` request made by this module.
 * A non-empty value also disables the private-IP rejection in `scpaping`.
 */
export let agent: Got.Agents = {};

/**
 * Replaces the module-wide agents.
 *
 * @param _agent Agents to use; any falsy value resets the agents to an empty object.
 */
export function setAgent(_agent: Got.Agents) {
	// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
	if (_agent) {
		agent = _agent;
	} else {
		agent = {};
	}
}
/**
 * Description of a single HTTP request, consumed by `getResponse`.
 */
export type GotOptions = {
/** Target URL, passed to `got` as the request URL. */
url: string;
/** HTTP method used for the request. */
method: 'GET' | 'POST' | 'HEAD';
/** Optional request body, forwarded to `got` as-is. */
body?: string;
/** Request headers; a value may be `undefined`. */
headers: Record<string, string | undefined>;
/** When set, the request is canceled if the response `content-type` header does not match this pattern. */
typeFilter?: RegExp;
}
// Package manifest, read synchronously once at module load; only `version` is used below.
const packageJsonPath = `${_dirname}/../../package.json`;
const repo = JSON.parse(readFileSync(packageJsonPath, 'utf8'));

// Timeout applied to each individual request phase (lookup, connect, socket, ...), in milliseconds.
const RESPONSE_TIMEOUT = 20 * 1000;

// Timeout for the whole request, in milliseconds.
const OPERATION_TIMEOUT = 60 * 1000;

// Largest accepted response, in bytes (10 MiB); larger responses are canceled.
const MAX_RESPONSE_SIZE = 10 * 1024 * 1024;

// User-Agent header value sent by `scpaping`.
const BOT_UA = `SummalyBot/${repo.version}`;
/**
 * Fetches an HTML/XHTML document, converts it to UTF-8 and parses it with cheerio.
 *
 * @param url URL of the page to fetch.
 * @param opts Optional settings; `lang` is sent as the `accept-language` header.
 * @returns The decoded body, the cheerio root (`$`) and the raw `got` response.
 * @throws StatusError (400) when the responding IP is private and private IPs are not allowed.
 */
export async function scpaping(url: string, opts?: { lang?: string; }) {
	const headers = {
		'accept': 'text/html,application/xhtml+xml',
		'user-agent': BOT_UA,
		'accept-language': opts?.lang,
	};

	const response = await getResponse({
		url,
		method: 'GET',
		headers,
		typeFilter: /^(text\/html|application\/xhtml\+xml)/,
	});

	// SUMMALY_ALLOW_PRIVATE_IP is intended for tests.
	// Configuring a custom agent also allows private addresses.
	const envOverride = process.env.SUMMALY_ALLOW_PRIVATE_IP === 'true';
	const privateIpAllowed = envOverride || Object.keys(agent).length > 0;

	// Note: this check runs after the response has already been received.
	if (!privateIpAllowed) {
		const { ip } = response;
		if (ip && PrivateIp(ip)) {
			throw new StatusError(`Private IP rejected ${ip}`, 400, 'Private IP Rejected');
		}
	}

	// Decode the raw bytes using the detected encoding before parsing.
	const { rawBody } = response;
	const body = toUtf8(rawBody, detectEncoding(rawBody));

	return {
		body,
		$: cheerio.load(body),
		response,
	};
}
/**
 * Performs a GET request that accepts any content type.
 *
 * @param url URL to fetch.
 * @returns The response body.
 * @throws StatusError when the server answers with an HTTP error status;
 *   any other error raised by the request is propagated unchanged.
 */
export async function get(url: string) {
	const res = await getResponse({
		url,
		method: 'GET',
		headers: {
			'accept': '*/*',
		},
	});
	// `res.body` is a plain value on the already-resolved response, so it needs no `await`.
	return res.body;
}
/**
 * Performs a HEAD request that accepts any content type.
 *
 * @param url URL to query.
 * @returns The `got` response object for the request.
 * @throws StatusError when the server answers with an HTTP error status;
 *   any other error raised by the request is propagated unchanged.
 */
export async function head(url: string) {
	const res = await getResponse({
		url,
		method: 'HEAD',
		headers: {
			'accept': '*/*',
		},
	});
	// `res` is already the resolved response, so it needs no second `await`.
	return res;
}
/**
 * Issues a single `got` request (HTTP/2 disabled, no retries) using the
 * module-wide agents, then hands it to `receiveResponse` for filtering and
 * size limiting.
 *
 * @param args Request description (URL, method, headers, optional body and type filter).
 * @returns The response resolved by `receiveResponse`.
 */
async function getResponse(args: GotOptions) {
	const { url, method, headers, body, typeFilter } = args;

	const timeout = {
		lookup: RESPONSE_TIMEOUT,
		connect: RESPONSE_TIMEOUT,
		secureConnect: RESPONSE_TIMEOUT,
		socket: RESPONSE_TIMEOUT, // read timeout
		response: RESPONSE_TIMEOUT,
		send: RESPONSE_TIMEOUT,
		request: OPERATION_TIMEOUT, // whole operation timeout
	};

	const req = got<string>(url, {
		method,
		headers,
		body,
		timeout,
		agent,
		http2: false,
		retry: {
			limit: 0,
		},
	});

	return await receiveResponse({ req, typeFilter });
}
/**
 * Waits for a pending `got` request while enforcing the content-type filter
 * and the maximum response size, canceling the request on violation.
 *
 * @param args.req The in-flight cancelable request.
 * @param args.typeFilter Optional pattern the `content-type` header must match.
 * @returns The resolved response.
 * @throws StatusError when `got` reports an HTTP error status; every other
 *   error (including cancellation) is rethrown unchanged.
 */
async function receiveResponse<T>(args: { req: Got.CancelableRequest<Got.Response<T>>, typeFilter?: RegExp }) {
	const { req, typeFilter } = args;
	const sizeLimit = MAX_RESPONSE_SIZE;

	req.on('response', (res: Got.Response) => {
		// Reject responses whose content type does not pass the filter.
		const contentType = res.headers['content-type'];
		if (typeFilter && !contentType?.match(typeFilter)) {
			req.cancel(`Rejected by type filter ${contentType}`);
			return;
		}

		// Size check based on the content-length response header.
		const declaredLength = res.headers['content-length'];
		if (declaredLength == null) return;

		const size = Number(declaredLength);
		if (size > sizeLimit) {
			req.cancel(`maxSize exceeded (${size} > ${sizeLimit}) on response`);
		}
	});

	// Size check based on the amount of data received so far.
	req.on('downloadProgress', (progress: Got.Progress) => {
		const { transferred, percent } = progress;
		const overLimit = transferred > sizeLimit;
		if (overLimit && percent !== 1) {
			req.cancel(`maxSize exceeded (${transferred} > ${sizeLimit}) on response`);
		}
	});

	// Await the response, reshaping HTTP status errors into StatusError.
	try {
		return await req;
	} catch (e) {
		if (e instanceof Got.HTTPError) {
			const { statusCode, statusMessage } = e.response;
			throw new StatusError(`${statusCode} ${statusMessage}`, statusCode, statusMessage);
		}
		throw e;
	}
}