diff --git a/README.md b/README.md index d133f3a..ee71225 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,16 @@ $ cd utsusemi $ npm install ``` -### :pencil: STEP 2. Edit config +### :pencil: STEP 2. Set environment variables OR Edit config.yml + +Set environment variables. + +OR Copy [`config.example.yml`](config.example.yml) to `config.yml`. And edit. +Environment / config.yml documentation is [here](docs/env.md) :book: . + ### :rocket: STEP 3. Deploy to AWS ```console @@ -34,8 +40,7 @@ And get endpoints URL and `UtsusemiWebsiteURL` #### :bomb: Destroy utsusemi -1. Call API `/delete?path=/` -2. Run following command. +Run the following command. ```console $ AWS_PROFILE=XXxxXXX npm run destroy diff --git a/config.example.yml b/config.example.yml index 3f50055..09a87c1 100644 --- a/config.example.yml +++ b/config.example.yml @@ -1,9 +1,21 @@ --- +# utsusemi service name +serviceName: utsusemi +# AWS deploy region region: ap-northeast-1 +# Crawl target host targetHost: https://opslet.com +# Web site hosting S3 bucket name bucketName: utsusemi.opslet.com +# Crawl worker process workerProcess: 2 +# Delay time when starting a worker (ms) workerDelay: 100 +# Crawl job threads per worker threadsPerWorker: 1 -# forceTrailingSlash: true +# Change http://example.com/path/to -> http://example.com/path/to/ +forceTrailingSlash: 1 +# Use API Key when requesting the utsusemi API +useApiKey: 0 +# Crawler User Agent # crawlerUserAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 +utsusemi' diff --git a/docs/env.md b/docs/env.md new file mode 100644 index 0000000..c19ecd1 --- /dev/null +++ b/docs/env.md @@ -0,0 +1,14 @@ +# utsusemi Environment / config.yml + +| | Environment | config.yml key | default | +| --- | --- | --- | --- | +| Service name | `UTSUSEMI_SERVICE_NAME` | `serviceName` | utsusemi | +| AWS deploy region | `UTSUSEMI_REGION` | `region` | ap-northeast-1 | +| Crawl target host | `UTSUSEMI_TARGET_HOST` | 
`targetHost` | | +| Web site hosting S3 bucket name | `UTSUSEMI_BUCKET_NAME` | `bucketName` | | +| Crawl worker process | `UTSUSEMI_WORKER_PROCESS` | `workerProcess` | 2 | +| Delay time when starting a worker (ms) | `UTSUSEMI_WORKER_DELAY` | `workerDelay` | 100 | +| Crawl job threads per worker | `UTSUSEMI_THREADS_PER_WORKER` | `threadsPerWorker` | 1 | +| Change http://example.com/path/to -> http://example.com/path/to/ | `UTSUSEMI_FORCE_TRAILING_SLASH` | `forceTrailingSlash` | 1 | +| Use API Key when requesting the utsusemi API | `UTSUSEMI_USE_API_KEY` | `useApiKey` | 0 | +| Crawler User Agent | `UTSUSEMI_CRAWLER_USER_AGENT` | `crawlerUserAgent` | `utsusemi/{version}` | diff --git a/serverless.yml b/serverless.yml index 86fe223..2bdba6c 100644 --- a/serverless.yml +++ b/serverless.yml @@ -11,11 +11,11 @@ provider: UTSUSEMI_TARGET_HOST: ${env:UTSUSEMI_TARGET_HOST, file(./config.yml):targetHost} UTSUSEMI_BUCKET_NAME: ${env:UTSUSEMI_BUCKET_NAME, file(./config.yml):bucketName} UTSUSEMI_WORKER_PROCESS: ${env:UTSUSEMI_WORKER_PROCESS, file(./config.yml):workerProcess, '2'} - UTSUSEMI_WORKER_DELAY: ${env:UTSUSEMI_WORKER_DELAY, file(./config.yml):workerDelay, '2'} - UTSUSEMI_THREADS_PER_WORKER: ${env:UTSUSEMI_THREADS_PER_WORKER, file(./config.yml):threadsPerWorker, '5'} + UTSUSEMI_WORKER_DELAY: ${env:UTSUSEMI_WORKER_DELAY, file(./config.yml):workerDelay, '100'} + UTSUSEMI_THREADS_PER_WORKER: ${env:UTSUSEMI_THREADS_PER_WORKER, file(./config.yml):threadsPerWorker, '1'} UTSUSEMI_FORCE_TRAILING_SLASH: ${env:UTSUSEMI_FORCE_TRAILING_SLASH, file(./config.yml):forceTrailingSlash, '1'} - UTSUSEMI_CRAWLER_USER_AGENT: ${env:UTSUSEMI_CRAWLER_USER_AGENT, file(./config.yml):crawlerUserAgent, ''} UTSUSEMI_USE_API_KEY: ${env:UTSUSEMI_USE_API_KEY, file(./config.yml):useApiKey, '0'} + UTSUSEMI_CRAWLER_USER_AGENT: ${env:UTSUSEMI_CRAWLER_USER_AGENT, file(./config.yml):crawlerUserAgent, ''} runtime: nodejs6.10 stage: ${self:provider.environment.UTSUSEMI_STAGE} region: 
${self:provider.environment.UTSUSEMI_REGION}