From aac773db5b4dabc2793f6746f1b59987246db771 Mon Sep 17 00:00:00 2001 From: seunggabi Date: Wed, 15 Feb 2023 20:58:20 +0900 Subject: [PATCH] (#7) feat: aws get clusters loop --- yarn-log-collector/.gitignore | 2 + yarn-log-collector/README.md | 30 +++++++--- yarn-log-collector/start-collector.sh | 79 +++++++++++++++++---------- 3 files changed, 75 insertions(+), 36 deletions(-) create mode 100644 yarn-log-collector/.gitignore diff --git a/yarn-log-collector/.gitignore b/yarn-log-collector/.gitignore new file mode 100644 index 0000000..82dcb7b --- /dev/null +++ b/yarn-log-collector/.gitignore @@ -0,0 +1,2 @@ +*.json +*.zip diff --git a/yarn-log-collector/README.md b/yarn-log-collector/README.md index 3abf5c2..5b4a3f7 100644 --- a/yarn-log-collector/README.md +++ b/yarn-log-collector/README.md @@ -17,16 +17,30 @@ - Run example ``` -sh start-collector.sh -m 3 -u http://10.10.160.64:8088 -Start yarn logs collecting from curl. -HTTP Secure-Mode: 3 -YARN-RM URL: http://10.10.160.64:8088 -Yarn Cluter API target: http://10.10.160.64:8088/ws/v1/cluster/apps -HTTP Case +sh start-collector.sh -m 3 -u 10.10.160.64 +### Start yarn logs collecting from curl. +### HTTP Secure-Mode: 3 +### YARN-RM HOST: 10.10.160.64 +### Yarn Cluter API target: http://10.10.160.64:8088/ws/v1/cluster/apps +### HTTP Case % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 18.0M 0 18.0M 0 0 22.0M 0 --:--:-- --:--:-- --:--:-- 22.0M -Collecting yarn logs completed. +### Collecting yarn logs completed. +``` +``` +sh start-collector.sh -m 3 +### Get emr list-clusters --active. +10.10.160.64 emr-test +### Start yarn logs collecting from curl. 
+### HTTP Secure-Mode: 3 +### YARN-RM HOST: 10.10.160.64 +### Yarn Cluter API target: http://10.10.160.64:8088/ws/v1/cluster/apps +### HTTP Case + % Total % Received % Xferd Average Speed Time Time Time Current + Dload Upload Total Spent Left Speed +100 18.0M 0 18.0M 0 0 22.0M 0 --:--:-- --:--:-- --:--:-- 22.0M +### Collecting yarn logs completed. ``` Result file(collected yarn logs) generated in output running directory. @@ -41,7 +55,7 @@ Set up a cron job by checking the log retention period so that no logs are dropp - Cron Set Cron service like this ``` - 00 */4 * * * /home/hadoop/start-collector.sh -m 3 -u http://10.10.160.64:8088 + 00 */4 * * * /home/hadoop/start-collector.sh -m 3 -u 10.10.160.64 ``` - Description Since the production cluster is busy at specific times such as batch jobs, it is recommended to set enough margin for the execution cycle. Therefore, though 30,000 jobs are executed a day, but the log is collected every 4 hours. After collecting logs with duplicates, it's going to be removed with orgarnizer application. diff --git a/yarn-log-collector/start-collector.sh b/yarn-log-collector/start-collector.sh index 4977af6..69cb1d5 100644 --- a/yarn-log-collector/start-collector.sh +++ b/yarn-log-collector/start-collector.sh @@ -2,38 +2,61 @@ #HTTP Secure Mode : [1] HTTPS with Kerberos Case [2] HTTPS Case [3] HTTP Case #YARN Resource Manager URL form: -#[HTTPS] https://yarn-resoure-manager-ip-adress:8090 -#[HTTP] http://yarn-resoure-manager-ip-adress:8088 +#[HTTPS] yarn-resource-manager-ip-address (the script appends port 8090) +#[HTTP] yarn-resource-manager-ip-address (the script appends port 8088) while getopts m:u: flag do - case "${flag}" in - m) mode=${OPTARG};; - u) url=${OPTARG};; - esac + case "${flag}" in + m) mode=${OPTARG};; + u) url=${OPTARG};; + esac done -output=customer-yarn-logs-json-$(date +"%Y-%m-%d-%T").json -cluster_api="/ws/v1/cluster/apps" -target="${url}${cluster_api}" - -echo "Start yarn logs collecting from curl." 
-echo "HTTP Secure-Mode: $mode" -echo "YARN-RM URL: $url" -echo "Yarn Cluter API target: $target" - -if [ $mode -eq 1 ] -then - echo "HTTPS with Kerberos Case" - curl -k --negotiate -u: -X GET $target > ${output} -elif [ $mode -eq 2 ] -then - echo "HTTPS Case" - curl -k -u: -X GET $target > ${output} -elif [ $mode -eq 3 ] -then - echo "HTTP Case" - curl -X GET $target > ${output} +if [[ -z "${url}" ]]; then + echo "### Get emr list-clusters --active." + + list=$(aws emr list-clusters --active | jq -r ".Clusters[].Id") + for id in ${list} ; do + dns=$(aws emr describe-cluster --cluster-id ${id} | + jq -r ".Cluster.MasterPublicDnsName") + dns=$(echo ${dns} | + sed -r "s/ip-([0-9]+)-([0-9]+)-([0-9]+)-([0-9]+)\.[[:alnum:]-]*\.compute\.internal/\1.\2.\3.\4/g") + + name=$(aws emr describe-cluster --cluster-id ${id} | + jq -r ".Cluster.Name") + echo ${dns} ${name} + + url="${url} ${dns}" + done fi -echo "Collecting yarn logs completed." +for i in ${url} ; do + output=customer-yarn-logs-json-${i}-$(date +"%Y-%m-%d-%T").json + cluster_api=/ws/v1/cluster/apps + + echo "### Start yarn logs collecting from curl." + echo "### HTTP Secure-Mode: ${mode}" + echo "### YARN-RM URL: ${i}" + + if [ $mode -eq 1 ] + then + echo "### HTTPS with Kerberos Case" + target=https://${i}:8090${cluster_api} + curl -k --negotiate -u: -X GET ${target} > ${output} + elif [ $mode -eq 2 ] + then + echo "### HTTPS Case" + target=https://${i}:8090${cluster_api} + curl -k -u: -X GET ${target} > ${output} + elif [ $mode -eq 3 ] + then + echo "### HTTP Case" + target=http://${i}:8088${cluster_api} + curl -X GET ${target} > ${output} + fi + + echo "### Yarn Cluter API target: ${target}" +done + +echo "### Collecting yarn logs completed."