-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.sh
More file actions
47 lines (43 loc) · 1.29 KB
/
run.sh
File metadata and controls
47 lines (43 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/bin/bash
#
# BASH script to run automated jobs in Hadoop
#
# post_b2bua-911-mining
#
# luismartingil - 2013
# www.luismartingil.com
#
# Hadoop streaming jar handed to `hadoop jar` for every job.
STREAM=/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.4.0.jar

# HDFS folders: where the input logs are located and where results are stored.
INPUT_FOLDER=b2bua/input
OUTPUT_FOLDER=b2bua/results

# Value passed to the streaming job as mapred.reduce.tasks.
TASKS=1

# Timestamp tag for this run; it becomes a path component of every output
# folder, so results of separate runs do not share a directory.
DAT=$(date '+%F__%H_%M_%S__%s')
printf '%s\n' "$DAT"
# Local root that mirrors the HDFS results tree. Defaults to the location this
# script has always used; set LOCAL_RESULTS_ROOT in the environment to override.
LOCAL_RESULTS_ROOT=${LOCAL_RESULTS_ROOT:-/home/cloudera/post_b2bua-911-mining}

#######################################
# Run one streaming job for a log/option pair and copy its results locally.
# Globals:
#   STREAM, INPUT_FOLDER, OUTPUT_FOLDER, TASKS, DAT, LOCAL_RESULTS_ROOT (read)
# Arguments:
#   $1 - log name; the job input is $INPUT_FOLDER/<name>.log
#   $2 - option handed to the mapper/reducer through PARAM_OPT
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 when every step succeeded, 1 as soon as one step fails.
#######################################
run_job() {
  local lfile=$1
  local opt=$2
  # hdfs place where my input files are located
  local input="$INPUT_FOLDER/$lfile.log"
  local output="$OUTPUT_FOLDER/$DAT/$lfile/$opt/"
  local local_dir="$LOCAL_RESULTS_ROOT/$output"

  printf '%s\n' "Working with log:$lfile and opt:$opt"
  printf '%s\n' "input:$input"
  printf '%s\n' "output:$output"

  # Only the parent folder is created here; the job's own output folder
  # ($output) is passed to the streaming job via -output.
  if ! hadoop fs -mkdir -p "$OUTPUT_FOLDER/$DAT/$lfile"; then
    printf '%s\n' "ERROR: cannot create HDFS folder for log:$lfile" >&2
    return 1
  fi

  if ! hadoop jar "$STREAM" \
    -Dmapred.reduce.tasks="$TASKS" \
    -input "$input" \
    -output "$output" \
    -cmdenv PARAM_OPT="$opt" \
    -file mapper.py \
    -file reducer.py \
    -mapper mapper.py \
    -reducer reducer.py; then
    # Without this check a failed job was followed by a pointless copy and a
    # misleading "done!" message.
    printf '%s\n' "ERROR: streaming job failed for log:$lfile and opt:$opt" >&2
    return 1
  fi

  # Retrieving the results
  if ! mkdir -p -- "$local_dir"; then
    printf '%s\n' "ERROR: cannot create local folder $local_dir" >&2
    return 1
  fi
  # NOTE(review): the destination already exists at this point, so copyToLocal
  # presumably places the results under an extra "$opt/" level inside it.
  # Behaviour is kept as it was - confirm whether the nesting is intended.
  if ! hadoop fs -copyToLocal "$output" "$local_dir"; then
    printf '%s\n' "ERROR: cannot retrieve results for log:$lfile and opt:$opt" >&2
    return 1
  fi

  printf '%s\n' "Working with log:$lfile and opt:$opt done!"
}

# Run every log/option combination. A failing combination is reported and
# counted, but the remaining ones are still attempted.
failures=0
for lfile in INb2bua_test INb2bua_241 INb2bua_242; do
  for opt in eday dayow hourod minoh eweek dayow_hourod hourod_dayow; do
    run_job "$lfile" "$opt" || failures=$((failures + 1))
  done
done

if ((failures > 0)); then
  printf '%s\n' "ERROR: $failures job(s) failed" >&2
fi
# Last command of the script: its status becomes the script's exit status,
# so callers see a non-zero exit when at least one job failed.
((failures == 0))