-
Notifications
You must be signed in to change notification settings - Fork 0
/
Top5Uri_everyMonth.pig
21 lines (15 loc) · 1.21 KB
/
Top5Uri_everyMonth.pig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
-- Top5Uri_everyMonth.pig
-- Finds the five most frequently requested URIs for each month in an Apache
-- combined-format access log stored on HDFS, and writes the result as CSV.
--
-- Input : /user/cloudera/input_data/weblogs.txt
-- Output: /user/cloudera/pig_output/Top5Uri_everyMonth  (month, uri, count)

-- piggybank.jar supplies both CombinedLogLoader and CSVExcelStorage.
REGISTER '/usr/lib/pig/piggybank.jar';

-- The loader lives in the `apachelog` sub-package of piggybank storage.
DEFINE CombinedLogLoader org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();

-- Parse each log line into its combined-log fields.
logs_extract = LOAD '/user/cloudera/input_data/weblogs.txt' USING CombinedLogLoader
    AS (addr: chararray, logname: chararray, user: chararray, time: chararray,
        method: chararray, uri: chararray, proto: chararray,
        status: int, bytes: int, referer: chararray, userAgent: chararray);

-- Derive the month from the timestamp and keep only the columns needed below.
-- NOTE(review): assumes `time` looks like "10/Oct/2000:13:55:36 -0700", so the
-- captured group is the three-letter month abbreviation ("Oct") - confirm
-- against the actual log data. The year is not part of the key, so the same
-- month from different years is counted together.
log_month_uri = FOREACH logs_extract GENERATE
    REGEX_EXTRACT(time, '^[0-9]{1,2}/([A-Za-z]{3})/[0-9]{4}', 1) AS month,
    uri;

-- Drop records whose timestamp did not yield a month, so they do not end up
-- in a single null-key group.
log_month_specific = FILTER log_month_uri BY month IS NOT NULL;

-- Count requests per (month, uri) pair.
result_month_uri = GROUP log_month_specific BY (month, uri);
result = FOREACH result_month_uri GENERATE
    FLATTEN(group) AS (month, uri),
    COUNT($1) AS count;

-- For each month, keep the five URIs with the highest request count.
result_month = GROUP result BY month;
result_top5Uri = FOREACH result_month {
    -- Secondary key on uri makes the choice among equal counts deterministic.
    sorted = ORDER result BY count DESC, uri ASC;
    top_5 = LIMIT sorted 5;
    GENERATE FLATTEN(top_5);
};

STORE result_top5Uri INTO '/user/cloudera/pig_output/Top5Uri_everyMonth' USING org.apache.pig.piggybank.storage.CSVExcelStorage();