Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

update hive app to use hive-runner

  • Loading branch information...
commit 5062b605e691a3387d095f1c6625d3dccd6296f4 1 parent 51012e0
mpollack authored
View
14 hadoop/hive/src/main/java/com/oreilly/springdata/hadoop/hive/HiveAppWithApacheLogs.java
@@ -33,9 +33,9 @@ public static void main(String[] args) throws Exception {
"/META-INF/spring/hive-apache-log-context.xml", HiveAppWithApacheLogs.class);
log.info("Hive Application Running");
context.registerShutdownHook();
- //HiveRunner runner = context.getBean(HiveRunner.class);
+ HiveRunner runner = context.getBean(HiveRunner.class);
+ runner.call();
- //runner.call();
/*
JdbcPasswordRepository repo = context.getBean(JdbcPasswordRepository.class);
repo.processPasswordFile("password-analysis.hql");
@@ -44,16 +44,6 @@ public static void main(String[] args) throws Exception {
AnalysisService analysis = context.getBean(AnalysisService.class);
analysis.performAnalysis();
-
-
- /*
- HiveTemplate pigTemplate = context.getBean(HiveTemplate.class);
- Properties scriptParameters = new Properties();
- scriptParameters.put("piggybanklib","./lib/piggybank-0.9.2.jar");
- scriptParameters.put("inputFile","./data/apache.log");
- pigTemplate.executeScript("apache-log-simple.pig", scriptParameters);
- */
-
System.out.println("hit enter to run again");
Scanner scanIn = new Scanner(System.in);
View
19 hadoop/hive/src/main/resources/META-INF/spring/hive-apache-log-context.xml
@@ -9,23 +9,19 @@
<context:property-placeholder location="hadoop.properties,hive.properties"/>
- <configuration id="hadoopConfiguration">
+ <configuration>
fs.default.name=${hd.fs}
mapred.job.tracker=${mapred.job.tracker}
</configuration>
- <hive-server configuration-ref="hadoopConfiguration" auto-startup="true" port="${hive.port}"
+ <hive-server port="${hive.port}"
properties-location="hive-server.properties"/>
- <hive-client-factory id="hiveClientFactory" host="${hive.host}" port="${hive.port}"/>
+ <hive-client-factory host="${hive.host}" port="${hive.port}"/>
- <hive-template id="hiveTemplate" hive-client-factory-ref="hiveClientFactory"/>
-
-
<!-- not working now -->
- <!--
- <hive-runner id="hiveRunner" hive-client-factory-ref="hiveClientFactory"
- run-at-startup="false" >
+
+ <hive-runner id="hiveRunner" run-at-startup="false" >
<script location="apache-log-simple.hql">
<arguments>
hiveContribJar=/home/mpollack/software/hive-0.8.1-bin/lib/hive-contrib-0.8.1.jar
@@ -33,14 +29,15 @@
</arguments>
</script>
</hive-runner>
- -->
+ <hive-template id="hiveTemplate"/>
+ <!--
<beans:bean id="analysisService" class="com.oreilly.springdata.hadoop.hive.AnalysisService">
<beans:constructor-arg name="hiveOperations" ref="hiveTemplate"/>
<beans:property name="hiveContribJar" value="/home/mpollack/software/hive-0.8.1-bin/lib/hive-contrib-0.8.1.jar"/>
<beans:property name="scriptResource" value="classpath:apache-log-simple.hql"/>
<beans:property name="localInPath" value="./data/apache.log"/>
</beans:bean>
-
+ -->
</beans:beans>
View
2  hadoop/hive/src/main/resources/apache-log-simple.hql
@@ -10,7 +10,7 @@ DROP TABLE apachelog;
CREATE TABLE apachelog(remoteHost STRING, remoteLogname STRING, user STRING, time STRING, method STRING, uri STRING, proto STRING, status STRING, bytes STRING, referer STRING, userAgent STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' WITH SERDEPROPERTIES ( "input.regex" = "^([^ ]*) +([^ ]*) +([^ ]*) +\\[([^]]*)\\] +\\\"([^ ]*) ([^ ]*) ([^ ]*)\\\" ([^ ]*) ([^ ]*) (?:\\\"-\\\")*\\\"(.*)\\\" (.*)$", "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s" ) STORED AS TEXTFILE;
-LOAD DATA LOCAL INPATH "${hiveconf:localInPath}" INTO TABLE apachelog;
+LOAD DATA LOCAL INPATH ${hiveconf:localInPath} INTO TABLE apachelog;
-- basic filtering
-- SELECT a.uri FROM apachelog a WHERE a.method='GET' AND a.status='200';
View
5 hadoop/pig/src/main/resources/META-INF/spring/pig-context.xml
@@ -15,8 +15,7 @@
mapred.job.tracker=${mapred.job.tracker}
</configuration>
- <pig-factory id="pigFactory" job-name="passwdAnalysis" exec-type="MAPREDUCE"
- properties-location="pig-server.properties"/>
+ <pig-factory properties-location="pig-server.properties"/>
<!--
<script location="password-analysis.pig">
<arguments>
@@ -35,7 +34,7 @@
<property name="outputDir" value="${outputDir}"/>
</script>
- <pig-runner id="pigRunner" pig-factory-ref="pigFactory"
+ <pig-runner id="pigRunner"
pre-action="hdfsScript"
run-at-startup="true" >
<script location="password-analysis.pig">
Please sign in to comment.
Something went wrong with that request. Please try again.