This repository has been archived by the owner on Oct 26, 2023. It is now read-only.

Commit

update hive app to use hive-runner
markpollack committed Oct 8, 2012
1 parent 51012e0 commit 5062b60
Showing 4 changed files with 13 additions and 27 deletions.
HiveAppWithApacheLogs.java
@@ -33,9 +33,9 @@ public static void main(String[] args) throws Exception {
"/META-INF/spring/hive-apache-log-context.xml", HiveAppWithApacheLogs.class);
log.info("Hive Application Running");
context.registerShutdownHook();
//HiveRunner runner = context.getBean(HiveRunner.class);
HiveRunner runner = context.getBean(HiveRunner.class);
runner.call();

//runner.call();
/*
JdbcPasswordRepository repo = context.getBean(JdbcPasswordRepository.class);
repo.processPasswordFile("password-analysis.hql");
@@ -44,16 +44,6 @@ public static void main(String[] args) throws Exception {

AnalysisService analysis = context.getBean(AnalysisService.class);
analysis.performAnalysis();


/*
HiveTemplate pigTemplate = context.getBean(HiveTemplate.class);
Properties scriptParameters = new Properties();
scriptParameters.put("piggybanklib","./lib/piggybank-0.9.2.jar");
scriptParameters.put("inputFile","./data/apache.log");
pigTemplate.executeScript("apache-log-simple.pig", scriptParameters);
*/


System.out.println("hit enter to run again");
Scanner scanIn = new Scanner(System.in);
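For context, here is a rough, self-contained sketch of what the main method looks like after this commit, using Spring for Apache Hadoop's HiveRunner and the context file shown next. The package name, the single re-run prompt, and the explicit context.close() are assumptions; the AnalysisService call and logging from the surrounding code are omitted.

```java
package com.oreilly.springdata.hadoop.hive; // assumed package; the real file may differ

import java.util.Scanner;

import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.data.hadoop.hive.HiveRunner;

public class HiveAppWithApacheLogs {

    public static void main(String[] args) throws Exception {
        // Load the XML context shown in the next file; it declares the Hive server,
        // client factory, template and runner.
        AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                "/META-INF/spring/hive-apache-log-context.xml", HiveAppWithApacheLogs.class);
        context.registerShutdownHook();

        // HiveRunner executes the <script> elements configured on <hive-runner>; with
        // run-at-startup="false" it only runs when call() is invoked.
        HiveRunner runner = context.getBean(HiveRunner.class);
        runner.call();

        // Re-run the script on demand, roughly as in the original sample.
        System.out.println("hit enter to run again");
        Scanner scanIn = new Scanner(System.in);
        scanIn.nextLine();
        runner.call();

        context.close();
    }
}
```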
hadoop/hive/src/main/resources/META-INF/spring/hive-apache-log-context.xml
@@ -9,38 +9,35 @@

<context:property-placeholder location="hadoop.properties,hive.properties"/>

<configuration id="hadoopConfiguration">
<configuration>
fs.default.name=${hd.fs}
mapred.job.tracker=${mapred.job.tracker}
</configuration>

<hive-server configuration-ref="hadoopConfiguration" auto-startup="true" port="${hive.port}"
<hive-server port="${hive.port}"
properties-location="hive-server.properties"/>

<hive-client-factory id="hiveClientFactory" host="${hive.host}" port="${hive.port}"/>
<hive-client-factory host="${hive.host}" port="${hive.port}"/>

<hive-template id="hiveTemplate" hive-client-factory-ref="hiveClientFactory"/>


<!-- not working now -->
<!--
<hive-runner id="hiveRunner" hive-client-factory-ref="hiveClientFactory"
run-at-startup="false" >

<hive-runner id="hiveRunner" run-at-startup="false" >
<script location="apache-log-simple.hql">
<arguments>
hiveContribJar=/home/mpollack/software/hive-0.8.1-bin/lib/hive-contrib-0.8.1.jar
localInPath="./data/apache.log"
</arguments>
</script>
</hive-runner>
-->

<hive-template id="hiveTemplate"/>
<!--
<beans:bean id="analysisService" class="com.oreilly.springdata.hadoop.hive.AnalysisService">
<beans:constructor-arg name="hiveOperations" ref="hiveTemplate"/>
<beans:property name="hiveContribJar" value="/home/mpollack/software/hive-0.8.1-bin/lib/hive-contrib-0.8.1.jar"/>
<beans:property name="scriptResource" value="classpath:apache-log-simple.hql"/>
<beans:property name="localInPath" value="./data/apache.log"/>
</beans:bean>

-->

</beans:beans>
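The simplification above leans on the namespace defaults: when id and the various -ref attributes are omitted, Spring for Apache Hadoop registers and wires the beans under their conventional names (hadoopConfiguration, hiveClientFactory, hiveTemplate), so hive-server, hive-runner and hive-template no longer need explicit references. For reference, the commented-out analysisService bean suggests a class shaped roughly like the hypothetical sketch below; only the constructor argument and property names come from the bean definition, and the body assumes HiveOperations exposes a query(String) method returning result rows, as in Spring for Apache Hadoop's HiveTemplate.

```java
package com.oreilly.springdata.hadoop.hive;

import java.util.List;

import org.springframework.core.io.Resource;
import org.springframework.data.hadoop.hive.HiveOperations;

// Hypothetical shape of the AnalysisService referenced above; the real class lives in the
// sample project. Only the constructor argument and property names are taken from the
// commented-out bean definition.
public class AnalysisService {

    private final HiveOperations hiveOperations;
    private String hiveContribJar;
    private Resource scriptResource;
    private String localInPath;

    public AnalysisService(HiveOperations hiveOperations) {
        this.hiveOperations = hiveOperations;
    }

    public void setHiveContribJar(String hiveContribJar) { this.hiveContribJar = hiveContribJar; }
    public void setScriptResource(Resource scriptResource) { this.scriptResource = scriptResource; }
    public void setLocalInPath(String localInPath) { this.localInPath = localInPath; }

    public void performAnalysis() {
        // Assumption: query(String) runs the HQL statement and returns the result rows.
        List<String> results = hiveOperations.query(
                "SELECT a.status, count(1) FROM apachelog a GROUP BY a.status");
        for (String row : results) {
            System.out.println(row);
        }
    }
}
```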
2 changes: 1 addition & 1 deletion hadoop/hive/src/main/resources/apache-log-simple.hql
@@ -10,7 +10,7 @@ DROP TABLE apachelog;

CREATE TABLE apachelog(remoteHost STRING, remoteLogname STRING, user STRING, time STRING, method STRING, uri STRING, proto STRING, status STRING, bytes STRING, referer STRING, userAgent STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' WITH SERDEPROPERTIES ( "input.regex" = "^([^ ]*) +([^ ]*) +([^ ]*) +\\[([^]]*)\\] +\\\"([^ ]*) ([^ ]*) ([^ ]*)\\\" ([^ ]*) ([^ ]*) (?:\\\"-\\\")*\\\"(.*)\\\" (.*)$", "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s %10$s %11$s" ) STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:localInPath}" INTO TABLE apachelog;
LOAD DATA LOCAL INPATH ${hiveconf:localInPath} INTO TABLE apachelog;

-- basic filtering
-- SELECT a.uri FROM apachelog a WHERE a.method='GET' AND a.status='200';
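The only change in this script is dropping the quotes around ${hiveconf:localInPath}, presumably because the value supplied through the <hive-runner> arguments above (localInPath="./data/apache.log") already carries its own quotes. To make the RegexSerDe definition easier to follow, here is a small Java sketch that applies the same regular expression to a made-up combined-format log line and prints which capture group feeds which column. The sample line is invented, and the ] inside the character class is escaped for java.util.regex; the extra backslashes in the HQL string are Hive escaping.

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ApacheLogRegexDemo {

    // Same pattern the RegexSerDe is configured with, written as a plain Java regex.
    private static final Pattern LOG_PATTERN = Pattern.compile(
            "^([^ ]*) +([^ ]*) +([^ ]*) +\\[([^\\]]*)\\] +\"([^ ]*) ([^ ]*) ([^ ]*)\" "
            + "([^ ]*) ([^ ]*) (?:\"-\")*\"(.*)\" (.*)$");

    public static void main(String[] args) {
        // Invented log line, just to show which group maps to which apachelog column.
        String line = "127.0.0.1 - frank [10/Oct/2012:13:55:36 -0700] "
                + "\"GET /index.html HTTP/1.1\" 200 2326 "
                + "\"http://example.com/start.html\" \"Mozilla/5.0\"";

        Matcher m = LOG_PATTERN.matcher(line);
        if (m.matches()) {
            String[] columns = { "remoteHost", "remoteLogname", "user", "time", "method",
                    "uri", "proto", "status", "bytes", "referer", "userAgent" };
            for (int i = 0; i < columns.length; i++) {
                System.out.println(columns[i] + " = " + m.group(i + 1));
            }
        }
    }
}
```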
5 changes: 2 additions & 3 deletions hadoop/pig/src/main/resources/META-INF/spring/pig-context.xml
@@ -15,8 +15,7 @@
mapred.job.tracker=${mapred.job.tracker}
</configuration>

<pig-factory id="pigFactory" job-name="passwdAnalysis" exec-type="MAPREDUCE"
properties-location="pig-server.properties"/>
<pig-factory properties-location="pig-server.properties"/>
<!--
<script location="password-analysis.pig">
<arguments>
@@ -35,7 +34,7 @@
<property name="outputDir" value="${outputDir}"/>
</script>

<pig-runner id="pigRunner" pig-factory-ref="pigFactory"
<pig-runner id="pigRunner"
pre-action="hdfsScript"
run-at-startup="true" >
<script location="password-analysis.pig">
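As with the Hive context, dropping the explicit id and pig-factory-ref attributes relies on the namespace defaults, and run-at-startup="true" together with pre-action="hdfsScript" means the HDFS setup script and then password-analysis.pig execute as soon as the context is refreshed. A minimal, hypothetical driver (the class name and context location are assumptions) therefore only needs to load the context:

```java
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

// Hypothetical driver class; name and context file location are assumptions.
public class PigApp {

    public static void main(String[] args) {
        // With run-at-startup="true", the pigRunner executes its pre-action (hdfsScript)
        // and then password-analysis.pig while the context is being refreshed,
        // so there is nothing else to invoke here.
        AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                "/META-INF/spring/pig-context.xml");
        context.close();
    }
}
```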
