forked from LinkedInAttic/sensei
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
3,314 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,47 @@ | |||
type=java | |||
job.class=com.sensei.indexing.hadoop.demo.CarDemo | |||
|
|||
mapreduce.job.maps=2 | |||
sensei.num.shards=3 | |||
|
|||
mapred.job.name=CarDemoShardedIndexing | |||
|
|||
# if the output.path already exists, delete it first | |||
sensei.force.output.overwrite=true | |||
|
|||
# Lower this value if the number of mappers is large. The default is 50 MB = 52428800 bytes. | |||
sensei.max.ramsize.bytes=52428800 | |||
|
|||
############# path of schema for interpreter ############# | |||
|
|||
##### TextJSON schema Sample (car demo) absolute path ###### | |||
sensei.schema.file.url=conf/schema.xml | |||
|
|||
############ Input and Output ################## | |||
|
|||
####### Text JSON data (car demo) ##### | |||
read.lock=data/cars.json | |||
sensei.input.dirs=data/cars.json | |||
|
|||
######## Output configuration ###### | |||
write.lock=example/hadoop-indexing/output | |||
sensei.output.dir=example/hadoop-indexing/fileoutput | |||
|
|||
######## Index output location ###### | |||
sensei.index.path=example/hadoop-indexing/index | |||
|
|||
############# schemas for mapper input ################ | |||
|
|||
sensei.input.format=org.apache.hadoop.mapred.TextInputFormat | |||
|
|||
############## Sharding strategy ################ | |||
sensei.distribution.policy=com.sensei.indexing.hadoop.demo.CarShardingStrategy | |||
|
|||
############# Converter for mapper input (data conversion and filtering) ########## | |||
sensei.mapinput.converter=com.sensei.indexing.hadoop.demo.CarMapInputConverter | |||
|
|||
############# Analyzer configuration for lucene ############### | |||
sensei.document.analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer | |||
sensei.document.analyzer.version=LUCENE_30 | |||
|
|||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,65 @@ | |||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>com.senseidb</groupId>
    <artifactId>sensei-parent</artifactId>
    <version>1.0.0-SNAPSHOT</version>
    <relativePath>../../sensei-parent/pom.xml</relativePath>
  </parent>

  <artifactId>sensei-example-hadoop</artifactId>
  <packaging>jar</packaging>
  <name>sensei example hadoop indexing</name>
  <description>sensei hadoop indexer example</description>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!--
      Local Hadoop installation directory handed to maven-hadoop-plugin.
      The default keeps the previously hard-coded location; override it on
      the command line with -Dhadoop.home=/path/to/hadoop instead of editing
      this file.
    -->
    <hadoop.home>/home/jwang/opensource/hadoop-0.21.0</hadoop.home>
  </properties>

  <build>
    <plugins>
      <!-- Set the compiler to java6 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.1</version>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
          <encoding>UTF-8</encoding>
        </configuration>
      </plugin>
      <plugin>
        <groupId>com.github.maven-hadoop.plugin</groupId>
        <artifactId>maven-hadoop-plugin</artifactId>
        <version>0.20.1</version>
        <configuration>
          <!-- Resolved from the hadoop.home property declared above. -->
          <hadoopHome>${hadoop.home}</hadoopHome>
        </configuration>
      </plugin>
    </plugins>
  </build>

  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>sensei-hadoop-indexing</artifactId>
      <version>${project.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-core</artifactId>
      <version>0.20.204.0</version>
    </dependency>

    <dependency>
      <groupId>org.json</groupId>
      <artifactId>json</artifactId>
      <version>20080701</version>
    </dependency>
  </dependencies>

</project>
36 changes: 36 additions & 0 deletions
36
example/hadoop-indexing/src/main/java/com/sensei/indexing/hadoop/demo/CarDemo.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,36 @@ | |||
package com.sensei.indexing.hadoop.demo; | |||
|
|||
|
|||
|
|||
import org.apache.hadoop.conf.Configuration; | |||
import org.apache.hadoop.mapred.JobClient; | |||
import org.apache.hadoop.mapred.JobConf; | |||
import org.apache.hadoop.util.Tool; | |||
import org.apache.hadoop.util.ToolRunner; | |||
|
|||
import com.sensei.indexing.hadoop.job.MapReduceJob; | |||
import com.sensei.indexing.hadoop.util.PropertiesLoader; | |||
|
|||
|
|||
public class CarDemo extends MapReduceJob implements Tool { | |||
|
|||
|
|||
public int run(String[] args) throws Exception { | |||
JobConf conf = createJob(CarDemo.class); | |||
|
|||
conf.setJobName("CarDemo"); | |||
JobClient.runJob(conf); | |||
return 0; | |||
} | |||
|
|||
|
|||
public static void main(String[] args) throws Exception { | |||
long start = System.currentTimeMillis(); | |||
Configuration conf = PropertiesLoader.loadProperties("example/hadoop-indexing/conf/JobCarDemo.job"); | |||
int res = ToolRunner.run(conf, new CarDemo(), args); | |||
long end = System.currentTimeMillis(); | |||
System.out.println("Total time: " + (end - start)); | |||
System.exit(res); | |||
} | |||
|
|||
} |
24 changes: 24 additions & 0 deletions
24
...e/hadoop-indexing/src/main/java/com/sensei/indexing/hadoop/demo/CarMapInputConverter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Original file line | Diff line number | Diff line change |
---|---|---|---|
@@ -0,0 +1,24 @@ | |||
package com.sensei.indexing.hadoop.demo; | |||
|
|||
import org.json.JSONException; | |||
import org.json.JSONObject; | |||
import org.apache.hadoop.conf.Configuration; | |||
import org.apache.hadoop.io.Text; | |||
|
|||
import com.sensei.indexing.hadoop.map.MapInputConverter; | |||
|
|||
public class CarMapInputConverter extends MapInputConverter { | |||
|
|||
@Override | |||
public JSONObject getJsonInput(Object key, Object value, Configuration conf) throws JSONException { | |||
String line = ((Text) value).toString(); | |||
return new JSONObject(line); | |||
} | |||
|
|||
@Override | |||
protected JSONObject doFilter(JSONObject data) throws Exception { | |||
return data; | |||
} | |||
|
|||
|
|||
} |
Oops, something went wrong.