Skip to content

Commit

Permalink
Merge remote branch 'bcui/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
wonlay committed Nov 10, 2011
2 parents b2d0cab + cc2f3c1 commit 838d720
Show file tree
Hide file tree
Showing 34 changed files with 3,314 additions and 7 deletions.
6 changes: 5 additions & 1 deletion .classpath
@@ -1,8 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<classpath> <classpath>
<classpathentry kind="src" path="sensei-core/src/main/java"/> <classpathentry kind="src" path="sensei-core/src/main/java"/>
<classpathentry kind="src" path="example/hadoop-indexing/src/main/java"/>
<classpathentry kind="src" path="sensei-hadoop-indexing/src/main/java"/>
<classpathentry kind="src" path="perf/src/main/java"/> <classpathentry kind="src" path="perf/src/main/java"/>
<classpathentry kind="src" path="sensei-core/src/test/java"/> <classpathentry kind="src" path="sensei-core/src/test/java"/>
<classpathentry kind="lib" path="lib/commons-httpclient-3.1.jar"/>
<classpathentry kind="var" path="M2_REPO/javax/inject/javax.inject/1/javax.inject-1.jar"/> <classpathentry kind="var" path="M2_REPO/javax/inject/javax.inject/1/javax.inject-1.jar"/>
<classpathentry kind="var" path="M2_REPO/javax/jms/jms/1.1/jms-1.1.jar"/> <classpathentry kind="var" path="M2_REPO/javax/jms/jms/1.1/jms-1.1.jar"/>
<classpathentry kind="var" path="M2_REPO/javax/servlet/servlet-api/2.5/servlet-api-2.5.jar"/> <classpathentry kind="var" path="M2_REPO/javax/servlet/servlet-api/2.5/servlet-api-2.5.jar"/>
Expand All @@ -26,6 +29,7 @@
<classpathentry kind="var" path="M2_REPO/fastutil/fastutil/5.0.5/fastutil-5.0.5.jar"/> <classpathentry kind="var" path="M2_REPO/fastutil/fastutil/5.0.5/fastutil-5.0.5.jar"/>
<classpathentry kind="var" path="M2_REPO/com/google/inject/guice/3.0/guice-3.0.jar"/> <classpathentry kind="var" path="M2_REPO/com/google/inject/guice/3.0/guice-3.0.jar"/>
<classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.1/httpclient-4.1.jar"/> <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpclient/4.1/httpclient-4.1.jar"/>
<classpathentry kind="var" path="M2_REPO/org/apache/hadoop/hadoop-core/0.20.203.0/hadoop-core-0.20.203.0.jar"/>
<classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.1/httpcore-4.1.jar"/> <classpathentry kind="var" path="M2_REPO/org/apache/httpcomponents/httpcore/4.1/httpcore-4.1.jar"/>
<classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-core-asl/1.4.2/jackson-core-asl-1.4.2.jar"/> <classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-core-asl/1.4.2/jackson-core-asl-1.4.2.jar"/>
<classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-mapper-asl/1.7.5/jackson-mapper-asl-1.7.5.jar"/> <classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-mapper-asl/1.7.5/jackson-mapper-asl-1.7.5.jar"/>
Expand Down Expand Up @@ -71,7 +75,7 @@
<classpathentry kind="var" path="M2_REPO/org/springframework/spring-core/2.5.6/spring-core-2.5.6.jar"/> <classpathentry kind="var" path="M2_REPO/org/springframework/spring-core/2.5.6/spring-core-2.5.6.jar"/>
<classpathentry kind="var" path="M2_REPO/org/apache/velocity/velocity/1.6.4/velocity-1.6.4.jar"/> <classpathentry kind="var" path="M2_REPO/org/apache/velocity/velocity/1.6.4/velocity-1.6.4.jar"/>
<classpathentry kind="var" path="M2_REPO/com/linkedin/zoie/zoie-core/2.7.0-SNAPSHOT/zoie-core-2.7.0-SNAPSHOT.jar"/> <classpathentry kind="var" path="M2_REPO/com/linkedin/zoie/zoie-core/2.7.0-SNAPSHOT/zoie-core-2.7.0-SNAPSHOT.jar"/>
<classpathentry kind="var" path="M2_REPO/com/browseengine/bobo/bobo-browse/3.0.0-SNAPSHOT/bobo-browse-3.0.0-SNAPSHOT.jar"/> <classpathentry kind="var" path="M2_REPO/com/browseengine/bobo/bobo-browse/3.0.0-SNAPSHOT/bobo-browse-3.0.0-SNAPSHOT.jar"/>
<classpathentry kind="var" path="M2_REPO/com/linkedin/zoie/zoie-jms/2.7.0-SNAPSHOT/zoie-jms-2.7.0-SNAPSHOT.jar" sourcepath="M2_REPO/com/linkedin/zoie/zoie-jms/2.7.0-SNAPSHOT/zoie-jms-2.7.0-SNAPSHOT-sources.jar"> <classpathentry kind="var" path="M2_REPO/com/linkedin/zoie/zoie-jms/2.7.0-SNAPSHOT/zoie-jms-2.7.0-SNAPSHOT.jar" sourcepath="M2_REPO/com/linkedin/zoie/zoie-jms/2.7.0-SNAPSHOT/zoie-jms-2.7.0-SNAPSHOT-sources.jar">
<attributes> <attributes>
<attribute name="javadoc_location" value="jar:file:/Users/jwang/.m2/repository/com/linkedin/zoie/zoie-jms/2.7.0-SNAPSHOT/zoie-jms-2.7.0-SNAPSHOT-javadoc.jar!/"/> <attribute name="javadoc_location" value="jar:file:/Users/jwang/.m2/repository/com/linkedin/zoie/zoie-jms/2.7.0-SNAPSHOT/zoie-jms-2.7.0-SNAPSHOT-javadoc.jar!/"/>
Expand Down
59 changes: 54 additions & 5 deletions clients/python/src/sensei_client.py
Expand Up @@ -158,19 +158,24 @@
<statement> ::= ( <select_stmt> | <describe_stmt> ) [';'] <statement> ::= ( <select_stmt> | <describe_stmt> ) [';']
<select_stmt> ::= SELECT <select_list> <from_clause> [<where_clause>] [<given_clause>] <additional_clauses> <select_stmt> ::= SELECT <select_list> <from_clause> [<where_clause>] [<given_clause>]
[<additional_clauses>]
<describe_stmt> ::= ( DESC | DESCRIBE ) <index_name> <describe_stmt> ::= ( DESC | DESCRIBE ) <index_name>
<select_list> ::= '*' | <column_name_list> <select_list> ::= '*' | <column_name_list>
<column_name_list> ::= <column_name> ( ',' <column_name> )* <column_name_list> ::= <column_name> ( ',' <column_name> )*
<from_clause> ::= FROM <index_name> <from_clause> ::= FROM <index_name>
<where_clause> ::= WHERE <search_condition> <where_clause> ::= WHERE <search_condition>
<search_condition> ::= <predicates> <search_condition> ::= <predicates>
| <cumulative_predicates> | <cumulative_predicates>
<predicates> ::= <predicate> ( AND <predicate> )* <predicates> ::= <predicate> ( AND <predicate> )*
<predicate> ::= <in_predicate> <predicate> ::= <in_predicate>
| <contains_all_predicate> | <contains_all_predicate>
| <equal_predicate> | <equal_predicate>
Expand All @@ -182,92 +187,130 @@
| <same_column_or_pred> | <same_column_or_pred>
<in_predicate> ::= <column_name> [NOT] IN <value_list> [<except_clause>] [<predicate_props>] <in_predicate> ::= <column_name> [NOT] IN <value_list> [<except_clause>] [<predicate_props>]
<contains_all_predicate> ::= <column_name> CONTAINS ALL <value_list> [<except_clause>] [<predicate_props>]
<contains_all_predicate> ::= <column_name> CONTAINS ALL <value_list> [<except_clause>]
[<predicate_props>]
<equal_predicate> ::= <column_name> '=' <value> [<predicate_props>] <equal_predicate> ::= <column_name> '=' <value> [<predicate_props>]
<not_equal_predicate> ::= <column_name> '<>' <value> [<predicate_props>] <not_equal_predicate> ::= <column_name> '<>' <value> [<predicate_props>]
<query_predicate> ::= QUERY IS <quoted_string> <query_predicate> ::= QUERY IS <quoted_string>
<between_predicate> ::= <column_name> [NOT] BETWEEN <value> AND <value> <between_predicate> ::= <column_name> [NOT] BETWEEN <value> AND <value>
<range_predicate> ::= <column_name> <range_op> <numeric> <range_predicate> ::= <column_name> <range_op> <numeric>
<time_predicate> ::= <column_name> IN LAST <time_span> <time_predicate> ::= <column_name> IN LAST <time_span>
| <column_name> ( SINCE | AFTER | BEFORE ) <time_expr> | <column_name> ( SINCE | AFTER | BEFORE ) <time_expr>
<same_column_or_pred> ::= '(' + <cumulative_predicates> + ')' <same_column_or_pred> ::= '(' <cumulative_predicates> ')'
<cumulative_predicates> ::= <cumulative_predicate> ( OR <cumulative_predicate> )* <cumulative_predicates> ::= <cumulative_predicate> ( OR <cumulative_predicate> )*
<cumulative_predicate> ::= <in_predicate> <cumulative_predicate> ::= <in_predicate>
| <equal_predicate> | <equal_predicate>
| <between_predicate> | <between_predicate>
| <range_predicate> | <range_predicate>
| <time_predicate> | <time_predicate>
<value_list> ::= '(' <value> ( ',' <value> )* ')' <value_list> ::= '(' <value> ( ',' <value> )* ')'
<value> ::= <quoted_string> | <numeric> <value> ::= <quoted_string> | <numeric>
<range_op> ::= '<' | '<=' | '>=' | '>' <range_op> ::= '<' | '<=' | '>=' | '>'
<except_clause> ::= EXCEPT <value_list> <except_clause> ::= EXCEPT <value_list>
<predicate_props> ::= WITH <prop_list> <predicate_props> ::= WITH <prop_list>
<prop_list> ::= '(' <key_value_pair> ( ',' <key_value_pair> )* ')' <prop_list> ::= '(' <key_value_pair> ( ',' <key_value_pair> )* ')'
<key_value_pair> ::= <quoted_string> ':' <quoted_string> <key_value_pair> ::= <quoted_string> ':' <quoted_string>
<given_clause> ::= GIVEN FACET PARAM <facet_param_list> <given_clause> ::= GIVEN FACET PARAM <facet_param_list>
<facet_param_list> ::= <facet_param> ( ',' <facet_param> )* <facet_param_list> ::= <facet_param> ( ',' <facet_param> )*
<facet_param> ::= '(' <facet_name> <facet_param_name> <facet_param_type> <facet_param_value> ')' <facet_param> ::= '(' <facet_name> <facet_param_name> <facet_param_type> <facet_param_value> ')'
<facet_param_name> ::= <quoted_string> <facet_param_name> ::= <quoted_string>
<facet_param_type> ::= BOOLEAN | INT | LONG | STRING | BYTEARRAY | DOUBLE <facet_param_type> ::= BOOLEAN | INT | LONG | STRING | BYTEARRAY | DOUBLE
<facet_param_value> ::= <quoted_string> <facet_param_value> ::= <quoted_string>
<additional_clauses> ::= ( <additional_clause> )* <additional_clauses> ::= ( <additional_clause> )+
<additional_clause> ::= <order_by_clause> <additional_clause> ::= <order_by_clause>
| <group_by_clause> | <group_by_clause>
| <limit_clause> | <limit_clause>
| <browse_by_clause> | <browse_by_clause>
| <fetching_stored_clause> | <fetching_stored_clause>
<order_by_clause> ::= ORDER BY <sort_specs> <order_by_clause> ::= ORDER BY <sort_specs>
<sort_specs> ::= <sort_spec> ( ',' <sort_spec> )* <sort_specs> ::= <sort_spec> ( ',' <sort_spec> )*
<sort_spec> ::= <column_name> [<ordering_spec>] <sort_spec> ::= <column_name> [<ordering_spec>]
<ordering_spec> ::= ASC | DESC <ordering_spec> ::= ASC | DESC
<group_by_clause> ::= GROUP BY <group_spec> <group_by_clause> ::= GROUP BY <group_spec>
<group_spec> ::= <facet_name> [TOP <max_per_group>] <group_spec> ::= <facet_name> [TOP <max_per_group>]
<limit_clause> ::= LIMIT [<offset> ','] <count> <limit_clause> ::= LIMIT [<offset> ','] <count>
<offset> ::= ( <digit> )+ <offset> ::= ( <digit> )+
<count> ::= ( <digit> )+ <count> ::= ( <digit> )+
<browse_by_clause> ::= BROWSE BY <facet_specs> <browse_by_clause> ::= BROWSE BY <facet_specs>
<facet_specs> ::= <facet_spec> ( ',' <facet_spec> )* <facet_specs> ::= <facet_spec> ( ',' <facet_spec> )*
<facet_spec> ::= <facet_name> [<facet_expression>] <facet_spec> ::= <facet_name> [<facet_expression>]
<facet_expression> ::= '(' <expand_flag> <count> <count> <facet_ordering> ')' <facet_expression> ::= '(' <expand_flag> <count> <count> <facet_ordering> ')'
<expand_flag> ::= TRUE | FALSE <expand_flag> ::= TRUE | FALSE
<facet_ordering> ::= HITS | VALUE <facet_ordering> ::= HITS | VALUE
<fetching_stored_clause> ::= FETCHING STORED [<fetching_flag>] <fetching_stored_clause> ::= FETCHING STORED [<fetching_flag>]
<fetching_flag> ::= TRUE | FALSE <fetching_flag> ::= TRUE | FALSE
<quoted_string> ::= '"' ( <char> )* '"' <quoted_string> ::= '"' ( <char> )* '"'
| "'" ( <char> )* "'" | "'" ( <char> )* "'"
<identifier> ::= <identifier_start> ( <identifier_part> )* <identifier> ::= <identifier_start> ( <identifier_part> )*
<identifier_start> ::= <alpha> | '-' | '_' <identifier_start> ::= <alpha> | '-' | '_'
<identifier_part> ::= <identifier_start> | <digit> <identifier_part> ::= <identifier_start> | <digit>
<column_name> ::= <identifier> <column_name> ::= <identifier>
<facet_name> ::= <identifier> <facet_name> ::= <identifier>
<alpha> ::= <alpha_lower_case> | <alpha_upper_case> <alpha> ::= <alpha_lower_case> | <alpha_upper_case>
<alpha_upper_case> ::= A | B | C | D | E | F | G | H | I | J | K | L | M | N | O <alpha_upper_case> ::= A | B | C | D | E | F | G | H | I | J | K | L | M | N | O
| P | Q | R | S | T | U | V | W | X | Y | Z | P | Q | R | S | T | U | V | W | X | Y | Z
<alpha_lower_case> ::= a | b | c | d | e | f | g | h | i | j | k | l | m | n | o <alpha_lower_case> ::= a | b | c | d | e | f | g | h | i | j | k | l | m | n | o
| p | q | r | s | t | u | v | w | x | y | z | p | q | r | s | t | u | v | w | x | y | z
<digit> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 <digit> ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
<numeric> ::= <time_expr> | <number> <numeric> ::= <time_expr> | <number>
<number> ::= <integer> | <real> <number> ::= <integer> | <real>
<integer> ::= ( <digit> )+ <integer> ::= ( <digit> )+
<real> ::= ( <digit> )+ '.' ( <digit> )+ <real> ::= ( <digit> )+ '.' ( <digit> )+
<time_expr> ::= <time_span> AGO <time_expr> ::= <time_span> AGO
Expand All @@ -278,13 +321,19 @@
[<time_minute_part>] [<time_second_part>] [<time_millisecond_part>] [<time_minute_part>] [<time_second_part>] [<time_millisecond_part>]
<time_week_part> ::= <integer> ( 'week' | 'weeks' ) <time_week_part> ::= <integer> ( 'week' | 'weeks' )
<time_day_part> ::= <integer> ( 'day' | 'days' ) <time_day_part> ::= <integer> ( 'day' | 'days' )
<time_hour_part> ::= <integer> ( 'hour' | 'hours' ) <time_hour_part> ::= <integer> ( 'hour' | 'hours' )
<time_minute_part> ::= <integer> ( 'minute' | 'minutes' | 'min' | 'mins') <time_minute_part> ::= <integer> ( 'minute' | 'minutes' | 'min' | 'mins')
<time_second_part> ::= <integer> ( 'second' | 'seconds' | 'sec' | 'secs') <time_second_part> ::= <integer> ( 'second' | 'seconds' | 'sec' | 'secs')
<time_millisecond_part> ::= <integer> ( 'millisecond' | 'milliseconds' | 'msec' | 'msecs') <time_millisecond_part> ::= <integer> ( 'millisecond' | 'milliseconds' | 'msec' | 'msecs')
<date_time_string> ::= <digit><digit><digit><digit> ('-' | '/' | '.') <digit><digit> ('-' | '/' | '.') <digit><digit> <date_time_string> ::= <digit><digit><digit><digit> ('-' | '/' | '.') <digit><digit>
('-' | '/' | '.') <digit><digit>
<digit><digit> ':' <digit><digit> ':' <digit><digit> <digit><digit> ':' <digit><digit> ':' <digit><digit>
""" """
Expand Down
47 changes: 47 additions & 0 deletions example/hadoop-indexing/conf/JobCarDemo.job
@@ -0,0 +1,47 @@
type=java
job.class=com.sensei.indexing.hadoop.demo.CarDemo

mapreduce.job.maps=2
sensei.num.shards=3

mapred.job.name=CarDemoShardedIndexing

# if the output.path already exists, delete it first
sensei.force.output.overwrite=true

# Lower this value if the number of mappers is very large. Default is 50 MB = 52428800 bytes.
sensei.max.ramsize.bytes=52428800

############# path of schema for interpreter #############

##### TextJSON schema Sample (car demo) absolute path ######
sensei.schema.file.url=conf/schema.xml

############ Input and Output ##################

####### Text JSON data (car demo) #####
read.lock=data/cars.json
sensei.input.dirs=data/cars.json

######## Output configuration ######
write.lock=example/hadoop-indexing/output
sensei.output.dir=example/hadoop-indexing/fileoutput

######## Index output location ######
sensei.index.path=example/hadoop-indexing/index

############# schemas for mapper input ################

sensei.input.format=org.apache.hadoop.mapred.TextInputFormat

############## Sharding strategy ################
sensei.distribution.policy=com.sensei.indexing.hadoop.demo.CarShardingStrategy

############# Converter for mapper input (data conversion and filtering) ##########
sensei.mapinput.converter=com.sensei.indexing.hadoop.demo.CarMapInputConverter

############# Analyzer configuration for lucene ###############
sensei.document.analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
sensei.document.analyzer.version=LUCENE_30


65 changes: 65 additions & 0 deletions example/hadoop-indexing/pom.xml
@@ -0,0 +1,65 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.senseidb</groupId>
<artifactId>sensei-parent</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../sensei-parent/pom.xml</relativePath>
</parent>

<artifactId>sensei-example-hadoop</artifactId>
<packaging>jar</packaging>
<name>sensei example hadoop indexing</name>
<description>sensei hadoop indexer example</description>

<!-- Set the compiler to java6 -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.1</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>com.github.maven-hadoop.plugin</groupId>
<artifactId>maven-hadoop-plugin</artifactId>
<version>0.20.1</version>
<configuration>
<hadoopHome>/home/jwang/opensource/hadoop-0.21.0</hadoopHome>
</configuration>
</plugin>
</plugins>
</build>


<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>sensei-hadoop-indexing</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.204.0</version>
</dependency>

<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20080701</version>
</dependency>
</dependencies>

</project>
@@ -0,0 +1,36 @@
package com.sensei.indexing.hadoop.demo;



import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.sensei.indexing.hadoop.job.MapReduceJob;
import com.sensei.indexing.hadoop.util.PropertiesLoader;


/**
 * Command-line entry point for the car-demo Hadoop indexing job.
 *
 * <p>{@link #main(String[])} loads the job settings from the
 * {@code JobCarDemo.job} file and hands this tool to {@link ToolRunner};
 * {@link #run(String[])} then builds the {@link JobConf} and runs the job.
 */
public class CarDemo extends MapReduceJob implements Tool {

    /**
     * Builds the job configuration, names the job "CarDemo" and runs it.
     *
     * @param args command-line arguments; not read by this method
     * @return always {@code 0} when {@code JobClient.runJob} returns normally
     * @throws Exception if creating or running the job fails
     */
    public int run(String[] args) throws Exception {
        // createJob is not defined in this class; presumably inherited from
        // MapReduceJob -- confirm against that class.
        final JobConf jobConf = createJob(CarDemo.class);
        jobConf.setJobName("CarDemo");

        JobClient.runJob(jobConf);
        return 0;
    }

    /**
     * Loads the demo job properties, runs the tool, prints the elapsed
     * wall-clock time in milliseconds and exits with the tool's return code.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner}
     * @throws Exception if loading the properties or running the tool fails
     */
    public static void main(String[] args) throws Exception {
        final long startedAt = System.currentTimeMillis();

        final Configuration jobProperties =
                PropertiesLoader.loadProperties("example/hadoop-indexing/conf/JobCarDemo.job");
        final int exitCode = ToolRunner.run(jobProperties, new CarDemo(), args);

        final long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println("Total time: " + elapsedMillis);
        System.exit(exitCode);
    }

}
@@ -0,0 +1,24 @@
package com.sensei.indexing.hadoop.demo;

import org.json.JSONException;
import org.json.JSONObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

import com.sensei.indexing.hadoop.map.MapInputConverter;

/**
 * Map-input converter for the car demo: each input value is treated as a
 * single JSON document held in a Hadoop {@link Text}.
 */
public class CarMapInputConverter extends MapInputConverter {

    /**
     * Parses the input value as a JSON object.
     *
     * @param key   the map input key; not used here
     * @param value the map input value; cast to {@link Text}, so any other
     *              type fails with a {@link ClassCastException}
     * @param conf  the job configuration; not used here
     * @return the JSON object parsed from the text of {@code value}
     * @throws JSONException if the text is not a valid JSON object
     */
    @Override
    public JSONObject getJsonInput(Object key, Object value, Configuration conf) throws JSONException {
        final Text record = (Text) value;
        return new JSONObject(record.toString());
    }

    /**
     * Pass-through filter: returns the given object unchanged.
     *
     * @param data the JSON object produced for one input record
     * @return {@code data} itself
     * @throws Exception never thrown by this implementation
     */
    @Override
    protected JSONObject doFilter(JSONObject data) throws Exception {
        return data;
    }

}

0 comments on commit 838d720

Please sign in to comment.