Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Switching to Maven

  • Loading branch information...
commit 68b781617caa8bb8ec8fc11daa4f95c8ef1e9380 1 parent 728c83e
Xavier xstevens authored

Showing 70 changed files with 564 additions and 88 deletions. Show diff stats Hide diff stats

  1. +3 0  .gitignore
  2. +4 4 README.md
  3. +0 27 build.properties
  4. +0 57 build.xml
  5. BIN  lib/commons-logging-1.1.1.jar
  6. BIN  lib/hadoop-core-0.20.2-cdh3u0.jar
  7. BIN  lib/hadoop-lzo-0.4.8.jar
  8. BIN  lib/hbase-0.90.1-cdh3u0.jar
  9. BIN  lib/jackson-core-asl-1.8.1.jar
  10. BIN  lib/jackson-mapper-asl-1.8.1.jar
  11. BIN  lib/log4j-1.2.16.jar
  12. BIN  lib/lucene-analyzers-3.1.0.jar
  13. BIN  lib/lucene-core-3.1.0.jar
  14. BIN  lib/mahout-collections-1.0.jar
  15. BIN  lib/mahout-core-0.5.jar
  16. BIN  lib/mahout-math-0.5.jar
  17. BIN  lib/mahout-utils-0.5.jar
  18. 0  lib/{opencloud.jar → opencloud-0.2.jar}
  19. BIN  lib/pig-0.8.0-cdh3u0-core.jar
  20. BIN  lib/riak-client-0.14.1-SNAPSHOT.jar
  21. BIN  lib/zookeeper-3.3.3-cdh3u0.jar
  22. +520 0 pom.xml
  23. +37 0 src/assembly/job.xml
  24. 0  src/{ → main}/java/com/mozilla/hadoop/Backup.java
  25. 0  src/{ → main}/java/com/mozilla/hadoop/ClusterHealth.java
  26. 0  src/{ → main}/java/com/mozilla/hadoop/UnknownPathFinder.java
  27. 0  src/{ → main}/java/com/mozilla/hadoop/fs/Dictionary.java
  28. 0  src/{ → main}/java/com/mozilla/hadoop/fs/SequenceFileDirectoryReader.java
  29. 0  src/{ → main}/java/com/mozilla/hadoop/hbase/mapreduce/MultiScanTableInputFormat.java
  30. 0  src/{ → main}/java/com/mozilla/hadoop/hbase/mapreduce/MultiScanTableMapReduceUtil.java
  31. 0  src/{ → main}/java/com/mozilla/hadoop/mapreduce/lib/UniqueIdentityReducer.java
  32. 0  src/{ → main}/java/com/mozilla/hadoop/riak/RiakExportToHDFS.java
  33. 0  src/{ → main}/java/com/mozilla/hive/serde/TestPilotJsonSerde.java
  34. 0  src/{ → main}/java/com/mozilla/lucene/analysis/en/EnglishAnalyzer.java
  35. 0  src/{ → main}/java/com/mozilla/lucene/analysis/en/NGramEnglishAnalyzer.java
  36. 0  src/{ → main}/java/com/mozilla/lucene/analysis/en/ShingleAllStopFilter.java
  37. 0  src/{ → main}/java/com/mozilla/mahout/clustering/display/kmeans/DisplayKMeansBase.java
  38. 0  src/{ → main}/java/com/mozilla/mahout/clustering/display/kmeans/OriginalText.java
  39. 0  src/{ → main}/java/com/mozilla/mahout/clustering/display/kmeans/WordCloud.java
  40. 0  src/{ → main}/java/com/mozilla/mahout/clustering/display/lda/DisplayLDATopics.java
  41. 0  src/{ → main}/java/com/mozilla/mahout/clustering/display/lda/OriginalText.java
  42. 0  src/{ → main}/java/com/mozilla/pig/eval/BytesSize.java
  43. 0  src/{ → main}/java/com/mozilla/pig/eval/ConvertBagToTuple.java
  44. 0  src/{ → main}/java/com/mozilla/pig/eval/ConvertMapToBag.java
  45. 0  src/{ → main}/java/com/mozilla/pig/eval/Size.java
  46. 0  src/{ → main}/java/com/mozilla/pig/eval/date/ConvertDateFormat.java
  47. 0  src/{ → main}/java/com/mozilla/pig/eval/date/FormatDate.java
  48. 0  src/{ → main}/java/com/mozilla/pig/eval/date/ParseDate.java
  49. 0  src/{ → main}/java/com/mozilla/pig/eval/json/JsonMap.java
  50. 0  src/{ → main}/java/com/mozilla/pig/eval/ml/TFIDFVectorizer.java
  51. 0  src/{ → main}/java/com/mozilla/pig/eval/ml/TFVectorizer.java
  52. 0  src/{ → main}/java/com/mozilla/pig/eval/ml/Vectorizer.java
  53. 0  src/{ → main}/java/com/mozilla/pig/eval/regex/EncodeChromeUrl.java
  54. 0  src/{ → main}/java/com/mozilla/pig/eval/regex/FindAll.java
  55. 0  src/{ → main}/java/com/mozilla/pig/eval/text/NGramTokenize.java
  56. 0  src/{ → main}/java/com/mozilla/pig/eval/text/TermFrequency.java
  57. 0  src/{ → main}/java/com/mozilla/pig/eval/text/Tokenize.java
  58. 0  src/{ → main}/java/com/mozilla/pig/eval/text/UnigramExtractor.java
  59. 0  src/{ → main}/java/com/mozilla/pig/filter/InDictionary.java
  60. 0  src/{ → main}/java/com/mozilla/pig/load/HBaseMultiScanLoader.java
  61. 0  src/{ → main}/java/com/mozilla/pig/storage/DocumentVectorStorage.java
  62. 0  src/{ → main}/java/com/mozilla/pig/storage/RegExLoader.java
  63. 0  src/{ → main}/java/com/mozilla/pig/storage/SequenceFileStorage.java
  64. 0  src/{ → main}/java/com/mozilla/pig/storage/apachelog/WeblogsLoader.java
  65. 0  src/{ → main}/java/com/mozilla/util/DateUtil.java
  66. 0  src/{ → main}/java/com/mozilla/util/Pair.java
  67. 0  src/{ → main}/java/com/mozilla/util/StringUtil.java
  68. 0  src/{ → main}/java/com/mozilla/util/TextUtil.java
  69. 0  src/{ → main}/python/format_riak_keys.py
  70. 0  src/{ → main}/python/lsr_diff.py
3  .gitignore
... ... @@ -1,6 +1,8 @@
1 1 build
  2 +target
2 3 .project
3 4 .classpath
  5 +.settings
4 6
5 7 # Compiled source #
6 8 ###################
@@ -25,6 +27,7 @@ build
25 27
26 28 # Logs and databases #
27 29 ######################
  30 +logs
28 31 *.log
29 32 *.sql
30 33 *.sqlite
8 README.md
Source Rendered
... ... @@ -1,8 +1,8 @@
1 1 # Akela #
2 2
3   -Version: 0.1
  3 +Version: 0.2
4 4
5   -#### A bunch of utility classes for Java, Hadoop, HBase, Pig, etc. ####
  5 +#### Mozilla's utility library for Hadoop, HBase, Pig, etc. ####
6 6
7 7 ### Version Compatibility ###
8 8 This code is built with the following assumptions. You may get mixed results if you deviate from these versions.
@@ -15,11 +15,11 @@ This code is built with the following assumptions. You may get mixed results if
15 15 ### Building ###
16 16 To make a jar you can do:
17 17
18   -`ant jar`
  18 +`mvn package`
19 19
20 20 To make a Hadoop MapReduce job jar with no defined main class in the manifest:
21 21
22   -`ant hadoop-jar`
  22 +`mvn assembly:assembly`
23 23
24 24
25 25 ### License ###
27 build.properties
... ... @@ -1,27 +0,0 @@
1   -project.name = akela
2   -project.version = 0.1
3   -
4   -basedir = .
5   -src.dir = ${basedir}/src
6   -lib.dir = ${basedir}/lib
7   -conf.dir = ${basedir}/conf
8   -bin.dir = ${basedir}/bin
9   -build.dir = ${basedir}/build
10   -dist.dir = ${basedir}/dist
11   -
12   -build.classes = ${build.dir}/classes
13   -build.javadoc = ${build.dir}/javadoc
14   -build.encoding = ISO-8859-1
15   -build.lib = ${build.dir}/lib
16   -
17   -# Java
18   -java.src.dir = ${src.dir}/java
19   -javadoc.packages=*
20   -
21   -debug=off
22   -optimize=on
23   -deprecation=on
24   -build.compiler=modern
25   -
26   -java.src.version=1.6
27   -java.target.version=1.6
57 build.xml
... ... @@ -1,57 +0,0 @@
1   -<?xml version="1.0"?>
2   -<project name="akela" default="jar" basedir=".">
3   -
4   - <!-- Load all properties -->
5   - <property file="${basedir}/build.properties"/>
6   -
7   - <path id="classpath">
8   - <pathelement location="${build.classes}"/>
9   - <pathelement location="."/>
10   - <fileset dir="${lib.dir}">
11   - <include name="*.jar"/>
12   - </fileset>
13   - </path>
14   -
15   - <target name="clean">
16   - <delete includeEmptyDirs="true" failonerror="false">
17   - <fileset dir="build"/>
18   - </delete>
19   - </target>
20   -
21   - <target name="init" depends="clean">
22   - <mkdir dir="${build.dir}"/>
23   - <mkdir dir="${build.classes}"/>
24   - <mkdir dir="${build.javadoc}"/>
25   - <mkdir dir="${build.lib}"/>
26   - </target>
27   -
28   - <target name="compile-java" depends="init">
29   - <javac encoding="${build.encoding}" srcdir="${java.src.dir}" includes="**/*.java" destdir="${build.classes}" debug="${debug}" source="${java.src.version}" optimize="${optimize}" deprecation="${deprecation}" includeantruntime="false">
30   - <classpath refid="classpath"/>
31   - <compilerarg line="-Xlint:unchecked"/>
32   - </javac>
33   - </target>
34   -
35   - <target name="compile" depends="compile-java"/>
36   -
37   - <target name="hadoop-jar" depends="compile">
38   - <delete file="${build.lib}/${project.name}-job.jar"/>
39   - <jar destfile="${build.lib}/${project.name}-job.jar">
40   - <fileset dir="${build.classes}"/>
41   - <fileset dir=".">
42   - <include name="lib/*.jar"/>
43   - <exclude name="lib/hadoop*.jar"/>
44   - <exclude name="lib/hbase*.jar"/>
45   - <exclude name="lib/zookeeper*.jar"/>
46   - <exclude name="lib/pig*.jar"/>
47   - </fileset>
48   - </jar>
49   - </target>
50   -
51   - <target name="jar" depends="compile">
52   - <jar destfile="${build.lib}/${project.name}-${project.version}.jar">
53   - <fileset dir="${build.classes}"/>
54   - </jar>
55   - </target>
56   -
57   -</project>
BIN  lib/commons-logging-1.1.1.jar
Binary file not shown
BIN  lib/hadoop-core-0.20.2-cdh3u0.jar
Binary file not shown
BIN  lib/hadoop-lzo-0.4.8.jar
Binary file not shown
BIN  lib/hbase-0.90.1-cdh3u0.jar
Binary file not shown
BIN  lib/jackson-core-asl-1.8.1.jar
Binary file not shown
BIN  lib/jackson-mapper-asl-1.8.1.jar
Binary file not shown
BIN  lib/log4j-1.2.16.jar
Binary file not shown
BIN  lib/lucene-analyzers-3.1.0.jar
Binary file not shown
BIN  lib/lucene-core-3.1.0.jar
Binary file not shown
BIN  lib/mahout-collections-1.0.jar
Binary file not shown
BIN  lib/mahout-core-0.5.jar
Binary file not shown
BIN  lib/mahout-math-0.5.jar
Binary file not shown
BIN  lib/mahout-utils-0.5.jar
Binary file not shown
0  lib/opencloud.jar → lib/opencloud-0.2.jar
File renamed without changes
BIN  lib/pig-0.8.0-cdh3u0-core.jar
Binary file not shown
BIN  lib/riak-client-0.14.1-SNAPSHOT.jar
Binary file not shown
BIN  lib/zookeeper-3.3.3-cdh3u0.jar
Binary file not shown
520 pom.xml
... ... @@ -0,0 +1,520 @@
  1 +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  2 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  3 + <modelVersion>4.0.0</modelVersion>
  4 +
  5 + <groupId>com.mozilla</groupId>
  6 + <artifactId>akela</artifactId>
  7 + <version>0.2-SNAPSHOT</version>
  8 +
  9 + <name>akela</name>
  10 + <description></description>
  11 + <url>https://github.com/mozilla-metrics/akela</url>
  12 +
  13 + <packaging>jar</packaging>
  14 +
  15 + <properties>
  16 + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  17 + <skip.tests>false</skip.tests>
  18 + </properties>
  19 +
  20 + <repositories>
  21 + <repository>
  22 + <id>java.net</id>
  23 + <url>http://download.java.net/maven/2</url>
  24 + <name>java.net Maven Repository</name>
  25 + </repository>
  26 + <repository>
  27 + <id>cdh.release.repo</id>
  28 + <url>https://repository.cloudera.com/content/repositories/releases</url>
  29 + <name>Cloudera Releases Repository</name>
  30 + <snapshots><enabled>true</enabled></snapshots>
  31 + </repository>
  32 + <repository>
  33 + <id>oss.sonatype.org</id>
  34 + <url>http://oss.sonatype.org/content/repositories/releases</url>
  35 + <name>Sonatype's OSS Maven Repository</name>
  36 + </repository>
  37 + <repository>
  38 + <id>oracle</id>
  39 + <url>http://download.oracle.com/maven</url>
  40 + <name>Oracle's Maven Repository</name>
  41 + </repository>
  42 + </repositories>
  43 +
  44 + <dependencies>
  45 +
  46 + <dependency>
  47 + <groupId>junit</groupId>
  48 + <artifactId>junit</artifactId>
  49 + <version>4.8.2</version>
  50 + <scope>test</scope>
  51 + </dependency>
  52 +
  53 + <dependency>
  54 + <groupId>log4j</groupId>
  55 + <artifactId>log4j</artifactId>
  56 + <version>1.2.16</version>
  57 + </dependency>
  58 +
  59 + <dependency>
  60 + <groupId>commons-lang</groupId>
  61 + <artifactId>commons-lang</artifactId>
  62 + <version>2.3</version>
  63 + </dependency>
  64 +
  65 + <!-- Jackson JSON Processor -->
  66 + <dependency>
  67 + <groupId>org.codehaus.jackson</groupId>
  68 + <artifactId>jackson-core-asl</artifactId>
  69 + <version>1.8.3</version>
  70 + </dependency>
  71 + <dependency>
  72 + <groupId>org.codehaus.jackson</groupId>
  73 + <artifactId>jackson-mapper-asl</artifactId>
  74 + <version>1.8.3</version>
  75 + </dependency>
  76 +
  77 + <!-- Cloudera Hadoop, HBase -->
  78 + <dependency>
  79 + <groupId>org.apache.hadoop</groupId>
  80 + <artifactId>hadoop-core</artifactId>
  81 + <version>0.20.2-cdh3u0</version>
  82 + <scope>provided</scope>
  83 + <exclusions>
  84 + <exclusion>
  85 + <artifactId>jasper-runtime</artifactId>
  86 + <groupId>tomcat</groupId>
  87 + </exclusion>
  88 + <exclusion>
  89 + <artifactId>commons-httpclient</artifactId>
  90 + <groupId>commons-httpclient</groupId>
  91 + </exclusion>
  92 + <exclusion>
  93 + <artifactId>hsqldb</artifactId>
  94 + <groupId>hsqldb</groupId>
  95 + </exclusion>
  96 + <exclusion>
  97 + <artifactId>commons-cli</artifactId>
  98 + <groupId>commons-cli</groupId>
  99 + </exclusion>
  100 + <exclusion>
  101 + <artifactId>commons-logging</artifactId>
  102 + <groupId>commons-logging</groupId>
  103 + </exclusion>
  104 + <exclusion>
  105 + <artifactId>jsp-api</artifactId>
  106 + <groupId>javax.servlet</groupId>
  107 + </exclusion>
  108 + <exclusion>
  109 + <artifactId>hadoop-ant</artifactId>
  110 + <groupId>com.cloudera.cdh</groupId>
  111 + </exclusion>
  112 + <exclusion>
  113 + <artifactId>commons-net</artifactId>
  114 + <groupId>commons-net</groupId>
  115 + </exclusion>
  116 + <exclusion>
  117 + <artifactId>jasper-compiler</artifactId>
  118 + <groupId>tomcat</groupId>
  119 + </exclusion>
  120 + <exclusion>
  121 + <artifactId>commons-el</artifactId>
  122 + <groupId>commons-el</groupId>
  123 + </exclusion>
  124 + <exclusion>
  125 + <artifactId>ant</artifactId>
  126 + <groupId>ant</groupId>
  127 + </exclusion>
  128 + <exclusion>
  129 + <artifactId>jets3t</artifactId>
  130 + <groupId>net.java.dev.jets3t</groupId>
  131 + </exclusion>
  132 + <exclusion>
  133 + <artifactId>xmlenc</artifactId>
  134 + <groupId>xmlenc</groupId>
  135 + </exclusion>
  136 + <exclusion>
  137 + <artifactId>core</artifactId>
  138 + <groupId>org.eclipse.jdt</groupId>
  139 + </exclusion>
  140 + <exclusion>
  141 + <artifactId>commons-codec</artifactId>
  142 + <groupId>commons-codec</groupId>
  143 + </exclusion>
  144 + <exclusion>
  145 + <artifactId>oro</artifactId>
  146 + <groupId>oro</groupId>
  147 + </exclusion>
  148 + <exclusion>
  149 + <artifactId>jsp-api</artifactId>
  150 + <groupId>javax.servlet.jsp</groupId>
  151 + </exclusion>
  152 + <exclusion>
  153 + <artifactId>jetty-util</artifactId>
  154 + <groupId>org.mortbay.jetty</groupId>
  155 + </exclusion>
  156 + <exclusion>
  157 + <artifactId>jetty</artifactId>
  158 + <groupId>org.mortbay.jetty</groupId>
  159 + </exclusion>
  160 + <exclusion>
  161 + <artifactId>servlet-api</artifactId>
  162 + <groupId>javax.servlet</groupId>
  163 + </exclusion>
  164 + </exclusions>
  165 + </dependency>
  166 + <dependency>
  167 + <groupId>org.apache.hbase</groupId>
  168 + <artifactId>hbase</artifactId>
  169 + <version>0.90.1-cdh3u0</version>
  170 + <scope>provided</scope>
  171 + <exclusions>
  172 + <exclusion>
  173 + <artifactId>commons-httpclient</artifactId>
  174 + <groupId>commons-httpclient</groupId>
  175 + </exclusion>
  176 + <exclusion>
  177 + <artifactId>jackson-xc</artifactId>
  178 + <groupId>org.codehaus.jackson</groupId>
  179 + </exclusion>
  180 + <exclusion>
  181 + <artifactId>commons-cli</artifactId>
  182 + <groupId>commons-cli</groupId>
  183 + </exclusion>
  184 + <exclusion>
  185 + <artifactId>jruby-complete</artifactId>
  186 + <groupId>org.jruby</groupId>
  187 + </exclusion>
  188 + <exclusion>
  189 + <artifactId>thrift</artifactId>
  190 + <groupId>org.apache.thrift</groupId>
  191 + </exclusion>
  192 + <exclusion>
  193 + <artifactId>slf4j-log4j12</artifactId>
  194 + <groupId>org.slf4j</groupId>
  195 + </exclusion>
  196 + <exclusion>
  197 + <artifactId>jasper-compiler</artifactId>
  198 + <groupId>tomcat</groupId>
  199 + </exclusion>
  200 + <exclusion>
  201 + <artifactId>core</artifactId>
  202 + <groupId>org.eclipse.jdt</groupId>
  203 + </exclusion>
  204 + <exclusion>
  205 + <artifactId>commons-codec</artifactId>
  206 + <groupId>commons-codec</groupId>
  207 + </exclusion>
  208 + <exclusion>
  209 + <artifactId>jasper-runtime</artifactId>
  210 + <groupId>tomcat</groupId>
  211 + </exclusion>
  212 + <exclusion>
  213 + <artifactId>slf4j-api</artifactId>
  214 + <groupId>org.slf4j</groupId>
  215 + </exclusion>
  216 + <exclusion>
  217 + <artifactId>avro</artifactId>
  218 + <groupId>org.apache.hadoop</groupId>
  219 + </exclusion>
  220 + <exclusion>
  221 + <artifactId>jsp-2.1</artifactId>
  222 + <groupId>org.mortbay.jetty</groupId>
  223 + </exclusion>
  224 + <exclusion>
  225 + <artifactId>stax-api</artifactId>
  226 + <groupId>stax</groupId>
  227 + </exclusion>
  228 + <exclusion>
  229 + <artifactId>guava</artifactId>
  230 + <groupId>com.google.guava</groupId>
  231 + </exclusion>
  232 + <exclusion>
  233 + <artifactId>jettison</artifactId>
  234 + <groupId>org.codehaus.jettison</groupId>
  235 + </exclusion>
  236 + <exclusion>
  237 + <artifactId>commons-lang</artifactId>
  238 + <groupId>commons-lang</groupId>
  239 + </exclusion>
  240 + <exclusion>
  241 + <artifactId>jsp-api-2.1</artifactId>
  242 + <groupId>org.mortbay.jetty</groupId>
  243 + </exclusion>
  244 + <exclusion>
  245 + <artifactId>jersey-json</artifactId>
  246 + <groupId>com.sun.jersey</groupId>
  247 + </exclusion>
  248 + <exclusion>
  249 + <artifactId>zookeeper-ant</artifactId>
  250 + <groupId>com.cloudera.cdh</groupId>
  251 + </exclusion>
  252 + <exclusion>
  253 + <artifactId>protobuf-java</artifactId>
  254 + <groupId>com.google.protobuf</groupId>
  255 + </exclusion>
  256 + <exclusion>
  257 + <artifactId>jaxb-api</artifactId>
  258 + <groupId>javax.xml.bind</groupId>
  259 + </exclusion>
  260 + <exclusion>
  261 + <artifactId>jersey-core</artifactId>
  262 + <groupId>com.sun.jersey</groupId>
  263 + </exclusion>
  264 + <exclusion>
  265 + <artifactId>jersey-server</artifactId>
  266 + <groupId>com.sun.jersey</groupId>
  267 + </exclusion>
  268 + <exclusion>
  269 + <artifactId>jetty-util</artifactId>
  270 + <groupId>org.mortbay.jetty</groupId>
  271 + </exclusion>
  272 + <exclusion>
  273 + <artifactId>jetty</artifactId>
  274 + <groupId>org.mortbay.jetty</groupId>
  275 + </exclusion>
  276 + <exclusion>
  277 + <artifactId>jsr311-api</artifactId>
  278 + <groupId>javax.ws.rs</groupId>
  279 + </exclusion>
  280 + <exclusion>
  281 + <artifactId>servlet-api-2.5</artifactId>
  282 + <groupId>org.mortbay.jetty</groupId>
  283 + </exclusion>
  284 + </exclusions>
  285 + </dependency>
  286 +
  287 + <!-- Thrift -->
  288 + <dependency>
  289 + <groupId>org.apache.thrift</groupId>
  290 + <artifactId>thrift</artifactId>
  291 + <version>0.2.0</version>
  292 + <scope>system</scope>
  293 + <systemPath>${basedir}/lib/thrift-0.2.0.jar</systemPath>
  294 + </dependency>
  295 +
  296 + <!-- Lucene -->
  297 + <dependency>
  298 + <groupId>org.apache.lucene</groupId>
  299 + <artifactId>lucene-core</artifactId>
  300 + <version>3.3.0</version>
  301 + </dependency>
  302 + <dependency>
  303 + <groupId>org.apache.lucene</groupId>
  304 + <artifactId>lucene-analyzers</artifactId>
  305 + <version>3.3.0</version>
  306 + </dependency>
  307 +
  308 + <!-- Mahout -->
  309 + <dependency>
  310 + <groupId>org.apache.mahout</groupId>
  311 + <artifactId>mahout-core</artifactId>
  312 + <version>0.5</version>
  313 + <exclusions>
  314 + <exclusion>
  315 + <artifactId>commons-cli</artifactId>
  316 + <groupId>org.apache.mahout.commons</groupId>
  317 + </exclusion>
  318 + <exclusion>
  319 + <artifactId>uncommons-maths</artifactId>
  320 + <groupId>org.uncommons.maths</groupId>
  321 + </exclusion>
  322 + <exclusion>
  323 + <artifactId>watchmaker-framework</artifactId>
  324 + <groupId>org.uncommons.watchmaker</groupId>
  325 + </exclusion>
  326 + <exclusion>
  327 + <artifactId>commons-pool</artifactId>
  328 + <groupId>commons-pool</groupId>
  329 + </exclusion>
  330 + <exclusion>
  331 + <artifactId>commons-dbcp</artifactId>
  332 + <groupId>commons-dbcp</groupId>
  333 + </exclusion>
  334 + <exclusion>
  335 + <artifactId>commons-collections</artifactId>
  336 + <groupId>commons-collections</groupId>
  337 + </exclusion>
  338 + <exclusion>
  339 + <artifactId>xpp3_min</artifactId>
  340 + <groupId>xpp3</groupId>
  341 + </exclusion>
  342 + <exclusion>
  343 + <artifactId>slf4j-api</artifactId>
  344 + <groupId>org.slf4j</groupId>
  345 + </exclusion>
  346 + <exclusion>
  347 + <artifactId>xstream</artifactId>
  348 + <groupId>com.thoughtworks.xstream</groupId>
  349 + </exclusion>
  350 + </exclusions>
  351 + </dependency>
  352 + <dependency>
  353 + <groupId>org.apache.mahout</groupId>
  354 + <artifactId>mahout-math</artifactId>
  355 + <version>0.5</version>
  356 + <exclusions>
  357 + <exclusion>
  358 + <artifactId>uncommons-maths</artifactId>
  359 + <groupId>org.uncommons.maths</groupId>
  360 + </exclusion>
  361 + <exclusion>
  362 + <artifactId>guava</artifactId>
  363 + <groupId>com.google.guava</groupId>
  364 + </exclusion>
  365 + <exclusion>
  366 + <artifactId>slf4j-api</artifactId>
  367 + <groupId>org.slf4j</groupId>
  368 + </exclusion>
  369 + </exclusions>
  370 + </dependency>
  371 + <dependency>
  372 + <groupId>org.apache.mahout</groupId>
  373 + <artifactId>mahout-utils</artifactId>
  374 + <version>0.5</version>
  375 + <exclusions>
  376 + <exclusion>
  377 + <artifactId>slf4j-jcl</artifactId>
  378 + <groupId>org.slf4j</groupId>
  379 + </exclusion>
  380 + <exclusion>
  381 + <artifactId>slf4j-api</artifactId>
  382 + <groupId>org.slf4j</groupId>
  383 + </exclusion>
  384 + <exclusion>
  385 + <artifactId>solr-commons-csv</artifactId>
  386 + <groupId>org.apache.solr</groupId>
  387 + </exclusion>
  388 + </exclusions>
  389 + </dependency>
  390 + <dependency>
  391 + <groupId>org.apache.mahout</groupId>
  392 + <artifactId>mahout-collections</artifactId>
  393 + <version>1.0</version>
  394 + </dependency>
  395 +
  396 + <!-- Riak -->
  397 + <dependency>
  398 + <groupId>com.basho.riak</groupId>
  399 + <artifactId>riak-client</artifactId>
  400 + <version>0.14.1</version>
  401 + <exclusions>
  402 + <exclusion>
  403 + <artifactId>commons-httpclient</artifactId>
  404 + <groupId>commons-httpclient</groupId>
  405 + </exclusion>
  406 + <exclusion>
  407 + <artifactId>protobuf-java</artifactId>
  408 + <groupId>com.google.protobuf</groupId>
  409 + </exclusion>
  410 + <exclusion>
  411 + <artifactId>commons-codec</artifactId>
  412 + <groupId>commons-codec</groupId>
  413 + </exclusion>
  414 + </exclusions>
  415 + </dependency>
  416 +
  417 + <!-- Pig -->
  418 + <dependency>
  419 + <groupId>org.apache.pig</groupId>
  420 + <artifactId>pig</artifactId>
  421 + <version>0.8.0-cdh3u0</version>
  422 + <scope>provided</scope>
  423 + <exclusions>
  424 + <exclusion>
  425 + <artifactId>jline</artifactId>
  426 + <groupId>jline</groupId>
  427 + </exclusion>
  428 + <exclusion>
  429 + <artifactId>jsch</artifactId>
  430 + <groupId>com.jcraft</groupId>
  431 + </exclusion>
  432 + <exclusion>
  433 + <artifactId>joda-time</artifactId>
  434 + <groupId>joda-time</groupId>
  435 + </exclusion>
  436 + <exclusion>
  437 + <artifactId>jython</artifactId>
  438 + <groupId>org.python</groupId>
  439 + </exclusion>
  440 + <exclusion>
  441 + <artifactId>guava</artifactId>
  442 + <groupId>com.google.guava</groupId>
  443 + </exclusion>
  444 + <exclusion>
  445 + <artifactId>hadoop-root</artifactId>
  446 + <groupId>com.cloudera.cdh</groupId>
  447 + </exclusion>
  448 + <exclusion>
  449 + <artifactId>pig-ant</artifactId>
  450 + <groupId>com.cloudera.cdh</groupId>
  451 + </exclusion>
  452 + </exclusions>
  453 + </dependency>
  454 +
  455 + <!-- Hive -->
  456 + <!-- We have customizations in these jars -->
  457 + <dependency>
  458 + <groupId>org.apache.hadoop.hive</groupId>
  459 + <artifactId>hive-exec</artifactId>
  460 + <version>0.7.0-cdh3u0</version>
  461 + <scope>system</scope>
  462 + <systemPath>${basedir}/lib/hive-exec-0.7.0-cdh3u0.jar</systemPath>
  463 + </dependency>
  464 + <dependency>
  465 + <groupId>org.apache.hadoop.hive</groupId>
  466 + <artifactId>hive-serde</artifactId>
  467 + <version>0.7.0-cdh3u0</version>
  468 + <scope>system</scope>
  469 + <systemPath>${basedir}/lib/hive-serde-0.7.0-cdh3u0.jar</systemPath>
  470 + </dependency>
  471 +
  472 + <!-- OpenCloud -->
  473 + <dependency>
  474 + <groupId>org.mcavallo</groupId>
  475 + <artifactId>opencloud</artifactId>
  476 + <version>0.2</version>
  477 + <scope>system</scope>
  478 + <systemPath>${basedir}/lib/opencloud-0.2.jar</systemPath>
  479 + </dependency>
  480 +
  481 + </dependencies>
  482 +
  483 + <build>
  484 + <finalName>akela</finalName>
  485 + <plugins>
  486 +
  487 + <plugin>
  488 + <groupId>org.apache.maven.plugins</groupId>
  489 + <artifactId>maven-compiler-plugin</artifactId>
  490 + <version>2.3.2</version>
  491 + <configuration>
  492 + <source>1.6</source>
  493 + <target>1.6</target>
  494 + </configuration>
  495 + </plugin>
  496 +
  497 + <plugin>
  498 + <groupId>org.apache.maven.plugins</groupId>
  499 + <artifactId>maven-jar-plugin</artifactId>
  500 + <version>2.3.1</version>
  501 + <configuration>
  502 + <finalName>${project.name}-${project.version}</finalName>
  503 + </configuration>
  504 + </plugin>
  505 +
  506 + <plugin>
  507 + <artifactId>maven-assembly-plugin</artifactId>
  508 + <configuration>
  509 + <finalName>${project.name}-${project.version}</finalName>
  510 + <appendAssemblyId>true</appendAssemblyId>
  511 + <descriptors>
  512 + <descriptor>src/assembly/job.xml</descriptor>
  513 + </descriptors>
  514 + </configuration>
  515 + </plugin>
  516 +
  517 + </plugins>
  518 + </build>
  519 +
  520 +</project>
37 src/assembly/job.xml
... ... @@ -0,0 +1,37 @@
  1 +<assembly
  2 + xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
  3 + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4 + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
  5 + <id>job</id>
  6 + <formats>
  7 + <format>jar</format>
  8 + </formats>
  9 + <includeBaseDirectory>false</includeBaseDirectory>
  10 + <dependencySets>
  11 + <dependencySet>
  12 + <unpack>false</unpack>
  13 + <scope>runtime</scope>
  14 + <outputDirectory>lib</outputDirectory>
  15 + <excludes>
  16 + <exclude>${artifact.groupId}:${artifact.artifactId}</exclude>
  17 + </excludes>
  18 + </dependencySet>
  19 + <dependencySet>
  20 + <unpack>false</unpack>
  21 + <scope>system</scope>
  22 + <outputDirectory>lib</outputDirectory>
  23 + <excludes>
  24 + <exclude>${artifact.groupId}:${artifact.artifactId}</exclude>
  25 + </excludes>
  26 + </dependencySet>
  27 + </dependencySets>
  28 + <fileSets>
  29 + <fileSet>
  30 + <directory>${basedir}/target/classes</directory>
  31 + <outputDirectory>/</outputDirectory>
  32 + <excludes>
  33 + <exclude>*.jar</exclude>
  34 + </excludes>
  35 + </fileSet>
  36 + </fileSets>
  37 +</assembly>
0  src/java/com/mozilla/hadoop/Backup.java → src/main/java/com/mozilla/hadoop/Backup.java
File renamed without changes
0  src/java/com/mozilla/hadoop/ClusterHealth.java → src/main/java/com/mozilla/hadoop/ClusterHealth.java
File renamed without changes
0  src/java/com/mozilla/hadoop/UnknownPathFinder.java → ...in/java/com/mozilla/hadoop/UnknownPathFinder.java
File renamed without changes
0  src/java/com/mozilla/hadoop/fs/Dictionary.java → src/main/java/com/mozilla/hadoop/fs/Dictionary.java
File renamed without changes
0  ...ozilla/hadoop/fs/SequenceFileDirectoryReader.java → ...ozilla/hadoop/fs/SequenceFileDirectoryReader.java
File renamed without changes
0  ...op/hbase/mapreduce/MultiScanTableInputFormat.java → ...op/hbase/mapreduce/MultiScanTableInputFormat.java
File renamed without changes
0  .../hbase/mapreduce/MultiScanTableMapReduceUtil.java → .../hbase/mapreduce/MultiScanTableMapReduceUtil.java
File renamed without changes
0  ...a/hadoop/mapreduce/lib/UniqueIdentityReducer.java → ...a/hadoop/mapreduce/lib/UniqueIdentityReducer.java
File renamed without changes
0  ...ava/com/mozilla/hadoop/riak/RiakExportToHDFS.java → ...ava/com/mozilla/hadoop/riak/RiakExportToHDFS.java
File renamed without changes
0  ...va/com/mozilla/hive/serde/TestPilotJsonSerde.java → ...va/com/mozilla/hive/serde/TestPilotJsonSerde.java
File renamed without changes
0  ...m/mozilla/lucene/analysis/en/EnglishAnalyzer.java → ...m/mozilla/lucene/analysis/en/EnglishAnalyzer.java
File renamed without changes
0  ...illa/lucene/analysis/en/NGramEnglishAnalyzer.java → ...illa/lucene/analysis/en/NGramEnglishAnalyzer.java
File renamed without changes
0  ...illa/lucene/analysis/en/ShingleAllStopFilter.java → ...illa/lucene/analysis/en/ShingleAllStopFilter.java
File renamed without changes
0  .../clustering/display/kmeans/DisplayKMeansBase.java → .../clustering/display/kmeans/DisplayKMeansBase.java
File renamed without changes
0  ...ahout/clustering/display/kmeans/OriginalText.java → ...ahout/clustering/display/kmeans/OriginalText.java
File renamed without changes
0  ...a/mahout/clustering/display/kmeans/WordCloud.java → ...a/mahout/clustering/display/kmeans/WordCloud.java
File renamed without changes
0  ...hout/clustering/display/lda/DisplayLDATopics.java → ...hout/clustering/display/lda/DisplayLDATopics.java
File renamed without changes
0  ...a/mahout/clustering/display/lda/OriginalText.java → ...a/mahout/clustering/display/lda/OriginalText.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/BytesSize.java → src/main/java/com/mozilla/pig/eval/BytesSize.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/ConvertBagToTuple.java → .../java/com/mozilla/pig/eval/ConvertBagToTuple.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/ConvertMapToBag.java → ...in/java/com/mozilla/pig/eval/ConvertMapToBag.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/Size.java → src/main/java/com/mozilla/pig/eval/Size.java
File renamed without changes
0  .../com/mozilla/pig/eval/date/ConvertDateFormat.java → .../com/mozilla/pig/eval/date/ConvertDateFormat.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/date/FormatDate.java → ...in/java/com/mozilla/pig/eval/date/FormatDate.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/date/ParseDate.java → ...ain/java/com/mozilla/pig/eval/date/ParseDate.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/json/JsonMap.java → src/main/java/com/mozilla/pig/eval/json/JsonMap.java
File renamed without changes
0  ...java/com/mozilla/pig/eval/ml/TFIDFVectorizer.java → ...java/com/mozilla/pig/eval/ml/TFIDFVectorizer.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/ml/TFVectorizer.java → ...in/java/com/mozilla/pig/eval/ml/TFVectorizer.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/ml/Vectorizer.java → ...main/java/com/mozilla/pig/eval/ml/Vectorizer.java
File renamed without changes
0  ...a/com/mozilla/pig/eval/regex/EncodeChromeUrl.java → ...a/com/mozilla/pig/eval/regex/EncodeChromeUrl.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/regex/FindAll.java → ...main/java/com/mozilla/pig/eval/regex/FindAll.java
File renamed without changes
0  ...java/com/mozilla/pig/eval/text/NGramTokenize.java → ...java/com/mozilla/pig/eval/text/NGramTokenize.java
File renamed without changes
0  ...java/com/mozilla/pig/eval/text/TermFrequency.java → ...java/com/mozilla/pig/eval/text/TermFrequency.java
File renamed without changes
0  src/java/com/mozilla/pig/eval/text/Tokenize.java → ...main/java/com/mozilla/pig/eval/text/Tokenize.java
File renamed without changes
0  ...a/com/mozilla/pig/eval/text/UnigramExtractor.java → ...a/com/mozilla/pig/eval/text/UnigramExtractor.java
File renamed without changes
0  src/java/com/mozilla/pig/filter/InDictionary.java → ...ain/java/com/mozilla/pig/filter/InDictionary.java
File renamed without changes
0  ...va/com/mozilla/pig/load/HBaseMultiScanLoader.java → ...va/com/mozilla/pig/load/HBaseMultiScanLoader.java
File renamed without changes
0  ...om/mozilla/pig/storage/DocumentVectorStorage.java → ...om/mozilla/pig/storage/DocumentVectorStorage.java
File renamed without changes
0  src/java/com/mozilla/pig/storage/RegExLoader.java → ...ain/java/com/mozilla/pig/storage/RegExLoader.java
File renamed without changes
0  .../com/mozilla/pig/storage/SequenceFileStorage.java → .../com/mozilla/pig/storage/SequenceFileStorage.java
File renamed without changes
0  .../mozilla/pig/storage/apachelog/WeblogsLoader.java → .../mozilla/pig/storage/apachelog/WeblogsLoader.java
File renamed without changes
0  src/java/com/mozilla/util/DateUtil.java → src/main/java/com/mozilla/util/DateUtil.java
File renamed without changes
0  src/java/com/mozilla/util/Pair.java → src/main/java/com/mozilla/util/Pair.java
File renamed without changes
0  src/java/com/mozilla/util/StringUtil.java → src/main/java/com/mozilla/util/StringUtil.java
File renamed without changes
0  src/java/com/mozilla/util/TextUtil.java → src/main/java/com/mozilla/util/TextUtil.java
File renamed without changes
0  src/python/format_riak_keys.py → src/main/python/format_riak_keys.py
File renamed without changes
0  src/python/lsr_diff.py → src/main/python/lsr_diff.py
File renamed without changes

0 comments on commit 68b7816

Please sign in to comment.
Something went wrong with that request. Please try again.