Permalink
Browse files

Initial commit

  • Loading branch information...
matthayes committed Jan 24, 2013
0 parents commit 56e5425e927b8db4a75311585a62cadd2b4a6a2b
Showing with 47,877 additions and 0 deletions.
  1. +13 −0 LICENSE
  2. +30 −0 README.md
  3. +9 −0 hadoop/.classpath
  4. +11 −0 hadoop/.gitignore
  5. +18 −0 hadoop/.project
  6. +59 −0 hadoop/bin/run.sh
  7. +136 −0 hadoop/build.xml
  8. +25 −0 hadoop/config/ivy/ivy.xml
  9. +9 −0 hadoop/config/ivy/ivysettings.xml
  10. +43 −0 hadoop/config/jobs/base.properties
  11. +2 −0 hadoop/config/jobs/white-elephant-full-usage.job
  12. +2 −0 hadoop/config/jobs/white-elephant-incremental-usage.job
  13. BIN hadoop/lib/static/azkaban-common-0.10.jar
  14. +142 −0 hadoop/src/avro_schemas/LogData.avsc
  15. +70 −0 hadoop/src/avro_schemas/Stats.avsc
  16. +155 −0 hadoop/src/java/com/linkedin/whiteelephant/ProcessLogs.java
  17. +448 −0 hadoop/src/java/com/linkedin/whiteelephant/analysis/ComputeUsagePerHour.java
  18. +530 −0 hadoop/src/java/com/linkedin/whiteelephant/mapreduce/MyAvroMultipleOutputs.java
  19. +64 −0 hadoop/src/java/com/linkedin/whiteelephant/mapreduce/lib/input/CombinedTextInputFormat.java
  20. +254 −0 hadoop/src/java/com/linkedin/whiteelephant/mapreduce/lib/job/StagedOutputJob.java
  21. +122 −0 hadoop/src/java/com/linkedin/whiteelephant/mapreduce/lib/job/StagedOutputJobExecutor.java
  22. +339 −0 hadoop/src/java/com/linkedin/whiteelephant/parsing/LineParsing.java
  23. +762 −0 hadoop/src/java/com/linkedin/whiteelephant/parsing/ParseJobsFromLogs.java
  24. +114 −0 hadoop/src/java/com/linkedin/whiteelephant/util/JobStatsProcessing.java
  25. +12 −0 server/.gitignore
  26. +31 −0 server/Gemfile
  27. +95 −0 server/Gemfile.lock
  28. +82 −0 server/app.rb
  29. +7 −0 server/app/assets/javascripts/app.js
  30. +139 −0 server/app/assets/javascripts/controllers/index.js.coffee
  31. +98 −0 server/app/assets/javascripts/rickshaw_plugins/Rickshaw.Fixtures.LocalTime.js
  32. +88 −0 server/app/assets/javascripts/rickshaw_plugins/Rickshaw.Graph.Axis.LocalTime.js
  33. +3 −0 server/app/assets/javascripts/templates/application.hbs
  34. +7 −0 server/app/assets/javascripts/templates/graph.hbs
  35. +13 −0 server/app/assets/javascripts/templates/index.hbs
  36. +23 −0 server/app/assets/javascripts/templates/table.hbs
  37. +52 −0 server/app/assets/javascripts/templates/usage_query.hbs
  38. +16 −0 server/app/assets/javascripts/vendor.js
  39. +203 −0 server/app/assets/javascripts/views/graph.js.coffee
  40. +19 −0 server/app/assets/javascripts/views/index.js.coffee
  41. +162 −0 server/app/assets/javascripts/views/table.js.coffee
  42. +93 −0 server/app/assets/javascripts/views/usage_query.js.coffee
  43. +101 −0 server/app/assets/stylesheets/core.css.less
  44. +39 −0 server/app/assets/stylesheets/tablesorter.less
  45. +50 −0 server/app/cache_builder.rb
  46. +256 −0 server/app/cube.rb
  47. +29 −0 server/app/executor.rb
  48. +610 −0 server/app/usage_data.rb
  49. +158 −0 server/app/usage_database.rb
  50. +184 −0 server/app/usage_file_load_task.rb
  51. +99 −0 server/app/usage_hadoop_loader.rb
  52. +187 −0 server/app/usage_loader.rb
  53. +39 −0 server/app/usage_local_loader.rb
  54. +97 −0 server/build.xml
  55. +43 −0 server/config.ru
  56. +26 −0 server/config.yml
  57. +7 −0 server/config/ivy/ivy.xml
  58. +8 −0 server/config/ivy/ivysettings.xml
  59. +6 −0 server/config/trinidad.yml
  60. +152 −0 server/config/warble.rb
  61. +77 −0 server/environment.rb
  62. +11 −0 server/environment.sh
  63. BIN server/lib/static/hsqldb.jar
  64. +22 −0 server/licenses/backbone-license
  65. +176 −0 server/licenses/bootstrap-license
  66. +26 −0 server/licenses/d3-license
  67. +66 −0 server/licenses/hsqldb-license
  68. +21 −0 server/licenses/jquery-license
  69. +20 −0 server/licenses/jquery-multiselect-license
  70. +7 −0 server/licenses/jquery-tablesorter-license
  71. +26 −0 server/licenses/jquery-ui-license
  72. +22 −0 server/licenses/moment-license
  73. +10 −0 server/licenses/mustache-license
  74. +7 −0 server/licenses/rickshaw-license
  75. +22 −0 server/licenses/underscore-license
  76. +6 −0 server/log4j/log4j.properties
  77. BIN server/public/images/asc.gif
  78. BIN server/public/images/bg.gif
  79. BIN server/public/images/desc.gif
  80. BIN server/public/images/pager/first.png
  81. BIN server/public/images/pager/last.png
  82. BIN server/public/images/pager/next.png
  83. BIN server/public/images/pager/prev.png
  84. BIN server/public/img/glyphicons-halflings-white.png
  85. BIN server/public/img/glyphicons-halflings.png
  86. +234 −0 server/setup.sh
  87. +66 −0 server/src/java/com/linkedin/whiteelephant/TimeZoneConversion.java
  88. +10 −0 server/startup.sh
  89. +187 −0 server/test_data.rb
  90. +38 −0 server/vendor/assets/javascripts/backbone-0.9.2-min.js
  91. +1,431 −0 server/vendor/assets/javascripts/backbone-0.9.2.js
  92. +1 −0 server/vendor/assets/javascripts/d3.layout.min.js
  93. +2 −0 server/vendor/assets/javascripts/d3.min.js
  94. +26,048 −0 server/vendor/assets/javascripts/ember-1.0.0-pre.4.js
  95. +1,920 −0 server/vendor/assets/javascripts/handlebars-1.0.rc.1.js
  96. +4 −0 server/vendor/assets/javascripts/jquery-1.7.2.min.js
  97. +125 −0 server/vendor/assets/javascripts/jquery-ui-1.8.24.custom.min.js
  98. +173 −0 server/vendor/assets/javascripts/jquery.multiselect.filter.js
  99. +705 −0 server/vendor/assets/javascripts/jquery.multiselect.js
  100. +20 −0 server/vendor/assets/javascripts/jquery.multiselect.min.js
  101. +1,031 −0 server/vendor/assets/javascripts/jquery.tablesorter.js
  102. +4 −0 server/vendor/assets/javascripts/jquery.tablesorter.min.js
  103. +184 −0 server/vendor/assets/javascripts/jquery.tablesorter.pager.js
  104. +486 −0 server/vendor/assets/javascripts/json2.js
  105. +1,213 −0 server/vendor/assets/javascripts/moment.js
  106. +6 −0 server/vendor/assets/javascripts/moment.min.js
  107. +536 −0 server/vendor/assets/javascripts/mustache.js
  108. +2 −0 server/vendor/assets/javascripts/rickshaw.min.js
  109. +32 −0 server/vendor/assets/javascripts/underscore-1.3.3-min.js
  110. +34 −0 server/vendor/assets/stylesheets/bootstrap/accordion.less
  111. +65 −0 server/vendor/assets/stylesheets/bootstrap/alerts.less
  112. +62 −0 server/vendor/assets/stylesheets/bootstrap/bootstrap.less
  113. +24 −0 server/vendor/assets/stylesheets/bootstrap/breadcrumbs.less
  114. +245 −0 server/vendor/assets/stylesheets/bootstrap/button-groups.less
  115. +231 −0 server/vendor/assets/stylesheets/bootstrap/buttons.less
  116. +131 −0 server/vendor/assets/stylesheets/bootstrap/carousel.less
  117. +31 −0 server/vendor/assets/stylesheets/bootstrap/close.less
  118. +58 −0 server/vendor/assets/stylesheets/bootstrap/code.less
  119. +22 −0 server/vendor/assets/stylesheets/bootstrap/component-animations.less
  120. +210 −0 server/vendor/assets/stylesheets/bootstrap/dropdowns.less
  121. +650 −0 server/vendor/assets/stylesheets/bootstrap/forms.less
  122. +21 −0 server/vendor/assets/stylesheets/bootstrap/grid.less
  123. +24 −0 server/vendor/assets/stylesheets/bootstrap/hero-unit.less
  124. +72 −0 server/vendor/assets/stylesheets/bootstrap/labels-badges.less
  125. +16 −0 server/vendor/assets/stylesheets/bootstrap/layouts.less
  126. +681 −0 server/vendor/assets/stylesheets/bootstrap/mixins.less
  127. +98 −0 server/vendor/assets/stylesheets/bootstrap/modals.less
  128. +475 −0 server/vendor/assets/stylesheets/bootstrap/navbar.less
  129. +384 −0 server/vendor/assets/stylesheets/bootstrap/navs.less
  130. +40 −0 server/vendor/assets/stylesheets/bootstrap/pager.less
  131. +64 −0 server/vendor/assets/stylesheets/bootstrap/pagination.less
  132. +117 −0 server/vendor/assets/stylesheets/bootstrap/popovers.less
  133. +122 −0 server/vendor/assets/stylesheets/bootstrap/progress-bars.less
  134. +137 −0 server/vendor/assets/stylesheets/bootstrap/reset.less
  135. +28 −0 server/vendor/assets/stylesheets/bootstrap/responsive-1200px-min.less
  136. +174 −0 server/vendor/assets/stylesheets/bootstrap/responsive-767px-max.less
  137. +19 −0 server/vendor/assets/stylesheets/bootstrap/responsive-768px-979px.less
  138. +177 −0 server/vendor/assets/stylesheets/bootstrap/responsive-navbar.less
  139. +43 −0 server/vendor/assets/stylesheets/bootstrap/responsive-utilities.less
  140. +48 −0 server/vendor/assets/stylesheets/bootstrap/responsive.less
  141. +52 −0 server/vendor/assets/stylesheets/bootstrap/scaffolding.less
  142. +193 −0 server/vendor/assets/stylesheets/bootstrap/sprites.less
  143. +245 −0 server/vendor/assets/stylesheets/bootstrap/tables.less
  144. +52 −0 server/vendor/assets/stylesheets/bootstrap/thumbnails.less
  145. +70 −0 server/vendor/assets/stylesheets/bootstrap/tooltip.less
  146. +221 −0 server/vendor/assets/stylesheets/bootstrap/type.less
  147. +30 −0 server/vendor/assets/stylesheets/bootstrap/utilities.less
  148. +279 −0 server/vendor/assets/stylesheets/bootstrap/variables.less
  149. +29 −0 server/vendor/assets/stylesheets/bootstrap/wells.less
  150. BIN server/vendor/assets/stylesheets/images/ui-bg_flat_0_aaaaaa_40x100.png
  151. BIN server/vendor/assets/stylesheets/images/ui-bg_flat_75_ffffff_40x100.png
  152. BIN server/vendor/assets/stylesheets/images/ui-bg_glass_55_fbf9ee_1x400.png
  153. BIN server/vendor/assets/stylesheets/images/ui-bg_glass_65_ffffff_1x400.png
  154. BIN server/vendor/assets/stylesheets/images/ui-bg_glass_75_dadada_1x400.png
  155. BIN server/vendor/assets/stylesheets/images/ui-bg_glass_75_e6e6e6_1x400.png
  156. BIN server/vendor/assets/stylesheets/images/ui-bg_glass_95_fef1ec_1x400.png
  157. BIN server/vendor/assets/stylesheets/images/ui-bg_highlight-soft_75_cccccc_1x100.png
  158. BIN server/vendor/assets/stylesheets/images/ui-icons_222222_256x240.png
  159. BIN server/vendor/assets/stylesheets/images/ui-icons_2e83ff_256x240.png
  160. BIN server/vendor/assets/stylesheets/images/ui-icons_454545_256x240.png
  161. BIN server/vendor/assets/stylesheets/images/ui-icons_888888_256x240.png
  162. BIN server/vendor/assets/stylesheets/images/ui-icons_cd0a0a_256x240.png
  163. +563 −0 server/vendor/assets/stylesheets/jquery-ui-1.8.24.custom.less
  164. +3 −0 server/vendor/assets/stylesheets/jquery.multiselect.filter.less
  165. +23 −0 server/vendor/assets/stylesheets/jquery.multiselect.less
  166. +1 −0 server/vendor/assets/stylesheets/rickshaw.min.css
  167. +3 −0 server/views/home.erb
  168. +42 −0 server/views/layout.erb
13 LICENSE
@@ -0,0 +1,13 @@
Copyright 2012 LinkedIn, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,30 @@
# White Elephant
White Elephant is a Hadoop log aggregator and dashboard which enables
visualization of Hadoop cluster utilization across users.
## Taking it for a spin
To try out the server with some test data:
cd server
ant
./startup.sh
Then visit [http://localhost:3000](http://localhost:3000).
## License
Copyright 2012 LinkedIn, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src/avro_gen"/>
<classpathentry kind="src" path="src/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.testng.TESTNG_CONTAINER"/>
<classpathentry kind="con" path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=whiteelephant&amp;ivyXmlPath=ivy.xml&amp;confs=*"/>
<classpathentry kind="output" path=".bin"/>
</classpath>
@@ -0,0 +1,11 @@
/report
/ivy
/lib/common
/lib/hadoop
/lib/ivy
/_generated
/build
dist/
.bin
src/avro_gen
test-output/
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>whiteelephant</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.apache.ivyde.eclipse.ivynature</nature>
</natures>
</projectDescription>
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
#
# Launches com.linkedin.whiteelephant.ProcessLogs for the given job file.
#
# Usage: run.sh <job-file>
#
# Environment Variables
#
# HADOOP_CONF_DIR Hadoop configuration
#
# HADOOP_LIB_DIR Hadoop JARs directory
#
# JAVA_OPTS (Optional) Java runtime options

# Directory containing this script; it is searched below for JAR files.
home_dir=$(dirname "$0")

if [ -z "$HADOOP_CONF_DIR" ]; then
  echo "Must set HADOOP_CONF_DIR"
  exit 1
fi

if [ -z "$HADOOP_LIB_DIR" ]; then
  echo "Must set HADOOP_LIB_DIR"
  exit 1
fi

if [ ! -d "$HADOOP_CONF_DIR" ]; then
  echo "Directory $HADOOP_CONF_DIR not found"
  exit 1
fi

if [ ! -d "$HADOOP_LIB_DIR" ]; then
  echo "Directory $HADOOP_LIB_DIR not found"
  exit 1
fi

if [ $# -eq 1 ]; then
  JOB_FILE=$1
else
  echo "Usage: run.sh <job-file>"
  exit 1
fi

# Quoted so that an empty argument or a path containing spaces is still
# tested as a single file name.
if [ ! -f "$JOB_FILE" ]; then
  echo "File $JOB_FILE not found"
  exit 1
fi

# Classpath order: Hadoop configuration directory, then every JAR under
# HADOOP_LIB_DIR, then every JAR under this script's directory.
CLASSPATH=$HADOOP_CONF_DIR

# Read find's output one line at a time instead of word-splitting it, so JAR
# paths that contain whitespace stay intact.
while IFS= read -r jar; do
  CLASSPATH=$CLASSPATH:$jar
done < <(find -H "$HADOOP_LIB_DIR" -name '*.jar')

while IFS= read -r jar; do
  CLASSPATH=$CLASSPATH:$jar
done < <(find "$home_dir" -name '*.jar')

export CLASSPATH

# JAVA_OPTS is deliberately unquoted: it may hold several options that must be
# passed to java as separate arguments.
java $JAVA_OPTS com.linkedin.whiteelephant.ProcessLogs "$JOB_FILE"
@@ -0,0 +1,136 @@
<project name="whiteelephant" basedir="." default="all"
         xmlns:ivy="antlib:org.apache.ivy.ant"
         xmlns:artifact="antlib:org.apache.maven.artifact.ant">

  <!--
    Build for the White Elephant Hadoop jobs. The Ivy jar is downloaded, Ivy
    retrieves the dependencies into lib/[conf], the Avro schemas are compiled
    to Java, and the classes are packaged as a core jar, a fat jar and a zip.
    The default target "all" runs clean, init and jar.
  -->

  <!-- Artifact naming -->
  <property name="version" value="0.0.1" />
  <property name="name" value="white-elephant" />
  <property name="jar.core.name" value="${name}-core-${version}.jar" />
  <property name="jar.final.name" value="${name}-${version}.jar" />

  <!-- Location the Ivy jar is downloaded from.
       NOTE(review): this is a plain-HTTP repository URL; confirm it is still
       reachable from the build environment. -->
  <property name="ivy.jar.version" value="2.2.0" />
  <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
  <property name="ivy.jar.repo.url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.jar.version}/ivy-${ivy.jar.version}.jar"/>

  <!-- Directory layout -->
  <property name="build.dir" value="${basedir}/build" />
  <property name="dist.dir" value="${basedir}/dist" />
  <property name="lib.dir" value="${basedir}/lib" />
  <property name="ivy.config.dir" value="${basedir}/config/ivy" />
  <property name="ivy.xml.path" value="${ivy.config.dir}/ivy.xml" />
  <property name="ivy.settings.xml.path" value="${ivy.config.dir}/ivysettings.xml" />
  <property name="ivy.jar.dir" value="${lib.dir}/ivy" />
  <property name="ivy.jar" value="${ivy.jar.dir}/ivy-${ivy.jar.version}.jar" />
  <property name="lib.common.dir" value="${basedir}/lib/common" />
  <property name="lib.hadoop.dir" value="${basedir}/lib/hadoop" />
  <property name="lib.static.dir" value="${basedir}/lib/static" />
  <property name="java.dir" value="${basedir}/src/java" />
  <property name="avro.schemas.dir" value="${basedir}/src/avro_schemas" />
  <property name="avro.gen.dir" value="${basedir}/src/avro_gen" />
  <property name="classes.dir" value="${build.dir}/classes" />
  <property name="report.dir" value="${basedir}/report" />

  <!-- Output artifacts -->
  <property name="jar.core.path" value="${dist.dir}/${jar.core.name}"/>
  <property name="jar.final.path" value="${dist.dir}/${jar.final.name}"/>
  <property name="zip.path" value="${dist.dir}/${name}.zip"/>
  <property name="job.dir" value="${dist.dir}/job"/>
  <property name="job.config.dir" value="${basedir}/config/jobs"/>
  <property name="bin.dir" value="${basedir}/bin"/>

  <target name="all" depends="clean,init,jar"/>

  <!-- Fetches the Ivy jar into lib/ivy; usetimestamp avoids re-downloading an
       up-to-date copy. -->
  <target name="ivy-jar-download" description="Download ivy jar">
    <mkdir dir="${ivy.jar.dir}"/>
    <get src="${ivy.jar.repo.url}" dest="${ivy.jar}" usetimestamp="true"/>
  </target>

  <!-- Registers the Ivy Ant tasks from the downloaded jar. -->
  <target name="ivy-taskdef" depends="ivy-jar-download" description="install ivy">
    <path id="ivy.lib.path">
      <pathelement location="${ivy.jar}"/>
    </path>
    <taskdef resource="org/apache/ivy/ant/antlib.xml"
             uri="antlib:org.apache.ivy.ant" classpathref="ivy.lib.path"/>
  </target>

  <!-- Resolves ivy.xml and copies each dependency into lib/[conf], i.e.
       lib/common and lib/hadoop for the configurations declared there. -->
  <target name="ivy-resolve" depends="ivy-taskdef" description="retrieve dependencies with ivy">
    <ivy:settings file="${ivy.settings.xml.path}" />
    <ivy:resolve file="${ivy.xml.path}" />
    <ivy:retrieve pattern="${lib.dir}/[conf]/[artifact]-[type]-[revision].[ext]"/>
  </target>

  <!-- Compile classpath: retrieved jars plus the compiled classes. -->
  <path id="main-classpath">
    <fileset dir="${lib.common.dir}">
      <include name="*.jar" />
    </fileset>
    <fileset dir="${lib.hadoop.dir}">
      <include name="*.jar" />
    </fileset>
    <pathelement path="${classes.dir}" />
  </path>

  <target name="init" depends="ivy-resolve"></target>

  <!-- Removes everything the build downloads or generates. -->
  <target name="clean">
    <delete dir="${lib.common.dir}" />
    <delete dir="${lib.hadoop.dir}" />
    <delete dir="${ivy.jar.dir}" />
    <delete dir="${build.dir}" />
    <delete dir="${report.dir}" />
    <delete dir="${dist.dir}" />
    <delete dir="${avro.gen.dir}" />
  </target>

  <!-- Regenerates Java sources from the .avsc schemas into src/avro_gen. -->
  <target name="avro-schemas" depends="init" description="compile avro schemas">
    <delete dir="${avro.gen.dir}" />
    <taskdef name="avroschema" classname="org.apache.avro.compiler.specific.SchemaTask">
      <classpath refid="main-classpath" />
    </taskdef>
    <avroschema destdir="${avro.gen.dir}">
      <fileset dir="${avro.schemas.dir}" includes="**/*.avsc" />
    </avroschema>
  </target>

  <!-- Compiles the hand-written sources together with the generated Avro
       sources. -->
  <target name="build" depends="init,avro-schemas" description="compile source code">
    <delete dir="${classes.dir}"/>
    <mkdir dir="${classes.dir}"/>
    <javac fork="true" destdir="${classes.dir}" target="1.6" debug="true"
           deprecation="true" failonerror="true">
      <src path="${java.dir}"/>
      <src path="${avro.gen.dir}"/>
      <classpath refid="main-classpath"/>
    </javac>
  </target>

  <!-- Packages the compiled classes only (no dependencies). -->
  <target name="jar" depends="build">
    <mkdir dir="${dist.dir}"/>
    <!-- jar.core.path names a jar file, so it is removed with the file
         attribute, matching how the fatjar target removes its jar. -->
    <delete file="${jar.core.path}"/>
    <jar jarfile="${jar.core.path}" filesetmanifest="merge">
      <fileset dir="${classes.dir}" />
      <manifest>
        <attribute name="Main-Class"
                   value="com.linkedin.whiteelephant.ProcessLogs"/>
      </manifest>
    </jar>
  </target>

  <!-- Merges the core jar's classes with the jars in lib/common and
       lib/static; the jars in lib/hadoop are not included. -->
  <target name="fatjar" depends="jar">
    <mkdir dir="${dist.dir}"/>
    <delete file="${jar.final.path}" />
    <jar jarfile="${jar.final.path}" filesetmanifest="skip">
      <zipfileset includes="**/*.class" src="${jar.core.path}"/>
      <zipgroupfileset dir="${lib.common.dir}" includes="*.jar" />
      <zipgroupfileset dir="${lib.static.dir}" includes="*.jar" />
      <manifest>
        <attribute name="Main-Class"
                   value="com.linkedin.whiteelephant.ProcessLogs"/>
      </manifest>
    </jar>
  </target>

  <!-- Bundles the scripts from bin (made executable), the fat jar and the job
       configuration files into one zip. -->
  <target name="zip" depends="fatjar">
    <mkdir dir="${dist.dir}"/>
    <delete file="${zip.path}"/>
    <zip destfile="${zip.path}">
      <zipfileset dir="${bin.dir}" filemode="755" includes="*.*"/>
      <zipfileset dir="${dist.dir}" includes="${jar.final.name}"/>
      <zipfileset dir="${job.config.dir}" includes="*.*" />
    </zip>
  </target>
</project>
@@ -0,0 +1,25 @@
<ivy-module version="2.0">
<!-- Ivy module descriptor for the whiteelephant Hadoop build. Every dependency
     is mapped to one of the two configurations declared below. -->
<info organisation="com.linkedin" module="whiteelephant"/>
<configurations>
<conf name="common" description="libraries to include in the fat jar" />
<conf name="hadoop" description="for hadoop, won't fat jar, only needed for building" />
</configurations>
<dependencies>
<!-- "common" configuration -->
<dependency org="org.apache.avro" name="avro" rev="1.7.2" conf="common->default"/>
<dependency org="org.apache.avro" name="avro-mapred" rev="1.7.2" conf="common->default"/>
<dependency org="org.apache.avro" name="avro-compiler" rev="1.7.2" conf="common->default"/>
<dependency org="org.testng" name="testng" rev="6.0.1" conf="common->default"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1" conf="common->default"/>
<dependency org="log4j" name="log4j" rev="1.2.17" conf="common->default"/>
<dependency org="org.slf4j" name="slf4j-log4j12" rev="1.6.4" conf="common->default" />
<dependency org="joda-time" name="joda-time" rev="1.6" conf="common->default"/>
<dependency org="com.googlecode.json-simple" name="json-simple" rev="1.1.1" conf="common->default"/>
<dependency org="org.json" name="json" rev="20090211" conf="common->default"/>
<dependency org="com.google.guava" name="guava" rev="13.0.1" conf="common->default"/>
<dependency org="org.easymock" name="easymock" rev="3.1" conf="common->default"/>
<!-- "hadoop" configuration -->
<dependency org="org.apache.hadoop" name="hadoop-core" rev="1.0.3" conf="hadoop->default"/>
<dependency org="org.apache.hadoop" name="hadoop-tools" rev="1.0.3" conf="hadoop->default"/>
<dependency org="com.hadoop.gplcompression" name="hadoop-lzo" rev="0.4.15" conf="hadoop->default"/>
</dependencies>
</ivy-module>
@@ -0,0 +1,9 @@
<ivysettings>
<!-- All modules are resolved through the "ivyrep" chain: the default ibiblio
     resolver is tried first, then the repository at maven.twttr.com.
     returnFirst="true" makes the chain stop at the first resolver that finds
     the module.
     NOTE(review): the second repository is presumably needed for hadoop-lzo;
     confirm before removing it. -->
<settings defaultResolver="ivyrep"/>
<resolvers>
<chain name="ivyrep" returnFirst="true">
<ibiblio name="ibiblio" m2compatible="true"/>
<ibiblio name="twttr.com" m2compatible="true" root="http://maven.twttr.com/"/>
</chain>
</resolvers>
</ivysettings>
@@ -0,0 +1,43 @@
###############################
# White Elephant Configuration
###############################
# Base path in Hadoop where files will be stored
job.root=/path/to/job/root
# Base path where Hadoop logs are stored.
logs.root=/path/to/logs/root
# How many Hadoop jobs to run concurrently. Logs are assumed to be divided
# by day. A Hadoop job will be created for each day to process. These jobs
# are run concurrently to make the whole task finish faster.
job.concurrency=20
# How many days of log data to process.
num.days=100
# Always process the last n days of log data, even when processing data incrementally.
# This is in case recent log data is partial.
num.days.forced=5
# Where parsed logs should be stored.
jobs.output.path=/path/to/root/parsed-logs
# Where aggregated usage data should be stored.
usage.output.path=/path/to/root/usage-per-hour
# Names of Hadoop clusters to process logs for.
cluster.names=dev-cluster,prod-cluster,other-cluster
#######################
# Hadoop Configuration
#######################
hadoop-conf.mapred.max.split.size=100000000
hadoop-conf.mapreduce.input.fileinputformat.split.maxsize=100000000
hadoop-conf.io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec
hadoop-conf.mapred.compress.map.output=true
hadoop-conf.mapred.map.output.compression.codec=com.hadoop.compression.lzo.LzoCodec
# TODO make sure to set your ugi
hadoop-conf.hadoop.job.ugi=username,hadoop
@@ -0,0 +1,2 @@
# Reprocess all data regardless of whether it has been processed before
incremental=false
@@ -0,0 +1,2 @@
# Save work by only processing what hasn't been done
incremental=true
Binary file not shown.
Oops, something went wrong.

0 comments on commit 56e5425

Please sign in to comment.