Permalink
Browse files

Moved HBase and non-core code out of warcbase-core into warcbase-hbase.

  • Loading branch information...
1 parent f972206 commit 0db46be7ef7fac9406cb8859b77103113a3bd316 @lintool committed Jun 16, 2016
Showing 85 changed files with 561 additions and 114 deletions.
  1. +1 −1 .travis.yml
  2. +21 −113 warcbase-core/pom.xml
  3. +539 −0 warcbase-hbase/pom.xml
  4. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/WarcbaseAdmin.java
  5. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/FindArcUrls.java
  6. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/FindWarcUrls.java
  7. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/ExtractLinksWac.java
  8. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/ExtractSiteLinks.java
  9. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/InvertAnchorText.java
  10. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/PrefixMapping.java
  11. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/browser/SeleniumBrowser.java
  12. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/browser/WarcBrowser.java
  13. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/browser/WarcBrowserServlet.java
  14. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/HBaseTableManager.java
  15. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlMapping.java
  16. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlMappingBuilder.java
  17. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlMappingMapReduceBuilder.java
  18. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlUtils.java
  19. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/demo/WacMapReduceHBaseDemo.java
  20. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/demo/WacMapReduceHBaseWrapperDemo.java
  21. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/index/IndexerMapper.java
  22. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/index/IndexerReducer.java
  23. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/index/IndexerRunner.java
  24. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/ingest/IngestFiles.java
  25. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/ingest/SearchForUrl.java
  26. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/Chain.java
  27. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/ChainMapContextImpl.java
  28. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/HBaseRowToArcRecordWritableMapper.java
  29. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/TableChainMapper.java
  30. 0 {warcbase-core → warcbase-hbase}/src/main/solr/README.txt
  31. 0 {warcbase-core → warcbase-hbase}/src/main/solr/WARCIndexer.conf
  32. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/currency.xml
  33. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/elevate.xml
  34. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_ca.txt
  35. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_fr.txt
  36. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_ga.txt
  37. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_it.txt
  38. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/hyphenations_ga.txt
  39. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stemdict_nl.txt
  40. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stoptags_ja.txt
  41. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ar.txt
  42. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_bg.txt
  43. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ca.txt
  44. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_cz.txt
  45. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_da.txt
  46. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_de.txt
  47. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_el.txt
  48. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_en.txt
  49. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_es.txt
  50. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_eu.txt
  51. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_fa.txt
  52. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_fi.txt
  53. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_fr.txt
  54. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ga.txt
  55. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_gl.txt
  56. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_hi.txt
  57. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_hu.txt
  58. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_hy.txt
  59. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_id.txt
  60. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_it.txt
  61. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ja.txt
  62. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_lv.txt
  63. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_nl.txt
  64. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_no.txt
  65. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_pt.txt
  66. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ro.txt
  67. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ru.txt
  68. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_sv.txt
  69. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_th.txt
  70. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_tr.txt
  71. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/userdict_ja.txt
  72. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/protwords.txt
  73. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/schema.xml
  74. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrconfig-production.xml
  75. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrconfig-server-4.10.4.xml
  76. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrconfig.xml
  77. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrcore.properties
  78. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrcore.properties-production
  79. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/stopwords.txt
  80. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/synonyms.txt
  81. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/core.properties
  82. 0 {warcbase-core → warcbase-hbase}/src/main/solr/solr.xml
  83. 0 {warcbase-core → warcbase-hbase}/src/main/solr/zoo.cfg
  84. 0 {warcbase-core → warcbase-hbase}/src/test/java/org/warcbase/data/UrlMappingTest.java
  85. 0 {warcbase-core → warcbase-hbase}/src/test/java/org/warcbase/data/UrlUtilsTest.java
View
@@ -9,4 +9,4 @@ before_install:
- "export JAVA_OPTS=-Xmx512m"
script:
- - mvn clean package appassembler:assemble
+ - mvn clean package
View
@@ -61,7 +61,7 @@
<build>
<plugins>
- <plugin>
+ <!--plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.6.1</version>
<configuration>
@@ -72,7 +72,7 @@
</fileset>
</filesets>
</configuration>
- </plugin>
+ </plugin-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -128,7 +128,7 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</executions>
</plugin>
-<plugin>
+<!--plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.4</version>
@@ -146,7 +146,7 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</configuration>
</execution>
</executions>
-</plugin>
+</plugin-->
<plugin>
<groupId>org.codehaus.mojo</groupId>
@@ -263,53 +263,26 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>2.2.4</version>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- <version>1.8</version>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <version>2.4</version>
- </dependency>
+
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
</dependency>
+
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>14.0.1</version>
<!-- downgrade for Hadoop WARC indexer, see also https://issues.apache.org/jira/browse/HADOOP-10961 -->
</dependency>
+
<dependency>
<groupId>tl.lin</groupId>
<artifactId>lintools-datatypes</artifactId>
<version>1.0.0</version>
</dependency>
-
- <!-- Begin: Hadoop-related dependencies -->
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-client</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase-server</artifactId>
- <version>${hbase.version}</version>
- <exclusions>
- <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
- <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>servlet-api-2.5</artifactId></exclusion>
- <exclusion><groupId>javax.servlet</groupId><artifactId>servlet-api</artifactId></exclusion>
- <exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
- </exclusions>
- </dependency>
+ <!-- WacWarcLoaderTest depends on this -->
<!-- See http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/cdhvd_hadoop_api_dependencies.html -->
<dependency>
@@ -321,12 +294,6 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</exclusions>
</dependency>
- <dependency>
- <groupId>org.apache.zookeeper</groupId>
- <artifactId>zookeeper</artifactId>
- <version>${zookeeper.version}</version>
- </dependency>
-
<!-- End: Hadoop-related dependencies -->
<dependency>
@@ -355,56 +322,33 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</dependency>
<dependency>
- <groupId>it.unimi.dsi</groupId>
- <artifactId>dsiutils</artifactId>
- <version>2.2.0</version>
- <exclusions>
- <exclusion><groupId>ch.qos.logback</groupId><artifactId>logback-classic</artifactId></exclusion>
- <exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>it.unimi.dsi</groupId>
- <artifactId>fastutil</artifactId>
- <version>6.5.15</version>
- <exclusions>
- <exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>${jettyVersion}</version>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-webapp</artifactId>
- <version>${jettyVersion}</version>
- <optional>true</optional>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <version>3.0</version>
</dependency>
<dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.4</version>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>1.8</version>
</dependency>
-
<dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- <version>3.0</version>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.4</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
+ <!--
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>2.3</version>
</dependency>
+ -->
<dependency>
<groupId>org.apache.tika</groupId>
@@ -424,17 +368,6 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</dependency>
<dependency>
- <groupId>org.seleniumhq.selenium</groupId>
- <artifactId>selenium-java</artifactId>
- <version>2.42.2</version>
- <exclusions>
- <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-htmlunit-driver</artifactId></exclusion>
- <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-ie-driver</artifactId></exclusion>
- <exclusion><groupId>org.webbitserver</groupId><artifactId>webbit</artifactId></exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.10.4</version>
@@ -483,11 +416,13 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>1.2.1</version>
</dependency>
+ <!--
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<version>1.0.5</version>
</dependency>
+ -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
@@ -507,33 +442,6 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>2.11.0</version>
</dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-core</artifactId>
- <version>4.7.2</version>
- </dependency>
- <dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-core</artifactId>
- <version>4.7.2</version>
- <exclusions>
- <exclusion><artifactId>slf4j-api</artifactId><groupId>org.slf4j</groupId></exclusion>
- <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-annotations</groupId></exclusion>
- <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-common</groupId></exclusion>
- <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-hdfs</groupId></exclusion>
- <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>uk.bl.wa.discovery</groupId>
- <artifactId>warc-hadoop-indexer</artifactId>
- <version>2.2.0-BETA-5</version>
- <exclusions>
- <exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
- <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
- </exclusions>
- </dependency>
</dependencies>
</project>
Oops, something went wrong.

0 comments on commit 0db46be

Please sign in to comment.