This repository has been archived by the owner. It is now read-only.
Permalink
Browse files

Moved HBase and non-core code out of warcbase-core into warcbase-hbase.

  • Loading branch information...
lintool committed Jun 16, 2016
1 parent f972206 commit 0db46be7ef7fac9406cb8859b77103113a3bd316
Showing with 561 additions and 114 deletions.
  1. +1 −1 .travis.yml
  2. +21 −113 warcbase-core/pom.xml
  3. +539 −0 warcbase-hbase/pom.xml
  4. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/WarcbaseAdmin.java
  5. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/FindArcUrls.java
  6. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/FindWarcUrls.java
  7. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/ExtractLinksWac.java
  8. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/ExtractSiteLinks.java
  9. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/InvertAnchorText.java
  10. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/analysis/graph/PrefixMapping.java
  11. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/browser/SeleniumBrowser.java
  12. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/browser/WarcBrowser.java
  13. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/browser/WarcBrowserServlet.java
  14. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/HBaseTableManager.java
  15. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlMapping.java
  16. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlMappingBuilder.java
  17. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlMappingMapReduceBuilder.java
  18. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/data/UrlUtils.java
  19. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/demo/WacMapReduceHBaseDemo.java
  20. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/demo/WacMapReduceHBaseWrapperDemo.java
  21. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/index/IndexerMapper.java
  22. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/index/IndexerReducer.java
  23. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/index/IndexerRunner.java
  24. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/ingest/IngestFiles.java
  25. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/ingest/SearchForUrl.java
  26. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/Chain.java
  27. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/ChainMapContextImpl.java
  28. 0 ...→ warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/HBaseRowToArcRecordWritableMapper.java
  29. 0 {warcbase-core → warcbase-hbase}/src/main/java/org/warcbase/mapreduce/lib/TableChainMapper.java
  30. 0 {warcbase-core → warcbase-hbase}/src/main/solr/README.txt
  31. 0 {warcbase-core → warcbase-hbase}/src/main/solr/WARCIndexer.conf
  32. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/currency.xml
  33. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/elevate.xml
  34. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_ca.txt
  35. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_fr.txt
  36. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_ga.txt
  37. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/contractions_it.txt
  38. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/hyphenations_ga.txt
  39. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stemdict_nl.txt
  40. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stoptags_ja.txt
  41. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ar.txt
  42. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_bg.txt
  43. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ca.txt
  44. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_cz.txt
  45. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_da.txt
  46. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_de.txt
  47. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_el.txt
  48. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_en.txt
  49. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_es.txt
  50. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_eu.txt
  51. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_fa.txt
  52. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_fi.txt
  53. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_fr.txt
  54. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ga.txt
  55. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_gl.txt
  56. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_hi.txt
  57. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_hu.txt
  58. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_hy.txt
  59. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_id.txt
  60. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_it.txt
  61. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ja.txt
  62. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_lv.txt
  63. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_nl.txt
  64. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_no.txt
  65. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_pt.txt
  66. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ro.txt
  67. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_ru.txt
  68. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_sv.txt
  69. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_th.txt
  70. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/stopwords_tr.txt
  71. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/lang/userdict_ja.txt
  72. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/protwords.txt
  73. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/schema.xml
  74. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrconfig-production.xml
  75. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrconfig-server-4.10.4.xml
  76. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrconfig.xml
  77. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrcore.properties
  78. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/solrcore.properties-production
  79. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/stopwords.txt
  80. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/conf/synonyms.txt
  81. 0 {warcbase-core → warcbase-hbase}/src/main/solr/discovery/core.properties
  82. 0 {warcbase-core → warcbase-hbase}/src/main/solr/solr.xml
  83. 0 {warcbase-core → warcbase-hbase}/src/main/solr/zoo.cfg
  84. 0 {warcbase-core → warcbase-hbase}/src/test/java/org/warcbase/data/UrlMappingTest.java
  85. 0 {warcbase-core → warcbase-hbase}/src/test/java/org/warcbase/data/UrlUtilsTest.java
View
@@ -9,4 +9,4 @@ before_install:
- "export JAVA_OPTS=-Xmx512m"
script:
- - mvn clean package appassembler:assemble
+ - mvn clean package
View
@@ -61,7 +61,7 @@
<build>
<plugins>
- <plugin>
+ <!--plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.6.1</version>
<configuration>
@@ -72,7 +72,7 @@
</fileset>
</filesets>
</configuration>
- </plugin>
+ </plugin-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -128,7 +128,7 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</executions>
</plugin>
- <plugin>
+ <!--plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.4</version>
@@ -146,7 +146,7 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</configuration>
</execution>
</executions>
- </plugin>
+ </plugin-->
<plugin>
<groupId>org.codehaus.mojo</groupId>
@@ -263,53 +263,26 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>2.2.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.8</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>14.0.1</version>
<!-- downgrade for Hadoop WARC indexer, see also https://issues.apache.org/jira/browse/HADOOP-10961 -->
</dependency>
<dependency>
<groupId>tl.lin</groupId>
<artifactId>lintools-datatypes</artifactId>
<version>1.0.0</version>
</dependency>
<!-- Begin: Hadoop-related dependencies -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
<exclusions>
<exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
<exclusion><groupId>org.mortbay.jetty</groupId><artifactId>servlet-api-2.5</artifactId></exclusion>
<exclusion><groupId>javax.servlet</groupId><artifactId>servlet-api</artifactId></exclusion>
<exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
</exclusions>
</dependency>
<!-- WacWarcLoaderTest depends on this -->
<!-- See http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/cdhvd_hadoop_api_dependencies.html -->
<dependency>
@@ -321,12 +294,6 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</exclusions>
</dependency>
- <dependency>
- <groupId>org.apache.zookeeper</groupId>
- <artifactId>zookeeper</artifactId>
- <version>${zookeeper.version}</version>
- </dependency>
<!-- End: Hadoop-related dependencies -->
<dependency>
@@ -355,56 +322,33 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>dsiutils</artifactId>
<version>2.2.0</version>
<exclusions>
<exclusion><groupId>ch.qos.logback</groupId><artifactId>logback-classic</artifactId></exclusion>
<exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>6.5.15</version>
<exclusions>
<exclusion><groupId>commons-lang</groupId><artifactId>commons-lang</artifactId></exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>${jettyVersion}</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<version>${jettyVersion}</version>
<optional>true</optional>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.4</version>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.8</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.0</version>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<!--
<dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>2.3</version>
</dependency>
-->
<dependency>
<groupId>org.apache.tika</groupId>
@@ -423,17 +367,6 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>3.5.2</version>
</dependency>
- <dependency>
- <groupId>org.seleniumhq.selenium</groupId>
- <artifactId>selenium-java</artifactId>
- <version>2.42.2</version>
- <exclusions>
- <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-htmlunit-driver</artifactId></exclusion>
- <exclusion><groupId>org.seleniumhq.selenium</groupId><artifactId>selenium-ie-driver</artifactId></exclusion>
- <exclusion><groupId>org.webbitserver</groupId><artifactId>webbit</artifactId></exclusion>
- </exclusions>
- </dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
@@ -483,11 +416,13 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>1.2.1</version>
</dependency>
+ <!--
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<version>1.0.5</version>
</dependency>
+ -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
@@ -507,33 +442,6 @@ http://mail-archives.apache.org/mod_mbox/lucene-java-user/201308.mbox/%3CWC20130
<version>2.11.0</version>
</dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-core</artifactId>
- <version>4.7.2</version>
- </dependency>
- <dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-core</artifactId>
- <version>4.7.2</version>
- <exclusions>
- <exclusion><artifactId>slf4j-api</artifactId><groupId>org.slf4j</groupId></exclusion>
- <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-annotations</groupId></exclusion>
- <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-common</groupId></exclusion>
- <exclusion><artifactId>org.apache.hadoop</artifactId><groupId>hadoop-hdfs</groupId></exclusion>
- <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>uk.bl.wa.discovery</groupId>
- <artifactId>warc-hadoop-indexer</artifactId>
- <version>2.2.0-BETA-5</version>
- <exclusions>
- <exclusion><groupId>asm</groupId><artifactId>asm</artifactId></exclusion>
- <exclusion><groupId>com.typesafe</groupId><artifactId>config</artifactId></exclusion>
- </exclusions>
- </dependency>
</dependencies>
</project>
Oops, something went wrong.

0 comments on commit 0db46be

Please sign in to comment.