diff --git a/KumoIntelliJInspections.xml b/KumoIntelliJInspections.xml
new file mode 100644
index 0000000..9b2edba
--- /dev/null
+++ b/KumoIntelliJInspections.xml
@@ -0,0 +1,1420 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /bin/bash
+ /bin/sh
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/README.md b/README.md
index 97ef2f2..74df022 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@ Kumo's goal is to create a powerful and user friendly Word Cloud API in Java. Ku
Please feel free to jump in and help improve Kumo! There are many places for performance optimization in Kumo!
+[![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.kennycason/kumo/badge.svg?style=flat)](https://maven-badges.herokuapp.com/maven-central/com.kennycason/kumo)
+
### Current Features
- Draw Rectangle, Circle or Image Overlay word clouds. Image Overlay will draw words over all non-transparent pixels.
@@ -26,7 +28,7 @@ Please feel free to jump in and help improve Kumo! There are many places for per
com.kennycason
kumo
- 1.8
+ 1.9
```
@@ -382,3 +384,8 @@ Create a layered word cloud
```
kumo --input "https://www.haskell.org/, https://en.wikipedia.org/wiki/Haskell_(programming_language)" --output "/tmp/nintendo_vs_playstation.png" --type layered --background "https://raw.githubusercontent.com/kennycason/kumo/master/src/test/resources/backgrounds/haskell_1.bmp,https://raw.githubusercontent.com/kennycason/kumo/master/src/test/resources/backgrounds/haskell_2.bmp" --color "(0xFA6C07),(0xFF7614),(0xFF8936)|(0x080706),(0x3B3029),(0x47362A)"
```
+
+
+### Contributing
+
+My primary IDE of choice is IntelliJ due to its robust tooling as well as code analysis/inspections. If using [IntelliJ IDEA](https://www.jetbrains.com/idea/), I recommend importing `KumoIntelliJInspections.xml`. I am also considering adding Checkstyle support.
\ No newline at end of file
diff --git a/kumo-api/src/main/java/com/kennycason/kumo/IntegrationTest.java b/kumo-api/src/main/java/com/kennycason/kumo/IntegrationTest.java
deleted file mode 100644
index 5c0ee82..0000000
--- a/kumo-api/src/main/java/com/kennycason/kumo/IntegrationTest.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.kennycason.kumo;
-
-/**
- * Created by kenny on 2/21/16.
- */
-public interface IntegrationTest {
-}
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java b/kumo-api/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java
rename to kumo-api/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java
diff --git a/kumo-cli/pom.xml b/kumo-cli/pom.xml
index 2f3c7bf..3611dff 100644
--- a/kumo-cli/pom.xml
+++ b/kumo-cli/pom.xml
@@ -11,21 +11,27 @@
kumo-cli
+
+ com.kennycason.kumo.cli.KumoCli
+
+
com.kennycason
- kumo-api
- 1.9
+ kumo-core
com.kennycason
- kumo-nlp
- 1.9
-
- com.kennycason
- kumo-core
- 1.9
+ kumo-tokenizers
+
+
+
+ com.beust
+ jcommander
+
+
+
junit
junit
@@ -51,9 +57,30 @@
org.apache.maven.plugins
maven-surefire-plugin
+
+
org.apache.maven.plugins
maven-shade-plugin
+ 2.4.3
+
+
+
+
+ ${cli.main.class}
+
+
+
+
+
+
+ package
+
+ shade
+
+
+
diff --git a/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java b/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java
index 1217c85..cbba34c 100644
--- a/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java
+++ b/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java
@@ -18,10 +18,10 @@
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
import com.kennycason.kumo.nlp.normalize.*;
-import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer;
-import com.kennycason.kumo.nlp.tokenizer.EnglishWordTokenizer;
import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer;
import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
+import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer;
+import com.kennycason.kumo.nlp.tokenizers.EnglishWordTokenizer;
import com.kennycason.kumo.palette.ColorPalette;
import com.kennycason.kumo.wordstart.CenterWordStart;
import com.kennycason.kumo.wordstart.RandomWordStart;
diff --git a/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java b/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java
index 114e451..ad50a59 100644
--- a/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java
+++ b/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java
@@ -19,11 +19,11 @@ public class ParenthesisSerializer {
public static String serialize(final Collection collection) {
if (collection.isEmpty()) { return ""; }
- String joined = collection.stream()
- .map(i -> i.toString())
- .collect(Collectors.joining("),("));
+ final String joined = collection.stream()
+ .map(i -> i.toString())
+ .collect(Collectors.joining("),("));
- return "(" + joined + ")";
+ return '(' + joined + ')';
}
public static List deserialize(final String value) {
diff --git a/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java b/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java
index 037f9cd..ffbc8fc 100644
--- a/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java
+++ b/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java
@@ -1,16 +1,13 @@
package com.kennycason.kumo.cli;
-import com.kennycason.kumo.IntegrationTest;
-import org.junit.Ignore;
-import org.junit.experimental.categories.Category;
+import org.junit.Test;
/**
* Created by kenny on 6/12/16.
*/
-@Category(IntegrationTest.class)
-@Ignore
public class KumoCliITest {
+ @Test
public void simple() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -18,6 +15,7 @@ public void simple() {
});
}
+ @Test
public void stopwords() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -26,6 +24,7 @@ public void stopwords() {
});
}
+ @Test
public void wordCount() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -34,6 +33,7 @@ public void wordCount() {
});
}
+ @Test
public void widthAndHeight() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -43,6 +43,7 @@ public void widthAndHeight() {
});
}
+ @Test
public void randomWordStart() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -51,6 +52,7 @@ public void randomWordStart() {
});
}
+ @Test
public void font() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -63,7 +65,7 @@ public void font() {
});
}
-
+ @Test
public void normalizer() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -72,7 +74,7 @@ public void normalizer() {
});
}
-
+ @Test
public void backgroundImage() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -83,7 +85,7 @@ public void backgroundImage() {
});
}
-
+ @Test
public void colorRgb() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -92,7 +94,7 @@ public void colorRgb() {
});
}
-
+ @Test
public void colorRgbHex() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -101,7 +103,7 @@ public void colorRgbHex() {
});
}
-
+ @Test
public void colorHex() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo",
@@ -110,7 +112,7 @@ public void colorHex() {
});
}
-
+ @Test
public void chinese() {
KumoCli.main(new String[] {
"--input", "https://zh.wikipedia.org/wiki/%E4%BB%BB%E5%A4%A9%E5%A0%82",
@@ -119,7 +121,7 @@ public void chinese() {
});
}
-
+ @Test
public void polar() {
KumoCli.main(new String[] {
"--input", "https://en.wikipedia.org/wiki/Nintendo, https://en.wikipedia.org/wiki/PlayStation",
@@ -129,7 +131,7 @@ public void polar() {
});
}
-
+ @Test
public void layered() {
KumoCli.main(new String[] {
"--input", "https://www.haskell.org/, https://en.wikipedia.org/wiki/Haskell_(programming_language)",
diff --git a/kumo-core/pom.xml b/kumo-core/pom.xml
index da75863..7bd788f 100644
--- a/kumo-core/pom.xml
+++ b/kumo-core/pom.xml
@@ -15,19 +15,17 @@
com.kennycason
kumo-api
- 1.9
-
-
- com.kennycason
- kumo-nlp
- 1.9
- test
+
log4j
log4j
+
+ org.jsoup
+ jsoup
+
commons-io
commons-io
@@ -36,14 +34,12 @@
org.apache.commons
commons-lang3
-
- com.beust
- jcommander
-
com.github.davidmoten
rtree
+
+
junit
junit
@@ -65,10 +61,6 @@
org.apache.maven.plugins
maven-javadoc-plugin
-
- org.apache.maven.plugins
- maven-surefire-plugin
-
\ No newline at end of file
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java
similarity index 100%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java
rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java
diff --git a/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java b/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java
index 7d2b813..f2357d9 100644
--- a/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java
+++ b/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java
@@ -12,8 +12,6 @@
import com.kennycason.kumo.font.scale.LinearFontScalar;
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
-import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer;
-import com.kennycason.kumo.palette.ColorPalette;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.junit.Test;
@@ -98,36 +96,6 @@ public void newyorkPolarRectangle() throws IOException {
}
@Test
- public void chineseVsEnglishTideComments() throws IOException {
- final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
- frequencyAnalyzer.setWordFrequenciesToReturn(750);
- frequencyAnalyzer.setMinWordLength(3);
- frequencyAnalyzer.setStopWords(loadStopWords());
- final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/english_tide.txt"));
-
- final FrequencyAnalyzer chineseFrequencyAnalyzer = new FrequencyAnalyzer();
- chineseFrequencyAnalyzer.setWordFrequenciesToReturn(750);
- chineseFrequencyAnalyzer.setMinWordLength(2);
- chineseFrequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
- final List wordFrequencies2 = chineseFrequencyAnalyzer.load(getInputStream("text/chinese_tide.txt"));
-
- final Dimension dimension = new Dimension(800, 600);
- final PolarWordCloud wordCloud = new PolarWordCloud(dimension, CollisionMode.PIXEL_PERFECT, PolarBlendMode.BLUR);
- wordCloud.setPadding(2);
- wordCloud.setBackground(new RectangleBackground(dimension));
- wordCloud.setFontScalar(new SqrtFontScalar(10, 70));
-
- final ColorPalette colorPalette = new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5));
- final ColorPalette colorPalette2 = new ColorPalette(new Color(0xFA8E8E), new Color(0xF77979), new Color(0xF55F5F), new Color(0xF24949));
- wordCloud.setColorPalette(colorPalette);
- wordCloud.setColorPalette2(colorPalette2);
-
- final long startTime = System.currentTimeMillis();
- wordCloud.build(wordFrequencies, wordFrequencies2);
- LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build");
- wordCloud.writeToFile("output/polar_tide_chinese_vs_english2.png");
- }
-
public void tidyCatLitter() throws IOException {
final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
frequencyAnalyzer.setWordFrequenciesToReturn(400);
diff --git a/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java b/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java
index bcf21f8..9b1be6b 100644
--- a/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java
+++ b/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java
@@ -1,7 +1,6 @@
package com.kennycason.kumo.examples;
import com.kennycason.kumo.CollisionMode;
-import com.kennycason.kumo.IntegrationTest;
import com.kennycason.kumo.WordCloud;
import com.kennycason.kumo.WordFrequency;
import com.kennycason.kumo.bg.CircleBackground;
@@ -13,13 +12,10 @@
import com.kennycason.kumo.font.scale.SqrtFontScalar;
import com.kennycason.kumo.image.AngleGenerator;
import com.kennycason.kumo.nlp.FrequencyAnalyzer;
-import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer;
import com.kennycason.kumo.palette.ColorPalette;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
-import org.junit.Ignore;
import org.junit.Test;
-import org.junit.experimental.categories.Category;
import java.awt.*;
import java.io.FileInputStream;
@@ -32,8 +28,6 @@
/**
* Created by kenny on 6/29/14.
*/
-@Category(IntegrationTest.class)
-@Ignore
public class WordCloudITest {
private static final Logger LOGGER = Logger.getLogger(WordCloudITest.class);
@@ -220,26 +214,6 @@ public void datarankCircleLarge() throws IOException {
wordCloud.writeToFile("output/datarank_wordcloud_circle_large2.png");
}
- @Test
- public void chineseCircle() throws IOException {
- final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
- frequencyAnalyzer.setWordFrequenciesToReturn(600);
- frequencyAnalyzer.setMinWordLength(2);
- frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
-
- final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_language.txt"));
- final Dimension dimension = new Dimension(600, 600);
- final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
- wordCloud.setPadding(2);
- wordCloud.setBackground(new CircleBackground(300));
- wordCloud.setColorPalette(new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5)));
- wordCloud.setFontScalar(new SqrtFontScalar(12, 45));
- final long startTime = System.currentTimeMillis();
- wordCloud.build(wordFrequencies);
- LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build");
- wordCloud.writeToFile("output/chinese_language_circle.png");
- }
-
@Test
public void datarankEarthImage() throws IOException {
final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
@@ -303,27 +277,6 @@ public void datarankCode() throws IOException {
wordCloud.writeToFile("/tmp/datarank_code.png");
}
- @Test
- public void dragonChinese() throws IOException {
- final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
- frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
- frequencyAnalyzer.setWordFrequenciesToReturn(900);
- frequencyAnalyzer.setMinWordLength(1);
- frequencyAnalyzer.setStopWords(Arrays.asList("是", "不", "了", "的", "个", "子"));
-
- final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_dragon.txt"));
- final Dimension dimension = new Dimension(555, 555);
- final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
- wordCloud.setPadding(1);
- wordCloud.setBackgroundColor(new Color(0xE35A05));
- wordCloud.setAngleGenerator(new AngleGenerator(0));
- wordCloud.setBackground(new PixelBoundryBackground(getInputStream("backgrounds/dragon.png")));
- wordCloud.setColorPalette(new ColorPalette(new Color(0x0), new Color(0x333333), new Color(0x555555)));
- wordCloud.setFontScalar(new SqrtFontScalar(6, 50));
- wordCloud.build(wordFrequencies);
- wordCloud.writeToFile("output/dragon_chinese.png");
- }
-
@Test
public void largeCircleTest() throws IOException {
final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
diff --git a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/WhiteSpaceWordTokenizerTest.java b/kumo-core/src/test/java/com/kennycason/kumo/nlp/tokenizers/WhiteSpaceWordTokenizerTest.java
similarity index 94%
rename from kumo-nlp/src/test/java/com/kennycason/kumo/nlp/WhiteSpaceWordTokenizerTest.java
rename to kumo-core/src/test/java/com/kennycason/kumo/nlp/tokenizers/WhiteSpaceWordTokenizerTest.java
index 210eefc..71c4ec7 100644
--- a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/WhiteSpaceWordTokenizerTest.java
+++ b/kumo-core/src/test/java/com/kennycason/kumo/nlp/tokenizers/WhiteSpaceWordTokenizerTest.java
@@ -1,4 +1,4 @@
-package com.kennycason.kumo.nlp;
+package com.kennycason.kumo.nlp.tokenizers;
import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer;
import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
diff --git a/kumo-tokenizers/README.md b/kumo-tokenizers/README.md
new file mode 100644
index 0000000..3bc7647
--- /dev/null
+++ b/kumo-tokenizers/README.md
@@ -0,0 +1,82 @@
+# Kumo Tokenizers
+
+This module is separated from Kumo Core to prevent Kumo Core from becoming too bloated. It will contain word tokenizers for various languages.
+Currently, the only tokenizers included are the `EnglishWordTokenizer` and the `ChineseWordTokenizer`.
+
+*Note*: All the examples will soon be extracted to another module for better clarity. This README is just a placeholder while I refactor.
+
+Below are a few examples of how to use the `ChineseWordTokenizer`.
+
+```java
+@Test
+public void dragonChinese() throws IOException {
+ final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
+ frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
+ frequencyAnalyzer.setWordFrequenciesToReturn(900);
+ frequencyAnalyzer.setMinWordLength(1);
+ frequencyAnalyzer.setStopWords(Arrays.asList("是", "不", "了", "的", "个", "子"));
+
+ final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_dragon.txt"));
+ final Dimension dimension = new Dimension(555, 555);
+ final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
+ wordCloud.setPadding(1);
+ wordCloud.setBackgroundColor(new Color(0xE35A05));
+ wordCloud.setAngleGenerator(new AngleGenerator(0));
+ wordCloud.setBackground(new PixelBoundryBackground(getInputStream("backgrounds/dragon.png")));
+ wordCloud.setColorPalette(new ColorPalette(new Color(0x0), new Color(0x333333), new Color(0x555555)));
+ wordCloud.setFontScalar(new SqrtFontScalar(6, 50));
+ wordCloud.build(wordFrequencies);
+ wordCloud.writeToFile("output/dragon_chinese.png");
+}
+
+@Test
+public void chineseCircle() throws IOException {
+ final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
+ frequencyAnalyzer.setWordFrequenciesToReturn(600);
+ frequencyAnalyzer.setMinWordLength(2);
+ frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
+
+ final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_language.txt"));
+ final Dimension dimension = new Dimension(600, 600);
+ final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);
+ wordCloud.setPadding(2);
+ wordCloud.setBackground(new CircleBackground(300));
+ wordCloud.setColorPalette(new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5)));
+ wordCloud.setFontScalar(new SqrtFontScalar(12, 45));
+ final long startTime = System.currentTimeMillis();
+ wordCloud.build(wordFrequencies);
+ LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build");
+ wordCloud.writeToFile("output/chinese_language_circle.png");
+}
+
+@Test
+public void chineseVsEnglishTideComments() throws IOException {
+ final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
+ frequencyAnalyzer.setWordFrequenciesToReturn(750);
+ frequencyAnalyzer.setMinWordLength(3);
+ frequencyAnalyzer.setStopWords(loadStopWords());
+ final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/english_tide.txt"));
+
+ final FrequencyAnalyzer chineseFrequencyAnalyzer = new FrequencyAnalyzer();
+ chineseFrequencyAnalyzer.setWordFrequenciesToReturn(750);
+ chineseFrequencyAnalyzer.setMinWordLength(2);
+ chineseFrequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
+ final List wordFrequencies2 = chineseFrequencyAnalyzer.load(getInputStream("text/chinese_tide.txt"));
+
+ final Dimension dimension = new Dimension(800, 600);
+ final PolarWordCloud wordCloud = new PolarWordCloud(dimension, CollisionMode.PIXEL_PERFECT, PolarBlendMode.BLUR);
+ wordCloud.setPadding(2);
+ wordCloud.setBackground(new RectangleBackground(dimension));
+ wordCloud.setFontScalar(new SqrtFontScalar(10, 70));
+
+ final ColorPalette colorPalette = new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5));
+ final ColorPalette colorPalette2 = new ColorPalette(new Color(0xFA8E8E), new Color(0xF77979), new Color(0xF55F5F), new Color(0xF24949));
+ wordCloud.setColorPalette(colorPalette);
+ wordCloud.setColorPalette2(colorPalette2);
+
+ final long startTime = System.currentTimeMillis();
+ wordCloud.build(wordFrequencies, wordFrequencies2);
+ LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build");
+ wordCloud.writeToFile("output/polar_tide_chinese_vs_english2.png");
+}
+```
\ No newline at end of file
diff --git a/kumo-nlp/pom.xml b/kumo-tokenizers/pom.xml
similarity index 83%
rename from kumo-nlp/pom.xml
rename to kumo-tokenizers/pom.xml
index 03df400..aea8383 100644
--- a/kumo-nlp/pom.xml
+++ b/kumo-tokenizers/pom.xml
@@ -9,35 +9,25 @@
4.0.0
- kumo-nlp
+ kumo-tokenizers
com.kennycason
kumo-api
- 1.9
+
+
log4j
log4j
-
- commons-io
- commons-io
-
-
- org.apache.commons
- commons-lang3
-
-
- org.jsoup
- jsoup
-
junit
junit
test
+
org.languagetool
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/ChineseWordTokenizer.java b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizer.java
similarity index 89%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/ChineseWordTokenizer.java
rename to kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizer.java
index a55a568..0b30095 100644
--- a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/ChineseWordTokenizer.java
+++ b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizer.java
@@ -1,5 +1,6 @@
-package com.kennycason.kumo.nlp.tokenizer;
+package com.kennycason.kumo.nlp.tokenizers;
+import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
import org.languagetool.language.Chinese;
import org.languagetool.tokenizers.Tokenizer;
@@ -7,11 +8,8 @@
import java.util.List;
public class ChineseWordTokenizer implements WordTokenizer {
-
private static final Chinese CHINESE = new Chinese();
- public ChineseWordTokenizer() {}
-
@Override
public List tokenize(final String sentence) {
final Tokenizer tokenizer = CHINESE.getWordTokenizer();
diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/EnglishWordTokenizer.java b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/EnglishWordTokenizer.java
similarity index 87%
rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/EnglishWordTokenizer.java
rename to kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/EnglishWordTokenizer.java
index 98af77a..57d5d5a 100644
--- a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/EnglishWordTokenizer.java
+++ b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/EnglishWordTokenizer.java
@@ -1,5 +1,6 @@
-package com.kennycason.kumo.nlp.tokenizer;
+package com.kennycason.kumo.nlp.tokenizers;
+import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
import org.languagetool.language.English;
import org.languagetool.tokenizers.Tokenizer;
@@ -7,7 +8,6 @@
import java.util.List;
public class EnglishWordTokenizer implements WordTokenizer {
-
private static final English ENGLISH = new English();
public EnglishWordTokenizer() {}
diff --git a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/ChineseWordTokenizerTest.java b/kumo-tokenizers/src/test/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizerTest.java
similarity index 92%
rename from kumo-nlp/src/test/java/com/kennycason/kumo/nlp/ChineseWordTokenizerTest.java
rename to kumo-tokenizers/src/test/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizerTest.java
index 3aa9b10..fd47b0f 100644
--- a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/ChineseWordTokenizerTest.java
+++ b/kumo-tokenizers/src/test/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizerTest.java
@@ -1,6 +1,5 @@
-package com.kennycason.kumo.nlp;
+package com.kennycason.kumo.nlp.tokenizers;
-import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer;
import com.kennycason.kumo.nlp.tokenizer.WordTokenizer;
import org.apache.log4j.Logger;
import org.junit.Test;
diff --git a/kumo-nlp/src/test/resources/log4j.xml b/kumo-tokenizers/src/test/resources/log4j.xml
similarity index 100%
rename from kumo-nlp/src/test/resources/log4j.xml
rename to kumo-tokenizers/src/test/resources/log4j.xml
diff --git a/pom.xml b/pom.xml
index d2448f7..45a2cb3 100755
--- a/pom.xml
+++ b/pom.xml
@@ -6,20 +6,19 @@
com.kennycason
kumo
+ 1.9
pom
${project.artifactId}
Kumo's goal is to create a powerful and user friendly Word Cloud API in Java. Kumo directly generates an image file without the need to create an applet (as many other libraries do).
https://github.com/kennycason/kumo
- kumo-nlp
- kumo-core
kumo-api
+ kumo-core
kumo-cli
+ kumo-tokenizers
- 1.9
-
UTF-8
@@ -30,11 +29,31 @@
>${encoding}
${java.target}
- com.kennycason.kumo.cli.KumoCli
+
+ com.kennycason
+ kumo-api
+ ${project.version}
+
+
+ com.kennycason
+ kumo-core
+ ${project.version}
+
+
+ com.kennycason
+ kumo-cli
+ ${project.version}
+
+
+ com.kennycason
+ kumo-tokenizers
+ ${project.version}
+
+
log4j
log4j
@@ -209,79 +228,12 @@
-Xms256m -Xmx768m -XX:+CMSClassUnloadingEnabled -Dfile.encoding=UTF-8
- **/examples/**
- **/ITest.java
+ **ITest.java
-
-
- org.apache.maven.plugins
- maven-shade-plugin
- 2.4.3
-
-
-
-
- ${cli.main.class}
-
-
-
-
-
-
- package
-
- shade
-
-
-
-
-
-
- de.thetaphi
- forbiddenapis
- 2.3
-
-
- false
-
-
- jdk-deprecated
- jdk-internal
-
- jdk-non-portable
-
-
-
-
-
-
- check
- testCheck
-
-
-
-
-
-
-
- de.thetaphi
- forbiddenapis
-
-