diff --git a/KumoIntelliJInspections.xml b/KumoIntelliJInspections.xml new file mode 100644 index 0000000..9b2edba --- /dev/null +++ b/KumoIntelliJInspections.xml @@ -0,0 +1,1420 @@ + + + + diff --git a/README.md b/README.md index 97ef2f2..74df022 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ Kumo's goal is to create a powerful and user friendly Word Cloud API in Java. Ku Please feel free to jump in and help improve Kumo! There are many places for performance optimization in Kumo! +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.kennycason/kumo/badge.svg?style=flat)](https://maven-badges.herokuapp.com/maven-central/com.kennycason/kumo)
+ ### Current Features - Draw Rectangle, Circle or Image Overlay word clouds. Image Overlay will draw words over all non-transparent pixels. @@ -26,7 +28,7 @@ Please feel free to jump in and help improve Kumo! There are many places for per com.kennycason kumo - 1.8 + 1.9 ``` @@ -382,3 +384,8 @@ Create a layered word cloud ``` kumo --input "https://www.haskell.org/, https://en.wikipedia.org/wiki/Haskell_(programming_language)" --output "/tmp/nintendo_vs_playstation.png" --type layered --background "https://raw.githubusercontent.com/kennycason/kumo/master/src/test/resources/backgrounds/haskell_1.bmp,https://raw.githubusercontent.com/kennycason/kumo/master/src/test/resources/backgrounds/haskell_2.bmp" --color "(0xFA6C07),(0xFF7614),(0xFF8936)|(0x080706),(0x3B3029),(0x47362A)" ``` + + +### Contributing + +My primary IDE of choice is IntelliJ due to its robust tooling as well as code analysis/inspections. If using [IntelliJ IDEA](https://www.jetbrains.com/idea/), I recommend importing `KumoIntelliJInspections.xml`. I am also considering adding Checkstyle support. \ No newline at end of file diff --git a/kumo-api/src/main/java/com/kennycason/kumo/IntegrationTest.java b/kumo-api/src/main/java/com/kennycason/kumo/IntegrationTest.java deleted file mode 100644 index 5c0ee82..0000000 --- a/kumo-api/src/main/java/com/kennycason/kumo/IntegrationTest.java +++ /dev/null @@ -1,7 +0,0 @@ -package com.kennycason.kumo; - -/** - * Created by kenny on 2/21/16. 
- */ -public interface IntegrationTest { -} diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java b/kumo-api/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java rename to kumo-api/src/main/java/com/kennycason/kumo/nlp/tokenizer/WordTokenizer.java diff --git a/kumo-cli/pom.xml b/kumo-cli/pom.xml index 2f3c7bf..3611dff 100644 --- a/kumo-cli/pom.xml +++ b/kumo-cli/pom.xml @@ -11,21 +11,27 @@ kumo-cli + + com.kennycason.kumo.cli.KumoCli + + com.kennycason - kumo-api - 1.9 + kumo-core com.kennycason - kumo-nlp - 1.9 - - com.kennycason - kumo-core - 1.9 + kumo-tokenizers + + + + com.beust + jcommander + + + junit junit @@ -51,9 +57,30 @@ org.apache.maven.plugins maven-surefire-plugin + + org.apache.maven.plugins maven-shade-plugin + 2.4.3 + + + + + ${cli.main.class} + + + + + + + package + + shade + + + diff --git a/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java b/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java index 1217c85..cbba34c 100644 --- a/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java +++ b/kumo-cli/src/main/java/com/kennycason/kumo/cli/KumoCli.java @@ -18,10 +18,10 @@ import com.kennycason.kumo.font.scale.SqrtFontScalar; import com.kennycason.kumo.nlp.FrequencyAnalyzer; import com.kennycason.kumo.nlp.normalize.*; -import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer; -import com.kennycason.kumo.nlp.tokenizer.EnglishWordTokenizer; import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer; import com.kennycason.kumo.nlp.tokenizer.WordTokenizer; +import com.kennycason.kumo.nlp.tokenizers.ChineseWordTokenizer; +import com.kennycason.kumo.nlp.tokenizers.EnglishWordTokenizer; import com.kennycason.kumo.palette.ColorPalette; import com.kennycason.kumo.wordstart.CenterWordStart; import com.kennycason.kumo.wordstart.RandomWordStart; diff --git 
a/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java b/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java index 114e451..ad50a59 100644 --- a/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java +++ b/kumo-cli/src/main/java/com/kennycason/kumo/cli/ParenthesisSerializer.java @@ -19,11 +19,11 @@ public class ParenthesisSerializer { public static String serialize(final Collection collection) { if (collection.isEmpty()) { return ""; } - String joined = collection.stream() - .map(i -> i.toString()) - .collect(Collectors.joining("),(")); + final String joined = collection.stream() + .map(i -> i.toString()) + .collect(Collectors.joining("),(")); - return "(" + joined + ")"; + return '(' + joined + ')'; } public static List deserialize(final String value) { diff --git a/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java b/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java index 037f9cd..ffbc8fc 100644 --- a/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java +++ b/kumo-cli/src/test/java/com/kennycason/kumo/cli/KumoCliITest.java @@ -1,16 +1,13 @@ package com.kennycason.kumo.cli; -import com.kennycason.kumo.IntegrationTest; -import org.junit.Ignore; -import org.junit.experimental.categories.Category; +import org.junit.Test; /** * Created by kenny on 6/12/16. 
*/ -@Category(IntegrationTest.class) -@Ignore public class KumoCliITest { + @Test public void simple() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -18,6 +15,7 @@ public void simple() { }); } + @Test public void stopwords() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -26,6 +24,7 @@ public void stopwords() { }); } + @Test public void wordCount() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -34,6 +33,7 @@ public void wordCount() { }); } + @Test public void widthAndHeight() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -43,6 +43,7 @@ public void widthAndHeight() { }); } + @Test public void randomWordStart() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -51,6 +52,7 @@ public void randomWordStart() { }); } + @Test public void font() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -63,7 +65,7 @@ public void font() { }); } - + @Test public void normalizer() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -72,7 +74,7 @@ public void normalizer() { }); } - + @Test public void backgroundImage() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -83,7 +85,7 @@ public void backgroundImage() { }); } - + @Test public void colorRgb() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -92,7 +94,7 @@ public void colorRgb() { }); } - + @Test public void colorRgbHex() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -101,7 +103,7 @@ public void colorRgbHex() { }); } - + @Test public void colorHex() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo", @@ -110,7 +112,7 @@ public void colorHex() { }); } - + @Test public void chinese() { KumoCli.main(new String[] { "--input", 
"https://zh.wikipedia.org/wiki/%E4%BB%BB%E5%A4%A9%E5%A0%82", @@ -119,7 +121,7 @@ public void chinese() { }); } - + @Test public void polar() { KumoCli.main(new String[] { "--input", "https://en.wikipedia.org/wiki/Nintendo, https://en.wikipedia.org/wiki/PlayStation", @@ -129,7 +131,7 @@ public void polar() { }); } - + @Test public void layered() { KumoCli.main(new String[] { "--input", "https://www.haskell.org/, https://en.wikipedia.org/wiki/Haskell_(programming_language)", diff --git a/kumo-core/pom.xml b/kumo-core/pom.xml index da75863..7bd788f 100644 --- a/kumo-core/pom.xml +++ b/kumo-core/pom.xml @@ -15,19 +15,17 @@ com.kennycason kumo-api - 1.9 - - - com.kennycason - kumo-nlp - 1.9 - test + log4j log4j + + org.jsoup + jsoup + commons-io commons-io @@ -36,14 +34,12 @@ org.apache.commons commons-lang3 - - com.beust - jcommander - com.github.davidmoten rtree + + junit junit @@ -65,10 +61,6 @@ org.apache.maven.plugins maven-javadoc-plugin - - org.apache.maven.plugins - maven-surefire-plugin - \ No newline at end of file diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/FrequencyAnalyzer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/CompositeFilter.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java rename to 
kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/Filter.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/StopWordFilter.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/UrlFilter.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/filter/WordSizeFilter.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/BubbleTextNormalizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/CharacterStrippingNormalizer.java diff --git 
a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/LowerCaseNormalizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/Normalizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/StringToHexNormalizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/TrimToEmptyNormalizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpperCaseNormalizer.java diff --git 
a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/normalize/UpsideDownNormalizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/NoTokenizer.java diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java b/kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java similarity index 100% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java rename to kumo-core/src/main/java/com/kennycason/kumo/nlp/tokenizer/WhiteSpaceWordTokenizer.java diff --git a/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java b/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java index 7d2b813..f2357d9 100644 --- a/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java +++ b/kumo-core/src/test/java/com/kennycason/kumo/examples/PolarWordCloudITest.java @@ -12,8 +12,6 @@ import com.kennycason.kumo.font.scale.LinearFontScalar; import com.kennycason.kumo.font.scale.SqrtFontScalar; import com.kennycason.kumo.nlp.FrequencyAnalyzer; -import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer; -import com.kennycason.kumo.palette.ColorPalette; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; import org.junit.Test; @@ -98,36 +96,6 @@ public void newyorkPolarRectangle() throws IOException { } @Test - public void 
chineseVsEnglishTideComments() throws IOException { - final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); - frequencyAnalyzer.setWordFrequenciesToReturn(750); - frequencyAnalyzer.setMinWordLength(3); - frequencyAnalyzer.setStopWords(loadStopWords()); - final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/english_tide.txt")); - - final FrequencyAnalyzer chineseFrequencyAnalyzer = new FrequencyAnalyzer(); - chineseFrequencyAnalyzer.setWordFrequenciesToReturn(750); - chineseFrequencyAnalyzer.setMinWordLength(2); - chineseFrequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); - final List wordFrequencies2 = chineseFrequencyAnalyzer.load(getInputStream("text/chinese_tide.txt")); - - final Dimension dimension = new Dimension(800, 600); - final PolarWordCloud wordCloud = new PolarWordCloud(dimension, CollisionMode.PIXEL_PERFECT, PolarBlendMode.BLUR); - wordCloud.setPadding(2); - wordCloud.setBackground(new RectangleBackground(dimension)); - wordCloud.setFontScalar(new SqrtFontScalar(10, 70)); - - final ColorPalette colorPalette = new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5)); - final ColorPalette colorPalette2 = new ColorPalette(new Color(0xFA8E8E), new Color(0xF77979), new Color(0xF55F5F), new Color(0xF24949)); - wordCloud.setColorPalette(colorPalette); - wordCloud.setColorPalette2(colorPalette2); - - final long startTime = System.currentTimeMillis(); - wordCloud.build(wordFrequencies, wordFrequencies2); - LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build"); - wordCloud.writeToFile("output/polar_tide_chinese_vs_english2.png"); - } - public void tidyCatLitter() throws IOException { final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); frequencyAnalyzer.setWordFrequenciesToReturn(400); diff --git a/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java 
b/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java index bcf21f8..9b1be6b 100644 --- a/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java +++ b/kumo-core/src/test/java/com/kennycason/kumo/examples/WordCloudITest.java @@ -1,7 +1,6 @@ package com.kennycason.kumo.examples; import com.kennycason.kumo.CollisionMode; -import com.kennycason.kumo.IntegrationTest; import com.kennycason.kumo.WordCloud; import com.kennycason.kumo.WordFrequency; import com.kennycason.kumo.bg.CircleBackground; @@ -13,13 +12,10 @@ import com.kennycason.kumo.font.scale.SqrtFontScalar; import com.kennycason.kumo.image.AngleGenerator; import com.kennycason.kumo.nlp.FrequencyAnalyzer; -import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer; import com.kennycason.kumo.palette.ColorPalette; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; -import org.junit.Ignore; import org.junit.Test; -import org.junit.experimental.categories.Category; import java.awt.*; import java.io.FileInputStream; @@ -32,8 +28,6 @@ /** * Created by kenny on 6/29/14. 
*/ -@Category(IntegrationTest.class) -@Ignore public class WordCloudITest { private static final Logger LOGGER = Logger.getLogger(WordCloudITest.class); @@ -220,26 +214,6 @@ public void datarankCircleLarge() throws IOException { wordCloud.writeToFile("output/datarank_wordcloud_circle_large2.png"); } - @Test - public void chineseCircle() throws IOException { - final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); - frequencyAnalyzer.setWordFrequenciesToReturn(600); - frequencyAnalyzer.setMinWordLength(2); - frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); - - final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_language.txt")); - final Dimension dimension = new Dimension(600, 600); - final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT); - wordCloud.setPadding(2); - wordCloud.setBackground(new CircleBackground(300)); - wordCloud.setColorPalette(new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5))); - wordCloud.setFontScalar(new SqrtFontScalar(12, 45)); - final long startTime = System.currentTimeMillis(); - wordCloud.build(wordFrequencies); - LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build"); - wordCloud.writeToFile("output/chinese_language_circle.png"); - } - @Test public void datarankEarthImage() throws IOException { final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); @@ -303,27 +277,6 @@ public void datarankCode() throws IOException { wordCloud.writeToFile("/tmp/datarank_code.png"); } - @Test - public void dragonChinese() throws IOException { - final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); - frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); - frequencyAnalyzer.setWordFrequenciesToReturn(900); - frequencyAnalyzer.setMinWordLength(1); - frequencyAnalyzer.setStopWords(Arrays.asList("是", "不", "了", "的", "个", "子")); - - final List wordFrequencies = 
frequencyAnalyzer.load(getInputStream("text/chinese_dragon.txt")); - final Dimension dimension = new Dimension(555, 555); - final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT); - wordCloud.setPadding(1); - wordCloud.setBackgroundColor(new Color(0xE35A05)); - wordCloud.setAngleGenerator(new AngleGenerator(0)); - wordCloud.setBackground(new PixelBoundryBackground(getInputStream("backgrounds/dragon.png"))); - wordCloud.setColorPalette(new ColorPalette(new Color(0x0), new Color(0x333333), new Color(0x555555))); - wordCloud.setFontScalar(new SqrtFontScalar(6, 50)); - wordCloud.build(wordFrequencies); - wordCloud.writeToFile("output/dragon_chinese.png"); - } - @Test public void largeCircleTest() throws IOException { final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); diff --git a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/WhiteSpaceWordTokenizerTest.java b/kumo-core/src/test/java/com/kennycason/kumo/nlp/tokenizers/WhiteSpaceWordTokenizerTest.java similarity index 94% rename from kumo-nlp/src/test/java/com/kennycason/kumo/nlp/WhiteSpaceWordTokenizerTest.java rename to kumo-core/src/test/java/com/kennycason/kumo/nlp/tokenizers/WhiteSpaceWordTokenizerTest.java index 210eefc..71c4ec7 100644 --- a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/WhiteSpaceWordTokenizerTest.java +++ b/kumo-core/src/test/java/com/kennycason/kumo/nlp/tokenizers/WhiteSpaceWordTokenizerTest.java @@ -1,4 +1,4 @@ -package com.kennycason.kumo.nlp; +package com.kennycason.kumo.nlp.tokenizers; import com.kennycason.kumo.nlp.tokenizer.WhiteSpaceWordTokenizer; import com.kennycason.kumo.nlp.tokenizer.WordTokenizer; diff --git a/kumo-tokenizers/README.md b/kumo-tokenizers/README.md new file mode 100644 index 0000000..3bc7647 --- /dev/null +++ b/kumo-tokenizers/README.md @@ -0,0 +1,82 @@ +# Kumo Tokenizers + +This module is separated from Kumo Core to prevent Kumo Core from becoming too bloated. 
This module will contain language tokenizers for various languages. +Currently, the only tokenizers included are the `EnglishWordTokenizer` and the `ChineseWordTokenizer`. + +*Note*: All the examples will soon be extracted to another module for better clarity. This readme is just a placeholder while I refactor. + +Below are a few examples of how to use the `ChineseWordTokenizer`. + +```java +@Test +public void dragonChinese() throws IOException { + final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); + frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); + frequencyAnalyzer.setWordFrequenciesToReturn(900); + frequencyAnalyzer.setMinWordLength(1); + frequencyAnalyzer.setStopWords(Arrays.asList("是", "不", "了", "的", "个", "子")); + + final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_dragon.txt")); + final Dimension dimension = new Dimension(555, 555); + final WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT); + wordCloud.setPadding(1); + wordCloud.setBackgroundColor(new Color(0xE35A05)); + wordCloud.setAngleGenerator(new AngleGenerator(0)); + wordCloud.setBackground(new PixelBoundryBackground(getInputStream("backgrounds/dragon.png"))); + wordCloud.setColorPalette(new ColorPalette(new Color(0x0), new Color(0x333333), new Color(0x555555))); + wordCloud.setFontScalar(new SqrtFontScalar(6, 50)); + wordCloud.build(wordFrequencies); + wordCloud.writeToFile("output/dragon_chinese.png"); +} + +@Test +public void chineseCircle() throws IOException { + final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); + frequencyAnalyzer.setWordFrequenciesToReturn(600); + frequencyAnalyzer.setMinWordLength(2); + frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); + + final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/chinese_language.txt")); + final Dimension dimension = new Dimension(600, 600); + final WordCloud wordCloud = new WordCloud(dimension, 
CollisionMode.PIXEL_PERFECT); + wordCloud.setPadding(2); + wordCloud.setBackground(new CircleBackground(300)); + wordCloud.setColorPalette(new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5))); + wordCloud.setFontScalar(new SqrtFontScalar(12, 45)); + final long startTime = System.currentTimeMillis(); + wordCloud.build(wordFrequencies); + LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build"); + wordCloud.writeToFile("output/chinese_language_circle.png"); +} + +@Test +public void chineseVsEnglishTideComments() throws IOException { + final FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer(); + frequencyAnalyzer.setWordFrequenciesToReturn(750); + frequencyAnalyzer.setMinWordLength(3); + frequencyAnalyzer.setStopWords(loadStopWords()); + final List wordFrequencies = frequencyAnalyzer.load(getInputStream("text/english_tide.txt")); + + final FrequencyAnalyzer chineseFrequencyAnalyzer = new FrequencyAnalyzer(); + chineseFrequencyAnalyzer.setWordFrequenciesToReturn(750); + chineseFrequencyAnalyzer.setMinWordLength(2); + chineseFrequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer()); + final List wordFrequencies2 = chineseFrequencyAnalyzer.load(getInputStream("text/chinese_tide.txt")); + + final Dimension dimension = new Dimension(800, 600); + final PolarWordCloud wordCloud = new PolarWordCloud(dimension, CollisionMode.PIXEL_PERFECT, PolarBlendMode.BLUR); + wordCloud.setPadding(2); + wordCloud.setBackground(new RectangleBackground(dimension)); + wordCloud.setFontScalar(new SqrtFontScalar(10, 70)); + + final ColorPalette colorPalette = new ColorPalette(new Color(0xD5CFFA), new Color(0xBBB1FA), new Color(0x9A8CF5), new Color(0x806EF5)); + final ColorPalette colorPalette2 = new ColorPalette(new Color(0xFA8E8E), new Color(0xF77979), new Color(0xF55F5F), new Color(0xF24949)); + wordCloud.setColorPalette(colorPalette); + wordCloud.setColorPalette2(colorPalette2); + + final long startTime = 
System.currentTimeMillis(); + wordCloud.build(wordFrequencies, wordFrequencies2); + LOGGER.info("Took " + (System.currentTimeMillis() - startTime) + "ms to build"); + wordCloud.writeToFile("output/polar_tide_chinese_vs_english2.png"); +} +``` \ No newline at end of file diff --git a/kumo-nlp/pom.xml b/kumo-tokenizers/pom.xml similarity index 83% rename from kumo-nlp/pom.xml rename to kumo-tokenizers/pom.xml index 03df400..aea8383 100644 --- a/kumo-nlp/pom.xml +++ b/kumo-tokenizers/pom.xml @@ -9,35 +9,25 @@ 4.0.0 - kumo-nlp + kumo-tokenizers com.kennycason kumo-api - 1.9 + + log4j log4j - - commons-io - commons-io - - - org.apache.commons - commons-lang3 - - - org.jsoup - jsoup - junit junit test + org.languagetool diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/ChineseWordTokenizer.java b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizer.java similarity index 89% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/ChineseWordTokenizer.java rename to kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizer.java index a55a568..0b30095 100644 --- a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/ChineseWordTokenizer.java +++ b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizer.java @@ -1,5 +1,6 @@ -package com.kennycason.kumo.nlp.tokenizer; +package com.kennycason.kumo.nlp.tokenizers; +import com.kennycason.kumo.nlp.tokenizer.WordTokenizer; import org.languagetool.language.Chinese; import org.languagetool.tokenizers.Tokenizer; @@ -7,11 +8,8 @@ import java.util.List; public class ChineseWordTokenizer implements WordTokenizer { - private static final Chinese CHINESE = new Chinese(); - public ChineseWordTokenizer() {} - @Override public List tokenize(final String sentence) { final Tokenizer tokenizer = CHINESE.getWordTokenizer(); diff --git a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/EnglishWordTokenizer.java 
b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/EnglishWordTokenizer.java similarity index 87% rename from kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/EnglishWordTokenizer.java rename to kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/EnglishWordTokenizer.java index 98af77a..57d5d5a 100644 --- a/kumo-nlp/src/main/java/com/kennycason/kumo/nlp/tokenizer/EnglishWordTokenizer.java +++ b/kumo-tokenizers/src/main/java/com/kennycason/kumo/nlp/tokenizers/EnglishWordTokenizer.java @@ -1,5 +1,6 @@ -package com.kennycason.kumo.nlp.tokenizer; +package com.kennycason.kumo.nlp.tokenizers; +import com.kennycason.kumo.nlp.tokenizer.WordTokenizer; import org.languagetool.language.English; import org.languagetool.tokenizers.Tokenizer; @@ -7,7 +8,6 @@ import java.util.List; public class EnglishWordTokenizer implements WordTokenizer { - private static final English ENGLISH = new English(); public EnglishWordTokenizer() {} diff --git a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/ChineseWordTokenizerTest.java b/kumo-tokenizers/src/test/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizerTest.java similarity index 92% rename from kumo-nlp/src/test/java/com/kennycason/kumo/nlp/ChineseWordTokenizerTest.java rename to kumo-tokenizers/src/test/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizerTest.java index 3aa9b10..fd47b0f 100644 --- a/kumo-nlp/src/test/java/com/kennycason/kumo/nlp/ChineseWordTokenizerTest.java +++ b/kumo-tokenizers/src/test/java/com/kennycason/kumo/nlp/tokenizers/ChineseWordTokenizerTest.java @@ -1,6 +1,5 @@ -package com.kennycason.kumo.nlp; +package com.kennycason.kumo.nlp.tokenizers; -import com.kennycason.kumo.nlp.tokenizer.ChineseWordTokenizer; import com.kennycason.kumo.nlp.tokenizer.WordTokenizer; import org.apache.log4j.Logger; import org.junit.Test; diff --git a/kumo-nlp/src/test/resources/log4j.xml b/kumo-tokenizers/src/test/resources/log4j.xml similarity index 100% rename from 
kumo-nlp/src/test/resources/log4j.xml rename to kumo-tokenizers/src/test/resources/log4j.xml diff --git a/pom.xml b/pom.xml index d2448f7..45a2cb3 100755 --- a/pom.xml +++ b/pom.xml @@ -6,20 +6,19 @@ com.kennycason kumo + 1.9 pom ${project.artifactId} Kumo's goal is to create a powerful and user friendly Word Cloud API in Java. Kumo directly generates an image file without the need to create an applet (as many other libraries do). https://github.com/kennycason/kumo - kumo-nlp - kumo-core kumo-api + kumo-core kumo-cli + kumo-tokenizers - 1.9 - UTF-8 @@ -30,11 +29,31 @@ >${encoding} ${java.target} - com.kennycason.kumo.cli.KumoCli + + com.kennycason + kumo-api + ${project.version} + + + com.kennycason + kumo-core + ${project.version} + + + com.kennycason + kumo-cli + ${project.version} + + + com.kennycason + kumo-tokenizers + ${project.version} + + log4j log4j @@ -209,79 +228,12 @@ -Xms256m -Xmx768m -XX:+CMSClassUnloadingEnabled -Dfile.encoding=UTF-8 - **/examples/** - **/ITest.java + **ITest.java - - - org.apache.maven.plugins - maven-shade-plugin - 2.4.3 - - - - - ${cli.main.class} - - - - - - - package - - shade - - - - - - - de.thetaphi - forbiddenapis - 2.3 - - - false - - - jdk-deprecated - jdk-internal - - jdk-non-portable - - - - - - - check - testCheck - - - - - - - - de.thetaphi - forbiddenapis - -