Skip to content
Permalink
Browse files

Initial commit

  • Loading branch information...
quimcastella committed Dec 11, 2012
0 parents commit f8a082c6769931861062890e3c003ed5ab214d8e
Showing with 6,604 additions and 0 deletions.
  1. +12 −0 .classpath
  2. +17 −0 .project
  3. +41 −0 README.md
  4. BIN bin/io/DataPath.class
  5. BIN bin/io/FileLoader$1.class
  6. BIN bin/io/FileLoader.class
  7. BIN bin/io/FileLoaderWeight$1.class
  8. BIN bin/io/FileLoaderWeight.class
  9. BIN bin/io/HTMLSaver.class
  10. BIN bin/io/Loader.class
  11. BIN bin/wordcram/Anglers$1.class
  12. BIN bin/wordcram/Anglers$2.class
  13. BIN bin/wordcram/Anglers$3.class
  14. BIN bin/wordcram/Anglers$4.class
  15. BIN bin/wordcram/Anglers.class
  16. BIN bin/wordcram/BBTree.class
  17. BIN bin/wordcram/BBTreeBuilder.class
  18. BIN bin/wordcram/Colorers$1.class
  19. BIN bin/wordcram/Colorers$2.class
  20. BIN bin/wordcram/Colorers$3.class
  21. BIN bin/wordcram/Colorers$4.class
  22. BIN bin/wordcram/Colorers$5.class
  23. BIN bin/wordcram/Colorers.class
  24. BIN bin/wordcram/CoordProp.class
  25. BIN bin/wordcram/Coordinated$1.class
  26. BIN bin/wordcram/Coordinated$2.class
  27. BIN bin/wordcram/Coordinated$3.class
  28. BIN bin/wordcram/Coordinated$4.class
  29. BIN bin/wordcram/Coordinated.class
  30. BIN bin/wordcram/EngineWord.class
  31. BIN bin/wordcram/Fonters$1.class
  32. BIN bin/wordcram/Fonters$2.class
  33. BIN bin/wordcram/Fonters.class
  34. BIN bin/wordcram/HsbWordColorer.class
  35. BIN bin/wordcram/MDS.class
  36. BIN bin/wordcram/PixProd$1.class
  37. BIN bin/wordcram/PixProd$2.class
  38. BIN bin/wordcram/PixProd.class
  39. BIN bin/wordcram/Placers$1.class
  40. BIN bin/wordcram/Placers$2.class
  41. BIN bin/wordcram/Placers$3.class
  42. BIN bin/wordcram/Placers.class
  43. BIN bin/wordcram/PlottingWordNudger.class
  44. BIN bin/wordcram/PlottingWordPlacer.class
  45. BIN bin/wordcram/RandomWordNudger.class
  46. BIN bin/wordcram/RenderOptions.class
  47. BIN bin/wordcram/Sizers$1.class
  48. BIN bin/wordcram/Sizers$2.class
  49. BIN bin/wordcram/Sizers.class
  50. BIN bin/wordcram/SpiralWordNudger.class
  51. BIN bin/wordcram/SwirlWordPlacer.class
  52. BIN bin/wordcram/Timer.class
  53. BIN bin/wordcram/UpperLeftWordPlacer.class
  54. BIN bin/wordcram/WaveWordPlacer.class
  55. BIN bin/wordcram/Word.class
  56. BIN bin/wordcram/WordAngler.class
  57. BIN bin/wordcram/WordColorer.class
  58. BIN bin/wordcram/WordCompCenter.class
  59. BIN bin/wordcram/WordCompIdf.class
  60. BIN bin/wordcram/WordCompString.class
  61. BIN bin/wordcram/WordCompTWeight.class
  62. BIN bin/wordcram/WordCounter.class
  63. BIN bin/wordcram/WordCram$TextCase.class
  64. BIN bin/wordcram/WordCram.class
  65. BIN bin/wordcram/WordCramEngine.class
  66. BIN bin/wordcram/WordFonter.class
  67. BIN bin/wordcram/WordNudger.class
  68. BIN bin/wordcram/WordPlacer.class
  69. BIN bin/wordcram/WordShaper.class
  70. BIN bin/wordcram/WordSizer.class
  71. BIN bin/wordcram/WordSorterAndScaler.class
  72. BIN bin/wordcram/WordStorm.class
  73. BIN bin/wordcram/text/Html.class
  74. BIN bin/wordcram/text/Html2Text.class
  75. BIN bin/wordcram/text/Text.class
  76. BIN bin/wordcram/text/TextFile.class
  77. BIN bin/wordcram/text/TextSource.class
  78. BIN bin/wordcram/text/WebPage.class
  79. BIN bin/wordstorm/Algorithm.class
  80. BIN bin/wordstorm/AppletConf.class
  81. BIN bin/wordstorm/Combined.class
  82. BIN bin/wordstorm/Force.class
  83. BIN bin/wordstorm/Iterative.class
  84. BIN bin/wordstorm/StormConf.class
  85. BIN bin/wslauncher/IndepLauncher.class
  86. BIN bin/wslauncher/StormLauncher.class
  87. +1 −0 examples/data/input/PresidentialDebate/Obama1.txt
  88. +1 −0 examples/data/input/PresidentialDebate/Obama2.txt
  89. +1 −0 examples/data/input/PresidentialDebate/Obama3.txt
  90. +1 −0 examples/data/input/PresidentialDebate/Romney1.txt
  91. +1 −0 examples/data/input/PresidentialDebate/Romney2.txt
  92. +1 −0 examples/data/input/PresidentialDebate/Romney3.txt
  93. +46 −0 examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it.html
  94. BIN examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it/Obama1.png
  95. BIN examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it/Obama2.png
  96. BIN examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it/Obama3.png
  97. BIN examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it/Romney1.png
  98. BIN examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it/Romney2.png
  99. BIN examples/data/output/PresidentialDebate/alld 25w 50tol lC idfcolor combined col ang 5it/Romney3.png
  100. +46 −0 examples/data/output/PresidentialDebate/alld 25w noTol lC 5it.html
  101. BIN examples/data/output/PresidentialDebate/alld 25w noTol lC 5it/Obama1.png
  102. BIN examples/data/output/PresidentialDebate/alld 25w noTol lC 5it/Obama2.png
  103. BIN examples/data/output/PresidentialDebate/alld 25w noTol lC 5it/Obama3.png
  104. BIN examples/data/output/PresidentialDebate/alld 25w noTol lC 5it/Romney1.png
  105. BIN examples/data/output/PresidentialDebate/alld 25w noTol lC 5it/Romney2.png
  106. BIN examples/data/output/PresidentialDebate/alld 25w noTol lC 5it/Romney3.png
  107. BIN examples/images/complex.png
  108. BIN examples/images/programmes.png
  109. BIN library/WordCram.jar
  110. BIN library/cue.language.jar
  111. BIN library/jsoup-1.3.3.jar
  112. BIN library/processing/core.jar
  113. BIN library/processing/itext.jar
  114. BIN library/processing/pdf.jar
  115. +11 −0 src/io/DataPath.java
  116. +125 −0 src/io/FileLoader.java
  117. +129 −0 src/io/FileLoaderWeight.java
  118. +81 −0 src/io/HTMLSaver.java
  119. +77 −0 src/io/Loader.java
  120. +169 −0 src/wordcram/Anglers.java
  121. +138 −0 src/wordcram/BBTree.java
  122. +80 −0 src/wordcram/BBTreeBuilder.java
  123. +91 −0 src/wordcram/Colorers.java
  124. +145 −0 src/wordcram/CoordProp.java
  125. +240 −0 src/wordcram/Coordinated.java
  126. +115 −0 src/wordcram/EngineWord.java
  127. +41 −0 src/wordcram/Fonters.java
  128. +43 −0 src/wordcram/HsbWordColorer.java
  129. +459 −0 src/wordcram/MDS.java
  130. +146 −0 src/wordcram/PixProd.java
  131. +86 −0 src/wordcram/Placers.java
  132. +53 −0 src/wordcram/PlottingWordNudger.java
  133. +51 −0 src/wordcram/PlottingWordPlacer.java
  134. +58 −0 src/wordcram/RandomWordNudger.java
  135. +8 −0 src/wordcram/RenderOptions.java
  136. +79 −0 src/wordcram/Sizers.java
  137. +46 −0 src/wordcram/SpiralWordNudger.java
  138. +40 −0 src/wordcram/SwirlWordPlacer.java
  139. +24 −0 src/wordcram/Timer.java
  140. +42 −0 src/wordcram/UpperLeftWordPlacer.java
  141. +40 −0 src/wordcram/WaveWordPlacer.java
  142. +395 −0 src/wordcram/Word.java
  143. +36 −0 src/wordcram/WordAngler.java
  144. +41 −0 src/wordcram/WordColorer.java
  145. +29 −0 src/wordcram/WordCompCenter.java
  146. +18 −0 src/wordcram/WordCompIdf.java
  147. +13 −0 src/wordcram/WordCompString.java
  148. +19 −0 src/wordcram/WordCompTWeight.java
  149. +113 −0 src/wordcram/WordCounter.java
  150. +933 −0 src/wordcram/WordCram.java
  151. +384 −0 src/wordcram/WordCramEngine.java
  152. +40 −0 src/wordcram/WordFonter.java
  153. +60 −0 src/wordcram/WordNudger.java
  154. +60 −0 src/wordcram/WordPlacer.java
  155. +79 −0 src/wordcram/WordShaper.java
  156. +62 −0 src/wordcram/WordSizer.java
  157. +121 −0 src/wordcram/WordSorterAndScaler.java
  158. +358 −0 src/wordcram/WordStorm.java
  159. +30 −0 src/wordcram/text/Html.java
  160. +25 −0 src/wordcram/text/Html2Text.java
  161. +30 −0 src/wordcram/text/Text.java
  162. +38 −0 src/wordcram/text/TextFile.java
  163. +21 −0 src/wordcram/text/TextSource.java
  164. +36 −0 src/wordcram/text/WebPage.java
  165. +5 −0 src/wordstorm/Algorithm.java
  166. +37 −0 src/wordstorm/AppletConf.java
  167. +248 −0 src/wordstorm/Combined.java
  168. +215 −0 src/wordstorm/Force.java
  169. +182 −0 src/wordstorm/Iterative.java
  170. +108 −0 src/wordstorm/StormConf.java
  171. +41 −0 src/wslauncher/IndepLauncher.java
  172. +42 −0 src/wslauncher/StormLauncher.java
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="lib" path="library/cue.language.jar"/>
<classpathentry kind="lib" path="library/processing/core.jar"/>
<classpathentry kind="lib" path="library/processing/itext.jar"/>
<classpathentry kind="lib" path="library/jsoup-1.3.3.jar"/>
<classpathentry kind="lib" path="library/processing/pdf.jar"/>
<classpathentry kind="lib" path="library/WordCram.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="output" path="bin"/>
</classpath>
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>WordStorm</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>
@@ -0,0 +1,41 @@
The objective of this project is to generate **word storms**,
multiples of word clouds to afford visual comparison of groups of documents.
Just as a storm is a group of clouds, a word storm is a group of
word clouds. Each cloud in the storm represents a subset of
the corpus. For example, a storm might contain one cloud
per document, or alternatively one cloud to represent all the
documents written in each year, or one cloud to represent
each track of an academic conference, etc.

![Word Storm](examples/images/complex.png)

In order to make the clouds easy to compare,
words that appear in different clouds are placed in similar locations
and more informative words are emphasized.
In this way, similar documents are represented by visually similar clouds.

## Create a Word Storm

This project allows you to create word storms from your texts,
giving you options to personalize their appearance,
so you can create beautiful figures to analyze your documents.

The algorithms to generate the storm are built on top of **WordCram**
([web](http://wordcram.org/), [GitHub](https://github.com/danbernier/WordCram)),
a flexible word cloud generator that creates beautiful clouds using the popular [Wordle's](http://www.wordle.net/) layout.

### Usage

1. Set the path to the text files using 'io.DataPath.java'.
The project takes the data from 'datapath/input/folder' and returns the storm in
'datapath/output/folder/parameters'
2. Decide the cloud's size in 'AppletConf'.
3. Decide the storm configuration using 'StormConf' (number of files, number of words, colors, angles...)
4. Create a storm!

### Examples

See examples of usage in 'src/wslauncher':

1. 'StormLauncher.java' to create a coordinated word storm.
2. 'IndepLauncher.java' to create independent clouds.
BIN +442 Bytes bin/io/DataPath.class
Binary file not shown.
BIN +803 Bytes bin/io/FileLoader$1.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +3.67 KB bin/io/HTMLSaver.class
Binary file not shown.
BIN +3.36 KB bin/io/Loader.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +13.2 KB bin/wordcram/MDS.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +592 Bytes bin/wordcram/Sizers.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +972 Bytes bin/wordcram/Timer.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

@@ -0,0 +1,46 @@
<!DOCTYPE html>
<html>
<body>
<h3>
Word Storm</h3><table border= "0"><tr>
</tr>
<tr>
<td>
<a href= "alld 25w 50tol lC idfcolor combined col ang 5it/Obama1.png"><img src="alld 25w 50tol lC idfcolor combined col ang 5it/Obama1.png" alt=Obama1.txt width="320" height="240" />
<p>Obama1.txt</p>
</a>
</td>
<td>
<a href= "alld 25w 50tol lC idfcolor combined col ang 5it/Obama2.png"><img src="alld 25w 50tol lC idfcolor combined col ang 5it/Obama2.png" alt=Obama2.txt width="320" height="240" />
<p>Obama2.txt</p>
</a>
</td>
<td>
<a href= "alld 25w 50tol lC idfcolor combined col ang 5it/Obama3.png"><img src="alld 25w 50tol lC idfcolor combined col ang 5it/Obama3.png" alt=Obama3.txt width="320" height="240" />
<p>Obama3.txt</p>
</a>
</td>
</tr>
<tr>
<td>
<a href= "alld 25w 50tol lC idfcolor combined col ang 5it/Romney1.png"><img src="alld 25w 50tol lC idfcolor combined col ang 5it/Romney1.png" alt=Romney1.txt width="320" height="240" />
<p>Romney1.txt</p>
</a>
</td>
<td>
<a href= "alld 25w 50tol lC idfcolor combined col ang 5it/Romney2.png"><img src="alld 25w 50tol lC idfcolor combined col ang 5it/Romney2.png" alt=Romney2.txt width="320" height="240" />
<p>Romney2.txt</p>
</a>
</td>
<td>
<a href= "alld 25w 50tol lC idfcolor combined col ang 5it/Romney3.png"><img src="alld 25w 50tol lC idfcolor combined col ang 5it/Romney3.png" alt=Romney3.txt width="320" height="240" />
<p>Romney3.txt</p>
</a>
</td>
</tr>
<tr>
</tr>
<tr>
</tr>
</table><p>folder: PresidentialDebate</p><p>files: 6</p><p>parameters: alld 25w 50tol lC idfcolor combined col ang 5it</p></body>
</html>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,46 @@
<!DOCTYPE html>
<html>
<body>
<h3>
Word Storm</h3><table border= "0"><tr>
</tr>
<tr>
<td>
<a href= "alld 25w noTol lC 5it/Obama1.png"><img src="alld 25w noTol lC 5it/Obama1.png" alt=Obama1.txt width="320" height="240" />
<p>Obama1.txt</p>
</a>
</td>
<td>
<a href= "alld 25w noTol lC 5it/Obama2.png"><img src="alld 25w noTol lC 5it/Obama2.png" alt=Obama2.txt width="320" height="240" />
<p>Obama2.txt</p>
</a>
</td>
<td>
<a href= "alld 25w noTol lC 5it/Obama3.png"><img src="alld 25w noTol lC 5it/Obama3.png" alt=Obama3.txt width="320" height="240" />
<p>Obama3.txt</p>
</a>
</td>
</tr>
<tr>
<td>
<a href= "alld 25w noTol lC 5it/Romney1.png"><img src="alld 25w noTol lC 5it/Romney1.png" alt=Romney1.txt width="320" height="240" />
<p>Romney1.txt</p>
</a>
</td>
<td>
<a href= "alld 25w noTol lC 5it/Romney2.png"><img src="alld 25w noTol lC 5it/Romney2.png" alt=Romney2.txt width="320" height="240" />
<p>Romney2.txt</p>
</a>
</td>
<td>
<a href= "alld 25w noTol lC 5it/Romney3.png"><img src="alld 25w noTol lC 5it/Romney3.png" alt=Romney3.txt width="320" height="240" />
<p>Romney3.txt</p>
</a>
</td>
</tr>
<tr>
</tr>
<tr>
</tr>
</table><p>folder: PresidentialDebate</p><p>files: 6</p><p>parameters: alld 25w noTol lC 5it</p></body>
</html>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +32.6 KB library/WordCram.jar
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,11 @@
package io;
/**
 * Single place that tells the project where its data folder lives.
 *
 * <p>Input texts are read from below this folder and the generated output
 * is written below it as well. Edit the constant to point at your own data.
 */
public class DataPath {
    /** Absolute path of the data folder, without a trailing slash. */
    private static final String DATA_ROOT =
            "/home/quim/Documents/workspace/WordStorm/examples/data";

    /**
     * Returns the data folder used for both input and output.
     *
     * @return the absolute path of the data folder, without a trailing slash
     */
    public static String dataPath() {
        return DATA_ROOT;
    }
}
@@ -0,0 +1,125 @@
package io;

import java.io.*;
import java.util.Arrays;

import processing.core.PApplet;

import wordcram.Word;
import wordcram.WordCounter;
import wordcram.WordCram;
import wordcram.WordSorterAndScaler;
import wordcram.WordCram.TextCase;
import wordstorm.*;

/**
 * Loads the .txt files of one input folder, one file per cloud.
 *
 * <p>Author: Quim Castella.
 *
 * <p>Files are read from {@code DataPath.dataPath() + "/input/" + folder + "/"}.
 * Only names ending in ".txt" that do not start with "_meta" are accepted.
 * All output paths are built below
 * {@code DataPath.dataPath() + "/output/" + folder + "/"}.
 */
public class FileLoader extends Loader{
    // NOTE: both fields are static, so constructing a second FileLoader
    // replaces the folder and the file list seen by every existing instance.
    private static String folder;
    public static String[] fileNames;

    /**
     * Creates a loader for the given folder and lists its text files.
     *
     * @param maxFiles maximum number of files to use, or -1 to use all of them
     * @param folder   sub-folder name, used for both the input and the output paths
     */
    public FileLoader(int maxFiles, String folder){
        FileLoader.folder = folder;
        this.maxFiles = maxFiles;
        // loadFiles() reads maxFiles and inPath, so both must be set first.
        this.inPath = DataPath.dataPath()+"/input/"+folder+"/";
        this.numClouds = loadFiles();
    }

    /** Returns the output directory of the current folder, with a trailing slash. */
    private static String outputDir(){
        return DataPath.dataPath()+"/output/"+folder+"/";
    }

    /** Returns the file name of the given cloud with its last extension removed. */
    private static String baseName(int cloudIndex){
        String name = fileNames[cloudIndex];
        return name.substring(0, name.lastIndexOf('.'));
    }

    /** Returns the image file name of the given cloud: its base name plus ".png". */
    private static String imgName(int cloudIndex){
        return baseName(cloudIndex)+".png";
    }

    /**
     * @param conf       storm configuration; its string form names the sub-folder
     * @param cloudIndex index of the cloud in {@link #fileNames}
     * @return absolute path of the image written for that cloud
     */
    @Override
    public String getOutput( StormConf conf, int cloudIndex ){
        return outputDir()+conf+"/"+imgName(cloudIndex);
    }

    /**
     * @param conf       storm configuration; its string form names the sub-folder
     * @param cloudIndex index of the cloud in {@link #fileNames}
     * @param frameNum   frame number, formatted by {@code movieNumber}
     * @return absolute path of one movie frame, inside the "Movie" sub-folder
     */
    @Override
    public String getOutputMovie( StormConf conf, int cloudIndex, int frameNum ){
        String movieName = baseName(cloudIndex)+movieNumber(frameNum)+".png";
        return outputDir()+conf+"/Movie/"+movieName;
    }

    /**
     * @return image path relative to the output folder, i.e. without the
     *         data path, "output" and folder prefix
     */
    @Override
    public String getLocalOutput( StormConf conf, int cloudIndex ){
        return conf+"/"+imgName(cloudIndex);
    }

    /** @return absolute path of the output folder, with a trailing slash */
    @Override
    public String getHTMLFolder(){
        return outputDir();
    }

    /** @return the folder name given to the constructor */
    @Override
    public String getFolder(){
        return folder;
    }

    /**
     * @param maxWords not used by this implementation
     * @return absolute path of the comparison HTML page; the name contains
     *         "all" when no file limit applied, otherwise the number of clouds
     */
    @Override
    public String getHTMLOutput(int maxWords){
        boolean unlimited = maxFiles == -1 || numClouds < maxFiles;
        String printMF = unlimited ? "all" : ""+numClouds;
        return outputDir()+"comp "+printMF+"d.html";
    }

    /** @return absolute path of the HTML page for the given configuration */
    @Override
    public String getStormHTML(StormConf conf){
        return outputDir()+conf+".html";
    }

    /** @return absolute path of the log file for the given configuration */
    @Override
    public String getStormLog(StormConf conf){
        return outputDir()+conf+".txt";
    }

    /**
     * Lists the accepted files of {@code inPath}, sorts them and stores the
     * names of the first {@code maxFiles} of them (all when maxFiles is -1)
     * in {@link #fileNames}.
     *
     * <p>Terminates the JVM with a non-zero status when the directory cannot
     * be listed.
     *
     * @return the number of file names stored
     */
    private int loadFiles(){
        FilenameFilter filter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.endsWith(".txt") && !name.startsWith("_meta");
            }
        };
        File directory = new File(inPath);
        System.out.println(inPath);
        File[] files = directory.listFiles(filter);
        if(files == null){
            // listFiles returns null when the path is not a directory or an
            // I/O error occurs; this is a failure, so do not exit with 0.
            System.err.println("Cannot list input directory: "+inPath);
            System.exit(1);
        }
        Arrays.sort(files);
        int count = maxFiles == -1 ? files.length :
                Math.min(files.length, maxFiles);
        fileNames = new String[count];

        System.out.println(count+" files");
        for (int ii = 0; ii < count; ++ii) {
            fileNames[ii] = files[ii].getName();
        }
        return count;
    }

    /**
     * @param index index of the cloud in {@link #fileNames}
     * @return the file name (with extension) of that cloud
     */
    public String getName(int index){
        return fileNames[index];
    }

    /**
     * Reads one file, counts its words and hands them to the WordCram.
     *
     * <p>The lines of the file are joined with single spaces, the text case
     * chosen on {@code w} is applied, and the words are counted using the
     * extra stop words and number exclusion configured on {@code w}.
     *
     * @param parent applet used to read the file
     * @param w      WordCram that supplies the settings and receives the words
     * @param index  index of the file in {@link #fileNames}
     */
    @Override
    public void loadText(PApplet parent, WordCram w, int index){
        // inPath already ends with "/"; the extra separator is kept as-is.
        String source = PApplet.join(parent.
                loadStrings(inPath+"/"+fileNames[index]), ' ');
        if (w.textCase == TextCase.Lower) {
            source = source.toLowerCase();
        } else if (w.textCase == TextCase.Upper) {
            source = source.toUpperCase();
        }

        String extraStopWords = w.getExtraStopWords();
        boolean excludeNumbers = w.getExcludeNumbers();
        Word[] words = new WordCounter().withExtraStopWords(extraStopWords).
                shouldExcludeNumbers(excludeNumbers).count(source);
        words = new WordSorterAndScaler().sortAndScale(words,false);
        w.setWords(words);
    }
}