Skip to content

Commit

Permalink
Updated README. Added some Javadoc. Added some new unit tests. Increa…
Browse files Browse the repository at this point in the history
…sed the version to 1.0.4
  • Loading branch information
stoyanr committed Dec 2, 2012
1 parent e4dd1e7 commit 382f5d2
Show file tree
Hide file tree
Showing 10 changed files with 385 additions and 169 deletions.
200 changes: 135 additions & 65 deletions README.md

Large diffs are not rendered by default.

204 changes: 102 additions & 102 deletions wordcounter/pom.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
# $Id: $
#
#
# Copyright 2012 Stoyan Rachev (stoyanr@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -18,106 +18,106 @@
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.stoyanr.wordcounter</groupId>
<artifactId>wordcounter</artifactId>
<version>1.0.3</version>
<packaging>jar</packaging>
<name>Wordcounter</name>
<url>http://maven.apache.org</url>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:git:git@github.com:stoyanr/Wordcounter.git</connection>
<developerConnection>scm:git:git@github.com:stoyanr/Wordcounter.git</developerConnection>
<url>git@github.com:stoyanr/Wordcounter.git</url>
</scm>
<developers>
<developer>
<id>stoyanr</id>
<name>Stoyan Rachev</name>
<email>stoyanr@gmail.com</email>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>com.stoyanr.wordcounter.Main</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.8.1</version>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2</version>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<modelVersion>4.0.0</modelVersion>
<groupId>com.stoyanr.wordcounter</groupId>
<artifactId>wordcounter</artifactId>
<version>1.0.4</version>
<packaging>jar</packaging>
<name>Wordcounter</name>
<url>http://maven.apache.org</url>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:git:git@github.com:stoyanr/Wordcounter.git</connection>
<developerConnection>scm:git:git@github.com:stoyanr/Wordcounter.git</developerConnection>
<url>git@github.com:stoyanr/Wordcounter.git</url>
</scm>
<developers>
<developer>
<id>stoyanr</id>
<name>Stoyan Rachev</name>
<email>stoyanr@gmail.com</email>
</developer>
</developers>
<dependencies>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>2.4</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>com.stoyanr.wordcounter.Main</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.8.1</version>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.2</version>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>
19 changes: 19 additions & 0 deletions wordcounter/src/main/java/com/stoyanr/util/ForkJoinComputer.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,25 @@
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;

/**
* A generic Fork / Join computer. It divides the initial size by 2 until either reaching the
* specified parallelism level or falling below the specified threshold, computes each portion
* serially using the specified computer, and then joins the results of all computations using
* the specified merger.
* <p>
* To use this class, simply instantiate it with the appropriate lambdas and then call its
* {@code compute} method:
* <p>
* <pre>{@code
* // Calculate the sum of all integers from 1 to n, using 1000 as a threshold
* new ForkJoinComputer<Integer>(n, 1000,
* (lo, hi) -> { int sum = 0; for (int i = lo + 1; i <= hi; i++) sum += i; return sum; },
* (a, b) -> a + b).compute();
* }</pre>
*
* @author Stoyan Rachev
* @param <T> the type of the result produced by each serial computation and by merging results
*/
public class ForkJoinComputer<T> {

public static final int DEFAULT_PAR_LEVEL = Runtime.getRuntime().availableProcessors();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@
import java.util.concurrent.TimeUnit;
import java.util.functions.Block;

/**
* A generic Producer / Consumer executor. It starts a single producer task and multiple mediator
* and consumer tasks with their number equal to the specified parallelism level. The producer puts
* {@code T1} instances in a {@code BlockingQueue<T1>}. The mediators take these instances from
* there, convert them to {@code T2}, and put them in another blocking queue of type
* {@code BlockingQueue<T2>}. Finally, the consumers take the {@code T2} instances from the second
* blocking queue and process them.
*
* @author Stoyan Rachev
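* @param <T1> the type of the instances the producer puts in the first blocking queue
* @param <T2> the type the mediators convert {@code T1} instances to, processed by the consumers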
*/
public class ProducerConsumerExecutor<T1, T2> {

public static final int DEFAULT_PAR_LEVEL = Runtime.getRuntime().availableProcessors();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@

import com.stoyanr.util.Logger;

/**
* A sorted list of word usage counts mapped to all words that have such counts. Provides methods
* for adding top word counts, checking for equality, and printing. Internally, this class
* encapsulates a {@code SortedMap<Integer, Set<String>>}. Some of the analysis methods
* of {@link WordCountAnalyzer} return instances of this class.
*
* @author Stoyan Rachev
*/
public class TopWordCounts {

private final int number;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@

import com.stoyanr.util.ForkJoinComputer;

/**
* A facility that provides methods for performing analysis on the word counts produced by
* {@link WordCounter}, such as finding the top N most used words. It is initialized with a
* {@link WordCounts} instance, a flag indicating whether to use parallel processing, and
* (optionally) a parallelism level.
* <p>
* To use this class, simply instantiate it and then call one of its methods:
* <p>
* <pre>{@code
* // Find the top 10 most used words in wc
* new WordCountAnalyzer(wc, true).findTop(10, (x, y) -> (y - x));
* }</pre>
*
* @author Stoyan Rachev
*/
public class WordCountAnalyzer {

private static final int THRESHOLD = 32 * 1024;
Expand Down
19 changes: 17 additions & 2 deletions wordcounter/src/main/java/com/stoyanr/wordcounter/WordCounter.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,25 @@
import com.stoyanr.util.FileUtils;
import com.stoyanr.util.ProducerConsumerExecutor;

/**
* A word counter facility that provides a method for counting words in a {@code Path} representing
* a file or a directory tree, either serially or in parallel. It is initialized with a path,
* a predicate to determine whether a character is a word character, an optional unary operator
* to be performed on words, a flag indicating whether to use parallel processing, and (optionally)
* a parallelism level.
* <p>
* To use this class, simply instantiate it with the appropriate lambdas and then call its
* {@code count} method:
* <p>
* <pre>{@code
* // Count all words consisting of only alphabetic chars, ignoring case, using parallel processing
* new WordCounter(path, (c) -> Character.isAlphabetic(c), (s) -> s.toLowerCase(), true).count();
* }</pre>
*
* @author Stoyan Rachev
*/
public class WordCounter {

private static final int MAX_ST_SIZE = 1024 * 1024;

private final Path path;
private final CharPredicate pred;
private final UnaryOperator<String> op;
Expand Down
10 changes: 10 additions & 0 deletions wordcounter/src/main/java/com/stoyanr/wordcounter/WordCounts.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@

import com.stoyanr.util.Logger;

/**
* A list of words mapped to their usage counts. Provides methods for adding word counts, checking
* for equality, printing, and internal iterations over its contents.
* Internally, this class encapsulates a {@code Map<String, AtomicInteger>} which is either a
* {@code HashMap} or a {@code ConcurrentHashMap} depending on the parallelism level specified
* upon construction. The word counting methods of {@link WordUtils} and {@link WordCounter} return
* instances of this class.
*
* @author Stoyan Rachev
*/
public class WordCounts {

private final Map<String, AtomicInteger> m;
Expand Down
12 changes: 12 additions & 0 deletions wordcounter/src/main/java/com/stoyanr/wordcounter/WordUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,18 @@

import com.stoyanr.util.CharPredicate;

/**
* A utility class that provides several overloaded static methods for counting words in strings.
* The central method {@code countWords} accepts a string, a predicate to determine whether a
* character is a word character, and an optional unary operator to be performed on words.
* <p>
* <pre>{@code
* // Count all words consisting of only alphabetic chars, ignoring case
* WordCounts wc = WordUtils.countWords(text, (c) -> Character.isAlphabetic(c), (s) -> s.toLowerCase());
* }</pre>
*
* @author Stoyan Rachev
*/
public class WordUtils {

public static WordCounts countWords(String text, CharPredicate pred) {
Expand Down
Loading

0 comments on commit 382f5d2

Please sign in to comment.