diff --git a/.Rbuildignore b/.Rbuildignore
index 91114bf..ced8b8a 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,2 +1,4 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
+^README\.Rmd$
+
diff --git a/DESCRIPTION b/DESCRIPTION
index 90cd704..7ca3531 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,16 +1,23 @@
 Package: LIWCalike
 Type: Package
 Title: Text analysis similar to the Linguistic Inquiry and Word Count (LIWC)
-Version: 0.1.0
+Version: 0.1.1
 Date: 2016-04-22
 Author: Kenneth Benoit
 Maintainer: Kenneth Benoit <kbenoit@lse.ac.uk>
-Description: Built on the quanteda package for text analysis, LIWCalikes provides a simple interface to the analysis of text by counting words and other textual features, including the application of a dictionary to produce a tabular report of percentages.  This provides similar functionality to the LIWC stand-alone software.  The user must a dictionary, which can include one of the custom LIWC dictionaries if these have been purchased from http://liwc.wpengine.com.
+Description: Built on the quanteda package for text analysis, LIWCalike
+    provides a simple interface to the analysis of text by counting words and other
+    textual features, including the application of a dictionary to produce a tabular
+    report of percentages. This provides similar functionality to the LIWC
+    stand-alone software. The user must supply a dictionary, which can include one of
+    the custom LIWC dictionaries if these have been purchased from http://liwc.wpengine.com.
 License: GPL-3
 LazyData: TRUE
-Depends: quanteda (>= 0.9.5.20)
-Imports: stringi
+Depends:
+    quanteda (>= 0.9.5-20)
+Imports:
+    stringi
 URL: http://github.com/kbenoit/LIWCalike
 Encoding: UTF-8
 BugReports: https://github.com/kbenoit/LIWCalike/issues
-VignetteBuilder: knitr
+RoxygenNote: 5.0.1
diff --git a/NAMESPACE b/NAMESPACE
index d75f824..88e32cf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1 +1,6 @@
-exportPattern("^[[:alpha:]]+")
+# Generated by roxygen2: do not edit by hand
+
+S3method(liwcalike,character)
+S3method(liwcalike,corpus)
+export(liwcalike)
+import(quanteda)
diff --git a/R/data.R b/R/data.R
new file mode 100644
index 0000000..e184d34
--- /dev/null
+++ b/R/data.R
@@ -0,0 +1,13 @@
+
+#' @name testphrases
+#' @docType data
+#' @title sample short documents for testing
+#' @description Some sample short documents in plain text format for testing
+#'   with \code{\link{liwcalike}}.
+#' @format A character vector of 10 short documents, one document per element.
+#' @examples
+#' liwcalike(testphrases)
+NULL
+
+# save(testphrases, file = "data/testphrases.RData")
+# writeLines(testphrases, "inst/extdata/testphrases.txt")
diff --git a/R/liwc.R b/R/liwcalike.R
similarity index 67%
rename from R/liwc.R
rename to R/liwcalike.R
index 0bd0094..f3f894d 100644
--- a/R/liwc.R
+++ b/R/liwcalike.R
@@ -8,6 +8,8 @@
 #'   vector for analysis
 #' @param dictionary a \pkg{quanteda} \link[quanteda]{dictionary} object
 #'   supplied for analysis
+#' @param toLower convert to common (lower) case before tokenizing
+#' @param verbose if \code{TRUE} print status messages during processing
 #' @param ... options passed to \code{\link[quanteda]{tokenize}} offering
 #'   finer-grained control over how "words" are defined
 #' @return a data.frame object containing the analytic results, one row per
@@ -20,25 +22,41 @@
 #'   texts into smaller units based on user-supplied tags, sentence, or
 #'   paragraph boundaries.
 #' @examples
+#' liwcalike(testphrases)
+#'
+#' # examples for comparison
+#' txt <- c("The red-shirted lawyer gave her ex-boyfriend $300 out of pity :(.")
+#' myDict <- dictionary(list(people = c("lawyer", "boyfriend"),
+#'                           colorFixed = "red",
+#'                           colorGlob = "red*",
+#'                           mwe = "out of"))
+#' liwcalike(txt, myDict, what = "word")
+#' liwcalike(txt, myDict, what = "fasterword")
+#' (toks <- tokenize(txt, what = "fasterword", removeHyphens = TRUE))
+#' length(toks[[1]])
+#' # LIWC says 12 words
+#'
+#' \dontrun{# works with LIWC 2015 dictionary too
 #' liwcDict <- dictionary(file = "~/Dropbox/QUANTESS/dictionaries/LIWC/LIWC2015_English_Flat.dic",
 #'                        format = "LIWC")
-#' inaugLIWCanalysis <- liwc(inaugTexts, liwcDict)
-#'
+#' inaugLIWCanalysis <- liwcalike(inaugTexts, liwcDict)
+#' }
 #' @export
-liwc <- function(x, ...) {
-    UseMethod("liwc")
+#' @import quanteda
+liwcalike <- function(x, ...) {
+    UseMethod("liwcalike")
 }
 
 
-#' @rdname liwc
+#' @rdname liwcalike
 #' @export
-liwc.corpus <- function(x, ...) {
-    liwc(texts(x), ...)
+liwcalike.corpus <- function(x, ...) {
+    liwcalike(texts(x), ...)
 }
 
-#' @rdname liwc
+#' @rdname liwcalike
 #' @export
-liwc.character <- function(x, dictionary = NULL, toLower = TRUE, verbose = TRUE, ...) {
+liwcalike.character <- function(x, dictionary = NULL, toLower = TRUE, verbose = TRUE, ...) {
 
     ## initialize results data.frame
     ## similar to "Filename" and Segment
@@ -48,7 +66,7 @@ liwc.character <- function(x, dictionary = NULL, toLower = TRUE, verbose = TRUE,
                    stringsAsFactors = FALSE)
 
     ## get readability before lowercasing
-    WPS <- readability(x, "meanSentenceLength", ...)
+    WPS <- readability(x, "meanSentenceLength") #, ...)
 
     ## lower case the texts if required
     if (toLower) x <- toLower(x)
@@ -62,7 +80,7 @@ liwc.character <- function(x, dictionary = NULL, toLower = TRUE, verbose = TRUE,
     }
 
     ## tokenize and form the dfm
-    toks <- tokenize(x, ...)
+    toks <- tokenize(x, removePunct = TRUE, removeHyphens = TRUE, ...)
     dfmAll <- dfm(toks, verbose = FALSE)
     if (!is.null(dictionary))
         dfmDict <- dfm(toks, verbose = FALSE, dictionary = dictionary)
@@ -86,7 +104,8 @@ liwc.character <- function(x, dictionary = NULL, toLower = TRUE, verbose = TRUE,
     ## add the dictionary counts, transformed to percentages of total words
     if (!is.null(dictionary))
         result <- cbind(result,
-                        as.data.frame(dfmDict / rep(result[["WC"]], each = nfeature(dfmDict)) * 100))
+                        quanteda::as.data.frame(dfmDict / rep(result[["WC"]],
+                                                              each = nfeature(dfmDict))) * 100)
 
     ## add punctuation counts
     # AllPunc
@@ -102,9 +121,12 @@ liwc.character <- function(x, dictionary = NULL, toLower = TRUE, verbose = TRUE,
     # Parenth -- note this is specified as "pairs of parentheses"
     # OtherP
 
+    # format the result
+    result[, which(names(result)=="Sixltr") : ncol(result)] <-
+        format(result[, which(names(result)=="Sixltr") : ncol(result)],
+               digits = 4, trim = TRUE)
+
     result
 }
 
 
-# the word counts
-
diff --git a/README.Rmd b/README.Rmd
new file mode 100644
index 0000000..566ff84
--- /dev/null
+++ b/README.Rmd
@@ -0,0 +1,83 @@
+---
+output:
+  md_document:
+    variant: markdown_github
+---
+
+```{r, echo = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.path = "README-"
+)
+```
+
+**Master branch** [![Build Status](https://travis-ci.org/kbenoit/LIWCalike.svg?branch=master)](https://travis-ci.org/kbenoit/LIWCalike)
+[![codecov.io](https://codecov.io/github/kbenoit/LIWCalike/coverage.svg?branch=master)](https://codecov.io/github/kbenoit/LIWCalike/coverage.svg?branch=master)
+
+
+## LIWCalike: an R implementation of the Linguistic Inquiry and Word Count
+
+Built on the quanteda package for text analysis, LIWCalike provides a simple interface to the analysis of text by counting words and other textual features, including the application of a dictionary to produce a tabular report of percentages.  This provides similar functionality to the LIWC stand-alone software.  The user must supply a dictionary, which can include one of the custom LIWC dictionaries if these have been purchased from http://liwc.wpengine.com, or any other dictionary supplied by the user.
+
+### Differences from the LIWC standalone software
+
+This package is designed for R users and those wishing to build functionality by extending the [**quanteda**](https://github.com/kbenoit/quanteda) package for text analysis.  If you prefer to have a complete, stand-alone user interface, then you should purchase and use the [LIWC standalone software](http://liwc.wpengine.com).  This has several advantages:
+
+*  LIWC allows direct importing of files, including binary (Word, pdf, etc) formats.  To use
+   **LIWCalike**, you will need to import these into the **quanteda** package first.  
+   **LIWCalike** also works fine with simple character vectors, if you prefer to use 
+   standard R methods to create your input object (e.g. `readLines()`, `read.csv()`, etc.)
+
+*  LIWC provides direct outputs in the form of csv, Excel files, etc.  By contrast, **LIWCalike** returns a `data.frame`, which you have to export yourself (e.g. using `write.csv()`.)
+
+*  LIWC provides easy segmentation, through a GUI.  By contrast, with **LIWCalike** you will
+   have to segment the texts yourself.  (**quanteda** provides easy ways to do this using 
+   `segment()` and `changeunits()`.)
+   
+*  LIWC color codes the dictionary value matches in your texts and displays these in a nice graphical window.  
+
+
+## Using dictionaries with LIWCalike
+
+No dictionaries are supplied with **LIWCalike**; it is up to you to supply these.  With the **quanteda** functions for creating or importing dictionaries, however, this is quite easy.
+
+With LIWC 2007, external dictionaries were distributed with the software that could be used in the format read by Provalis Research's [*Wordstat*](http://provalisresearch.com/products/content-analysis-software/).  Because I purchased a license for this product, I have that file and can use it with **LIWCalike**.
+
+Using it is quite straightforward:
+
+```{r}
+require(LIWCalike)
+
+# read in the dictionary
+liwc2007dict <- dictionary(file = "~/Dropbox/QUANTESS/dictionaries/LIWC/LIWC2007.cat", 
+                           format = "wordstat")
+tail(liwc2007dict, 1)
+
+# our test data
+testphrases
+
+# call LIWCalike
+output <- liwcalike(testphrases, liwc2007dict)
+
+# view some results
+output[, c(1:7, ncol(output)-2)]
+```
+
+
+## How to Install
+
+```
+devtools::install_github("kbenoit/quanteda")
+devtools::install_github("kbenoit/LIWCalike")
+```
+
+You need to have installed the **quanteda** package of at least version 0.9.5-20 for this 
+to work, since that update implemented multi-word dictionary values.
+
+
+## Comments and feedback
+
+I welcome your comments and feedback.  Please file issues on the issues page, and/or send me comments at kbenoit@lse.ac.uk.
+
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0ae9b5a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,104 @@
+**Master branch** [![Build Status](https://travis-ci.org/kbenoit/LIWCalike.svg?branch=master)](https://travis-ci.org/kbenoit/LIWCalike) [![codecov.io](https://codecov.io/github/kbenoit/LIWCalike/coverage.svg?branch=master)](https://codecov.io/github/kbenoit/LIWCalike/coverage.svg?branch=master)
+
+LIWCalike: an R implementation of the Linguistic Inquiry and Word Count
+-----------------------------------------------------------------------
+
+Built on the quanteda package for text analysis, LIWCalike provides a simple interface to the analysis of text by counting words and other textual features, including the application of a dictionary to produce a tabular report of percentages. This provides similar functionality to the LIWC stand-alone software. The user must supply a dictionary, which can include one of the custom LIWC dictionaries if these have been purchased from <http://liwc.wpengine.com>, or any other dictionary supplied by the user.
+
+### Differences from the LIWC standalone software
+
+This package is designed for R users and those wishing to build functionality by extending the [**quanteda**](https://github.com/kbenoit/quanteda) package for text analysis. If you prefer to have a complete, stand-alone user interface, then you should purchase and use the [LIWC standalone software](http://liwc.wpengine.com). This has several advantages:
+
+-   LIWC allows direct importing of files, including binary (Word, pdf, etc) formats. To use **LIWCalike**, you will need to import these into the **quanteda** package first.
+    **LIWCalike** also works fine with simple character vectors, if you prefer to use standard R methods to create your input object (e.g. `readLines()`, `read.csv()`, etc.)
+
+-   LIWC provides direct outputs in the form of csv, Excel files, etc. By contrast, **LIWCalike** returns a `data.frame`, which you have to export yourself (e.g. using `write.csv()`.)
+
+-   LIWC provides easy segmentation, through a GUI. By contrast, with **LIWCalike** you will have to segment the texts yourself. (**quanteda** provides easy ways to do this using `segment()` and `changeunits()`.)
+
+-   LIWC color codes the dictionary value matches in your texts and displays these in a nice graphical window.
+
+Using dictionaries with LIWCalike
+---------------------------------
+
+No dictionaries are supplied with **LIWCalike**; it is up to you to supply these. With the **quanteda** functions for creating or importing dictionaries, however, this is quite easy.
+
+With LIWC 2007, external dictionaries were distributed with the software that could be used in the format read by Provalis Research's [*Wordstat*](http://provalisresearch.com/products/content-analysis-software/). Because I purchased a license for this product, I have that file and can use it with **LIWCalike**.
+
+Using it is quite straightforward:
+
+``` r
+require(LIWCalike)
+#> Loading required package: LIWCalike
+#> Loading required package: quanteda
+#> quanteda version 0.9.5.20
+#> 
+#> Attaching package: 'quanteda'
+#> The following object is masked from 'package:base':
+#> 
+#>     sample
+
+# read in the dictionary
+liwc2007dict <- dictionary(file = "~/Dropbox/QUANTESS/dictionaries/LIWC/LIWC2007.cat", 
+                           format = "wordstat")
+#> Warning in strsplit(w, "\\("): input string 1 is invalid in this locale
+tail(liwc2007dict, 1)
+#> $`SPOKEN CATEGORIES.FILLERS`
+#>  [1] "blah"         NA             "idontknow"    "imean"       
+#>  [5] "ohwell"       "oranything*"  "orsomething*" "orwhatever*" 
+#>  [9] "rr*"          "yakn*"        "ykn*"         "youknow*"
+
+# our test data
+testphrases
+#>  [1] "Test sentence for LIWCalike.  Second sentence."                   
+#>  [2] "Each row is a document."                                          
+#>  [3] "Comma, period."                                                   
+#>  [4] "The red-shirted lawyer gave her ex-boyfriend $300 out of pity :(."
+#>  [5] "LOL :-)."                                                         
+#>  [6] "(Parentheses) for $100."                                          
+#>  [7] "Say \"what\" again!!"                                             
+#>  [8] "Why are we here?"                                                 
+#>  [9] "Other punctation: §; ±."                                          
+#> [10] "Sentence one.  Sentence two! :-)"
+
+# call LIWCalike
+output <- liwcalike(testphrases, liwc2007dict)
+
+# view some results
+output[, c(1:7, ncol(output)-2)]
+#>        docname Segment WC WPS Sixltr    Dic
+#> text1    text1       1  6   3  50.00 120.00
+#> text2    text2       2  5   5  20.00  50.00
+#> text3    text3       3  2   2   0.00 100.00
+#> text4    text4       4 12  12  16.67  40.00
+#> text5    text5       5  1   1   0.00  33.33
+#> text6    text6       6  3   3  33.33  75.00
+#> text7    text7       7  3   3   0.00  30.00
+#> text8    text8       8  4   4   0.00  26.67
+#> text9    text9       9  2   2  50.00  66.67
+#> text10  text10      10  4   2  50.00 100.00
+#>        LINGUISTIC PROCESSES.FUNCTION WORDS SPOKEN CATEGORIES.ASSENT
+#> text1                                33.33                        0
+#> text2                                50.00                        0
+#> text3                                 0.00                        0
+#> text4                                66.67                        0
+#> text5                                 0.00                       25
+#> text6                                16.67                        0
+#> text7                                33.33                        0
+#> text8                                50.00                        0
+#> text9                                16.67                        0
+#> text10                               33.33                        0
+```
+
+How to Install
+--------------
+
+    devtools::install_github("kbenoit/quanteda")
+    devtools::install_github("kbenoit/LIWCalike")
+
+You need to have installed the **quanteda** package of at least version 0.9.5-20 for this to work, since that update implemented multi-word dictionary values.
+
+Comments and feedback
+---------------------
+
+I welcome your comments and feedback. Please file issues on the issues page, and/or send me comments at <kbenoit@lse.ac.uk>.
diff --git a/data/testphrases.RData b/data/testphrases.RData
new file mode 100644
index 0000000..94bf6de
Binary files /dev/null and b/data/testphrases.RData differ
diff --git a/inst/extdata/testphrases.txt b/inst/extdata/testphrases.txt
new file mode 100644
index 0000000..0b653af
--- /dev/null
+++ b/inst/extdata/testphrases.txt
@@ -0,0 +1,10 @@
+Test sentence for LIWCalike.  Second sentence.
+Each row is a document.
+Comma, period.
+The red-shirted lawyer gave her ex-boyfriend $300 out of pity :(.
+LOL :-).
+(Parentheses) for $100.
+Say "what" again!!
+Why are we here?
+Other punctation: ^; %, &.
+Sentence one.  Sentence two! :-)
diff --git a/man/hello.Rd b/man/hello.Rd
deleted file mode 100644
index 0fa7c4b..0000000
--- a/man/hello.Rd
+++ /dev/null
@@ -1,12 +0,0 @@
-\name{hello}
-\alias{hello}
-\title{Hello, World!}
-\usage{
-hello()
-}
-\description{
-Prints 'Hello, world!'.
-}
-\examples{
-hello()
-}
diff --git a/man/liwcalike.Rd b/man/liwcalike.Rd
new file mode 100644
index 0000000..1d34253
--- /dev/null
+++ b/man/liwcalike.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/liwcalike.R
+\name{liwcalike}
+\alias{liwcalike}
+\alias{liwcalike.character}
+\alias{liwcalike.corpus}
+\title{analyze text in a LIWC-alike fashion}
+\usage{
+liwcalike(x, ...)
+
+\method{liwcalike}{corpus}(x, ...)
+
+\method{liwcalike}{character}(x, dictionary = NULL, toLower = TRUE,
+  verbose = TRUE, ...)
+}
+\arguments{
+\item{x}{input object, a \pkg{quanteda} \link[quanteda]{corpus} or character
+vector for analysis}
+
+\item{...}{options passed to \code{\link[quanteda]{tokenize}} offering
+finer-grained control over how "words" are defined}
+
+\item{dictionary}{a \pkg{quanteda} \link[quanteda]{dictionary} object
+supplied for analysis}
+
+\item{toLower}{convert to common (lower) case before tokenizing}
+
+\item{verbose}{if \code{TRUE} print status messages during processing}
+}
+\value{
+a data.frame object containing the analytic results, one row per
+  document supplied
+}
+\description{
+Analyze a set of texts to produce a dataset of percentages and other
+quantities describing the text, similar to the functionality supplied by the
+Linguistic Inquiry and Word Count standalone software distributed at
+\url{http://liwc.wpengine.com}.
+}
+\section{Segmentation}{
+ The LIWC standalone software has many options for
+  segmenting the text.  While this function does not supply segmentation
+  options, you can easily achieve the same effect by converting the input
+  object into a corpus (if it is not already a corpus) and using
+  \link[quanteda]{changeunits} or \link[quanteda]{segment} to split the input
+  texts into smaller units based on user-supplied tags, sentence, or
+  paragraph boundaries.
+}
+\examples{
+liwcalike(testphrases)
+
+# examples for comparison
+txt <- c("The red-shirted lawyer gave her ex-boyfriend $300 out of pity :(.")
+myDict <- dictionary(list(people = c("lawyer", "boyfriend"),
+                          colorFixed = "red",
+                          colorGlob = "red*",
+                          mwe = "out of"))
+liwcalike(txt, myDict, what = "word")
+liwcalike(txt, myDict, what = "fasterword")
+(toks <- tokenize(txt, what = "fasterword", removeHyphens = TRUE))
+length(toks[[1]])
+# LIWC says 12 words
+
+\dontrun{# works with LIWC 2015 dictionary too
+liwcDict <- dictionary(file = "~/Dropbox/QUANTESS/dictionaries/LIWC/LIWC2015_English_Flat.dic",
+                       format = "LIWC")
+inaugLIWCanalysis <- liwcalike(inaugTexts, liwcDict)
+}
+}
+
diff --git a/man/testphrases.Rd b/man/testphrases.Rd
new file mode 100644
index 0000000..25a4dc6
--- /dev/null
+++ b/man/testphrases.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data.R
+\docType{data}
+\name{testphrases}
+\alias{testphrases}
+\title{sample short documents for testing}
+\description{
+Some sample short documents in plain text format for testing
+  with \code{\link{liwcalike}}.
+}
+\examples{
+liwcalike(testphrases)
+
+}
+