-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
365 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,4 +18,4 @@ inst/syllable_logo | |
inst/staticdocs | ||
inst/extra_statdoc | ||
inst/maintenance.R | ||
|
||
inst/syllable_dictionary_scraping |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
# Generated by roxygen2 (4.1.1): do not edit by hand | ||
|
||
export(compute_syllable_counts) | ||
export(count_string) | ||
export(lookup_syllable_counts) | ||
export(sum_string) | ||
importFrom(data.table,setDT) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#' String Syllables Counts | ||
#' | ||
#' Syllable counts for the words in a single string. | ||
#' | ||
#' @param x A character string. | ||
#' @param \ldots ignored | ||
#' @return Returns a vector of integer counts. | ||
#' @export | ||
#' @examples | ||
#' count_string("I like chicken and eggs for breakfast") | ||
#' count_string(hamlets_soliloquy) | ||
#' library(stringi) | ||
#' | ||
#' data.frame( | ||
#' word = stri_extract_all_words(stri_trans_tolower(hamlets_soliloquy))[[1]], | ||
#' syllables = count_string(hamlets_soliloquy) | ||
#' ) | ||
count_string <- function(x, ...){ | ||
|
||
if (length(x) > 1) { | ||
stop("`count_string` operates on a string.\n", | ||
"Consider using `count_vector` instead") | ||
} | ||
|
||
syllable_count_long_vector(x) | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#' String Syllables Sum | ||
#' | ||
#' Syllable sum for the words in a single string. | ||
#' | ||
#' @param x A character string. | ||
#' @param \ldots ignored | ||
#' @return Returns a single integer of the total number of syllables in the string. | ||
#' @export | ||
#' @examples | ||
#' sum_string("I like chicken and eggs for breakfast") | ||
#' sum_string(hamlets_soliloquy) | ||
sum_string <- function(x, ...){ | ||
|
||
if (length(x) > 1) { | ||
stop("`count_string` operates on a string.\n", | ||
"Consider using `count_vector` instead") | ||
} | ||
|
||
sum(syllable_count_long_vector(x), na.rm = TRUE) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
p_load(dplyr, XML, qdapDictionaries, parallel) | ||
|
||
#syllable_df <- list2df(as.list.environment(qdap::env.syl), "syllables", "word")[, 2:1] | ||
#no_syllable <- GradyAugmented[!GradyAugmented %in% syllable_df$word] | ||
|
||
syllable_scrape <- function(x){ | ||
URL <- sprintf('http://www.poetrysoup.com/syllables/%s', x) | ||
Table <- XML::readHTMLTable(URL, FALSE, which=2, stringsAsFactors = FALSE)[1:2, 2] | ||
NAdet <- function(x) grepl("^N/A$", x) | ||
data.frame(word = x, syllables = as.numeric(Table[1]), | ||
in_dict = NAdet(Table[2]), stringsAsFactors = FALSE) | ||
} | ||
|
||
|
||
syllable_scrape('jo-ann') | ||
|
||
|
||
words <- sort(unique(tolower(c(GradyAugmented, tolower(NAMES[[1]]))))) | ||
iter_list <- split((iter <- seq_along(words)), cut(iter, 20)) | ||
n <- 1 | ||
|
||
n <- 1 + n; n | ||
#===================== | ||
tic() | ||
########### | ||
|
||
|
||
cl <- makeCluster(mc <- getOption("cl.cores", 6)) | ||
clusterExport(cl=cl, varlist=c("words", "syllable_scrape"), envir=environment()) | ||
|
||
sylls <- parallel::parLapply(cl, iter_list[[n]], function(i) { | ||
|
||
Sys.sleep(.1) | ||
try(syllable_scrape(words[i])) | ||
|
||
}) | ||
|
||
saveRDS(sylls, file = sprintf("sylls/sylls_%s.rds", n)) | ||
stopCluster(cl) | ||
|
||
|
||
redos <- sapply(sylls, inherits, "try-error") | ||
|
||
grab_again <- words[iter_list[[n]]][redos] | ||
if (!identical(character(0), grab_again)){ | ||
|
||
sylls[redos] <- lapply(grab_again, function(x){ | ||
Sys.sleep(.1) | ||
try(syllable_scrape(x)) | ||
}) | ||
|
||
(wrds <- words[iter_list[[n]]][sapply(sylls, inherits, "try-error")]) | ||
cat(sprintf("%s errors\n", length(wrds))) | ||
if (length(wrds) > 0) cat(paste(wrds, collapse =", "), "\n") | ||
|
||
sylls[sapply(sylls, inherits, "try-error")] <- lapply(wrds, function(x){ | ||
out <- syllable_count(x) | ||
data_frame(word = out[1,1], syllables = out[1, 2], in_dict = ifelse(out[1,3] == "NF", FALSE, TRUE)) | ||
}) | ||
|
||
saveRDS(sylls, file = sprintf("sylls/sylls_%s.rds", n)) | ||
} else { | ||
cat("no errors\n") | ||
} | ||
############# | ||
toc() | ||
#===================== | ||
|
||
|
||
syll_df <- dplyr::rbind_all(sylls[!sapply(sylls, inherits, "try-error")]) | ||
saveRDS(syll_df, file = sprintf("syll_df/syll_df_%s.rds", n)) | ||
|
||
|
||
syllable_counts_data <- dplyr::rbind_all(lapply(1:20, function(i){ | ||
|
||
readRDS(sprintf("syll_df/syll_df_%s.rds", i)) | ||
})) | ||
|
||
|
||
|
||
|
||
sylls_original <- syllable_counts_data %>% | ||
filter(!in_dict) %>% | ||
select(word) %>% | ||
inner_join(syllable_df) %>% | ||
`[`(1:47,) | ||
|
||
|
||
syllable_counts_data <- syllable_counts_data[!syllable_counts_data[["word"]] %in% sylls_original[["word"]], ] | ||
|
||
sylls_original[["in_dict"]] <- TRUE | ||
|
||
syllable_counts_data <- dplyr::rbind_all(list(syllable_counts_data, sylls_original)) %>% | ||
arrange(word) %>% | ||
select(-in_dict) %>% | ||
as.data.frame(, stringsAsFactors = FALSE) | ||
|
||
|
||
data.table::setDT(syllable_counts_data) | ||
data.table::setkey(syllable_counts_data, word) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/count_string.R | ||
\name{count_string} | ||
\alias{count_string} | ||
\title{String Syllables Counts} | ||
\usage{ | ||
count_string(x, ...) | ||
} | ||
\arguments{ | ||
\item{x}{A character string.} | ||
|
||
\item{\ldots}{ignored} | ||
} | ||
\value{ | ||
Returns a vector of integer counts. | ||
} | ||
\description{ | ||
Syllable counts for the words in a single string. | ||
} | ||
\examples{ | ||
count_string("I like chicken and eggs for breakfast") | ||
count_string(hamlets_soliloquy) | ||
library(stringi) | ||
|
||
data.frame( | ||
word = stri_extract_all_words(stri_trans_tolower(hamlets_soliloquy))[[1]], | ||
syllables = count_string(hamlets_soliloquy) | ||
) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/syllable-package.R | ||
\docType{data} | ||
\name{hamlets_soliloquy} | ||
\alias{hamlets_soliloquy} | ||
\title{Hamlet's Soliloquy} | ||
\format{A character vector with 1 element} | ||
\usage{ | ||
data(hamlets_soliloquy) | ||
} | ||
\description{ | ||
A dataset containing a character string of Hamlet's soliloquy. | ||
} | ||
\keyword{datasets} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/syllable-package.R | ||
\docType{data} | ||
\name{presidential_debates_2012} | ||
\alias{presidential_debates_2012} | ||
\title{2012 U.S. Presidential Debates} | ||
\format{A data frame with 2912 rows and 4 variables} | ||
\usage{ | ||
data(presidential_debates_2012) | ||
} | ||
\description{ | ||
A dataset containing a cleaned version of all three presidential debates for | ||
the 2012 election. | ||
} | ||
\details{ | ||
\itemize{ | ||
\item person. The speaker | ||
\item tot. Turn of talk | ||
\item dialogue. The words spoken | ||
\item time. Variable indicating which of the three debates the dialogue is from | ||
} | ||
} | ||
\keyword{datasets} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
% Generated by roxygen2 (4.1.1): do not edit by hand | ||
% Please edit documentation in R/sum_string.R | ||
\name{sum_string} | ||
\alias{sum_string} | ||
\title{String Syllables Sum} | ||
\usage{ | ||
sum_string(x, ...) | ||
} | ||
\arguments{ | ||
\item{x}{A character string.} | ||
|
||
\item{\ldots}{ignored} | ||
} | ||
\value{ | ||
Returns a single integer of the total number of syllables in the string. | ||
} | ||
\description{ | ||
Syllable sum for the words in a single string. | ||
} | ||
\examples{ | ||
sum_string("I like chicken and eggs for breakfast") | ||
sum_string(hamlets_soliloquy) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
context("Checking count_string") | ||
|
||
test_that("count_string ...",{ | ||
|
||
|
||
}) | ||
|
Oops, something went wrong.