/
example_ngram.R
57 lines (51 loc) · 1.34 KB
/
example_ngram.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
\dontrun{
# use sunburst to analyze ngram data from Peter Norvig
# http://norvig.com/mayzner.html
library(sunburstR)
library(pipeR)
# read the csv data downloaded from the Google Fusion Table linked in the article
ngrams2 <- read.csv(
system.file(
"examples/ngrams2.csv"
,package="sunburstR"
)
, stringsAsFactors = FALSE
)
ngrams2 %>>%
# let's look at ngrams at the start of a word, so columns 1 and 3
(.[,c(1,3)]) %>>%
# split the ngrams into a sequence by splitting each letter and adding -
(
data.frame(
sequence = strsplit(.[,1],"") %>>%
lapply( function(ng){ paste0(ng,collapse = "-") } ) %>>%
unlist
,freq = .[,2]
,stringsAsFactors = FALSE
)
) %>>%
sunburst
library(htmltools)
ngrams2 %>>%
(
lapply(
seq.int(3,ncol(.))
,function(letpos){
(.[,c(1,letpos)]) %>>%
# split the ngrams into a sequence by splitting each letter and adding -
(
data.frame(
sequence = strsplit(.[,1],"") %>>%
lapply( function(ng){ paste0(ng,collapse = "-") } ) %>>%
unlist
,freq = .[,2]
,stringsAsFactors = FALSE
)
) %>>%
( tags$div(style="float:left;",sunburst( ., height = 300, width = 300 )) )
}
)
) %>>%
tagList %>>%
browsable
}