Permalink
Browse files

finalised association table functions

in rpubmed_mesh: mesh_assoc_table and keyword_assoc_table. Reduced
mesh_table to helper function.
  • Loading branch information...
1 parent 1b996e0 commit 53cba15c07057659ede329bdbf8344f71e8ec70e @DASpringate DASpringate committed Feb 11, 2013
Showing with 128 additions and 10 deletions.
  1. +3 −0 NAMESPACE
  2. +51 −8 R/rpubmed_mesh.R
  3. +1 −1 man/get_mesh_headings.Rd
  4. +31 −0 man/keyword_assoc_table.Rd
  5. +13 −1 man/mesh_assoc_table.Rd
  6. +19 −0 man/mesh_heading_frequency.Rd
  7. +10 −0 man/mesh_table.Rd
View
@@ -6,6 +6,9 @@ export(geocode_addresses)
export(get_article_location_data)
export(get_articles_by_terms)
export(get_mesh_headings)
+export(keyword_assoc_table)
+export(mesh_assoc_table)
+export(mesh_heading_frequency)
export(pubmed_fetch)
export(record_counts_by_year)
export(write_JSON_file)
View
@@ -1,14 +1,14 @@
-
-#'
+#' Builds an association matrix for all MeSH terms in an article corpus
+#' @export
+#' @param corpus a list of Pubmed records. e.g. as returned by fetch_in_chunks()
+#' @return matrix
#' @examples \dontrun{
#'
#' articles <- fromJSON("Test/PCD_articles.json")
-#' a = mesh_table(articles)
#' diab = get_articles_by_terms(articles, list("diabetes", c("case-control")), where= in_mesh_abstract_p)
-#' diab_tab <- mesh_table(diab)
-#' diab_assoc <- mesh_assoc_table(diab_tab)
+#' diab_assoc <- mesh_assoc_table(diab)
#' # list of MeSH headings with frequencies:
-#' d <- diag(ccsa)
+#' d <- diag(diab_assoc)
#'
#' # Pick only those appearing in > 5 articles:
#' d_5 <- d[d > 5]
@@ -26,7 +26,8 @@
#' plot(g, layout=layout1)
#' }
#'
-mesh_assoc_table <- function(m_table){
+mesh_assoc_table <- function(corpus){
+ m_table <- mesh_table(corpus)
assoc_table <- matrix(0, nrow = length(levels(m_table$X1)), ncol = length(levels(m_table$X1)),
dimnames = list(levels(m_table$X1)[ordered = TRUE], levels(m_table$X1)[ordered = TRUE]))
m_table$X1 <- as.character(m_table$X1)
@@ -37,6 +38,7 @@ mesh_assoc_table <- function(m_table){
assoc_table
}
+#' helper function for mesh_assoc_table
mesh_table <- function(articles){
m_table <- data.frame(do.call(rbind,
lapply(1:length(articles),
@@ -54,11 +56,46 @@ mesh_table <- function(articles){
m_table
}
+#' Builds an association table for a character vector of search terms in a corpus. This can then e.g. be fed into igraph to generate an adjacency graph of terms
+#' @export
+#' @param corpus a list of Pubmed records. e.g. as returned by fetch_in_chunks()
+#' @param keyword_list Character vector of keywords
+#' @param ... arguments to be passed to get_articles_by_terms. e.g. where = in_mesh_abstract_p
+#' @return matrix
+#' @examples \dontrun{
+#' articles <- fromJSON("Test/PCD_articles.json")
+#' keywords <- c("effectiveness", "treatment outcome", "comorbidity", "risk factor", "incidence")
+#' kat <- keyword_assoc_table(corpus = articles, keyword_list = keywords, where = in_mesh_abstract_p)
+#' }
+keyword_assoc_table <- function(corpus, keyword_list, ...){
+  # Use the keyword_list argument itself (not a variable named `keywords` from
+  # the calling environment); duplicates are dropped so each pair is only
+  # searched for once.
+  terms <- unique(as.character(keyword_list))
+  # combn() stops with an error when asked for pairs from fewer than two
+  # items, so only build the off-diagonal pairs when there is something to pair.
+  pairs <- if(length(terms) > 1){
+    t(utils::combn(terms, 2))
+  } else{
+    matrix(character(0), ncol = 2)
+  }
+  # Append one (k, k) row per keyword: these become the diagonal of the table,
+  # i.e. the number of articles matching each keyword on its own.
+  pairs <- rbind(pairs, matrix(rep(terms, each = 2), ncol = 2, byrow = TRUE))
+  # Rows and columns are labelled with the keywords in sorted order.
+  labels <- sort(terms)
+  assoc_table <- matrix(0, nrow = length(labels), ncol = length(labels),
+                        dimnames = list(labels, labels))
+  for(i in seq_len(nrow(pairs))){
+    first <- pairs[i, 1]
+    second <- pairs[i, 2]
+    # A diagonal cell searches on a single term; any other cell searches on
+    # both terms of the pair.
+    term_list <- if(first == second) list(first) else list(first, second)
+    # Only the [first, second] cell of each pair is filled; the mirrored
+    # [second, first] cell keeps its initial 0.
+    assoc_table[first, second] <- length(get_articles_by_terms(corpus = corpus,
+                                                               term_list = term_list,
+                                                               ...))
+  }
+  assoc_table
+}
+
#' Returns a list of MeSH headings for an article
#' @export
#' @param article List representing a single Pubmed Record e.g. an element from a list returned from fetch_in_chunks()
-#' @return list of MeSHHeadings
+#' @return list of MeSH Headings
#'
#'
get_mesh_headings <- function(article){
@@ -69,6 +106,12 @@ get_mesh_headings <- function(article){
})
}
+#' Returns a data frame of all MeSH headings in a corpus, with frequencies for each
+#' @export
+#' @param corpus a list of Pubmed records. e.g. as returned by fetch_in_chunks()
+#' @return data frame of MeSH headings with the frequency of each
+#'
+#'
mesh_heading_frequency <- function(corpus){
ft <- data.frame(table(as.character(unlist(lapply(corpus,
function(record) as.character(unlist(get_mesh_headings(record))))))))
View
@@ -10,7 +10,7 @@
fetch_in_chunks()}
}
\value{
- list of MeSHHeadings
+ list of MeSH Headings
}
\description{
Returns a list of MeSH headings for an article
View
@@ -0,0 +1,31 @@
+\name{keyword_assoc_table}
+\alias{keyword_assoc_table}
+\title{Builds an association table for a character vector of search terms in a corpus. This can then e.g. be fed into igraph to generate an adjacency graph of terms}
+\usage{
+ keyword_assoc_table(corpus, keyword_list, ...)
+}
+\arguments{
+ \item{corpus}{a list of Pubmed records. e.g. as returned
+ by fetch_in_chunks()}
+
+ \item{keyword_list}{Character vector of keywords}
+
+ \item{...}{arguments to be passed to
+ get_articles_by_terms. e.g. where = in_mesh_abstract_p}
+}
+\value{
+ matrix
+}
+\description{
+ Builds an association table for a character vector of
+ search terms in a corpus. This can then e.g. be fed into
+ igraph to generate an adjacency graph of terms
+}
+\examples{
+\dontrun{
+ articles <- fromJSON("Test/PCD_articles.json")
+ keywords <- c("effectiveness", "treatment outcome", "comorbidity", "risk factor", "incidence")
+ kat <- keyword_assoc_table(corpus = articles, keyword_list = keywords, where = in_mesh_abstract_p)
+ }
+}
+
View
@@ -1,7 +1,19 @@
\name{mesh_assoc_table}
\alias{mesh_assoc_table}
+\title{Builds an association matrix for all MeSH terms in an article corpus}
\usage{
- mesh_assoc_table(m_table)
+ mesh_assoc_table(corpus)
+}
+\arguments{
+ \item{corpus}{a list of Pubmed records. e.g. as returned
+ by fetch_in_chunks()}
+}
+\value{
+ matrix
+}
+\description{
+ Builds an association matrix for all MeSH terms in an
+ article corpus
}
\examples{
\dontrun{
@@ -0,0 +1,19 @@
+\name{mesh_heading_frequency}
+\alias{mesh_heading_frequency}
+\title{Returns a data frame of all MeSH headings in a corpus, with frequencies for each}
+\usage{
+ mesh_heading_frequency(corpus)
+}
+\arguments{
+ \item{corpus}{a list of Pubmed records. e.g. as returned
+ by fetch_in_chunks()}
+}
+\value{
+ data frame of MeSH headings with the frequency of each
+}
+\description{
+ Returns a data frame of all MeSH headings in a corpus,
+ with frequencies for each
+}
+
View
@@ -0,0 +1,10 @@
+\name{mesh_table}
+\alias{mesh_table}
+\title{helper function for mesh_assoc_table}
+\usage{
+ mesh_table(articles)
+}
+\description{
+ helper function for mesh_assoc_table
+}
+

0 comments on commit 53cba15

Please sign in to comment.