Skip to content
This repository has been archived by the owner on May 10, 2022. It is now read-only.

Commit

Permalink
metadata fxn's ready, still working on fxn for searching the corpus
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed Jun 29, 2012
1 parent d4273ed commit 7b6dbde
Show file tree
Hide file tree
Showing 26 changed files with 497 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
.DS_Store
22 changes: 22 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,22 @@
Package: rhindwai
Type: Package
Title: Interface to Hindawi Publishing OAI-PMH methods
Version: 0.0.1
License: CC0
Date: 2012-06-27
Authors@R: c(person("Scott", "Chamberlain", role = c("aut", "cre"),
email = "myrmecocystus@gmail.com"))
Description: A programmatic interface to the
Web Service methods provided by Hindawi.
URL: https://github.com/ropensci/rhindwai
Collate:
'getrecord.r'
'identify.r'
'listidentifiers.r'
'listmetadataformats.r'
'listrecords.r'
'listsets.r'
'getcorpus.r'
'journals.r'
'subjects.r'
'unzipcorpus.r'
12 changes: 12 additions & 0 deletions NAMESPACE
@@ -0,0 +1,12 @@
export(getcorpus)
export(getrecord)
export(identify)
export(journals)
export(listidentifiers)
export(listmetadataformats)
export(listrecords)
export(listsets)
export(subjects)
export(unzipcorpus)
import(OAIHarvester)
import(XML)
1 change: 1 addition & 0 deletions R/.gitignore
@@ -0,0 +1 @@
.DS_Store
13 changes: 13 additions & 0 deletions R/getcorpus.r
@@ -0,0 +1,13 @@
#' Download the Hindawi corpus zip file to the user's home directory.
#'
#' Fetches \code{articles.zip} from the Hindawi FTP server and saves it as
#' \code{"~/hindwai_corpus.zip"}. The transfer is explicitly done in binary
#' mode so that the archive is not damaged by line-ending translation on
#' Windows.
#'
#' @return The value of \code{download.file}, returned invisibly (an integer
#'    status code, 0 for success).
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' getcorpus()
#' }
#' @export
getcorpus <- function()
{
  download.file(
    url = "ftp://hindawi.corpus:download@ftp.hindawi.com/articles.zip",
    destfile = "~/hindwai_corpus.zip",
    quiet = TRUE,
    # a zip archive is binary data; never rely on the text-mode default
    mode = "wb")
}
22 changes: 22 additions & 0 deletions R/getrecord.r
@@ -0,0 +1,22 @@
#' Retrieve an individual metadata record from a Hindawi Publishing Corporation repository.
#'
#' The record identifier sent to the server is built by prefixing \code{id}
#' with \code{"oai:hindawi.com:"}. The record is always requested in the
#' \code{"oai_dc"} metadata format.
#'
#' @import OAIHarvester
#' @param id identifier of the record without the \code{"oai:hindawi.com:"}
#'    prefix, e.g. \code{"10.1155/2011/391971"}. If \code{NULL} (the default),
#'    \code{NULL} is passed on to \code{oaih_get_record} as the identifier.
#' @inheritParams listmetadataformats
#' @return The value returned by \code{oaih_get_record}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' out <- getrecord("10.1155/2011/391971", TRUE)
#' out$datestamp # get $identifier, $datestamp, $setSpec, or $metadata
#' oaih_transform(out$metadata) # transform only metadata to a list
#' }
#' @export
getrecord <- function(id = NULL, transform = TRUE,
  url = "http://www.hindawi.com/oai-pmh/oai.aspx")
{
  # Test is.null() directly: comparing against `T` breaks if `T` is reassigned.
  oaiid <- if (is.null(id)) NULL else paste0("oai:hindawi.com:", id)
  oaih_get_record(
    url,
    oaiid,
    prefix = "oai_dc",
    transform = transform)
}
20 changes: 20 additions & 0 deletions R/hindawi_search.r
@@ -0,0 +1,20 @@
# #' Search the Hindawi corpus locally.
# #'
# #' The Hindawi corpus zip file (http://www.hindawi.com/corpus/) is updated
# #' every day.
# #'
# #' @import tm
# #' @param terms Search terms.
# #' @param fuzzy Fuzzy search? Defaults to FALSE.
# #' @param journal_title Journal title to search.
# #' @param year Year to search.
# #' @param directory Defaults to where they were written with getcorpus().
# #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
# #' @examples \dontrun{
# #' hindawi_search()
# #' }
# #' @export
# hindawi_search <- function(terms = NA, , directory)
# {
# # xmlParse("~/hindwai/2011/ISRN.SP/101582-2011-06-08.xml")
# }
15 changes: 15 additions & 0 deletions R/identify.r
@@ -0,0 +1,15 @@
#' Describe the Hindawi OAI-PMH repository.
#'
#' Thin wrapper that calls \code{oaih_identify} on the Hindawi OAI-PMH
#' endpoint and returns its result unchanged.
#'
#' @import OAIHarvester
#' @inheritParams listmetadataformats
#' @return The value returned by \code{oaih_identify}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' identify()
#' }
#' @export
identify <- function(
  transform = TRUE,
  url = "http://www.hindawi.com/oai-pmh/oai.aspx"
) {
  oaih_identify(url, transform = transform)
}
19 changes: 19 additions & 0 deletions R/journals.r
@@ -0,0 +1,19 @@
#' Tabulate journal-level information from the downloaded corpus.
#'
#' Parses \code{"~/hindwai/Subjects.xml"} and collects the text of every
#' \code{full.title}, \code{e-issn}, \code{doi}, \code{start.year} and
#' \code{publication.years} element found anywhere in the document.
#'
#' @import XML
#' @return A data.frame with the columns \code{jtitle}, \code{issn},
#'    \code{doi}, \code{startyear} and \code{pubyears}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' journals()
#' }
#' @export
journals <- function() {
  corpus_index <- xmlParse("~/hindwai/Subjects.xml")

  # text content of every node matched by an XPath expression
  node_text <- function(xpath) xpathSApply(corpus_index, xpath, xmlValue)

  data.frame(
    jtitle = node_text("//full.title"),
    issn = node_text("//e-issn"),
    doi = node_text("//doi"),
    startyear = node_text("//start.year"),
    pubyears = node_text("//publication.years"))
}
33 changes: 33 additions & 0 deletions R/listidentifiers.r
@@ -0,0 +1,33 @@
#' List record headers from the Hindawi repository.
#'
#' An abbreviated form of \code{listrecords}: only headers are retrieved
#' rather than full records, e.g. for a specific volume of a journal.
#'
#' @import OAIHarvester
#' @param from only return records created, updated or deleted on or after
#'    this date.
#' @param until only return records created, updated or deleted on or before
#'    this date.
#' @param set the set that returned records must belong to.
#' @param prefix the metadata format that the records will be returned in.
#' @param token a token previously provided by the server to resume a request
#'    where it last left off. Note: this function accepts the argument but
#'    does not pass it on to \code{oaih_list_identifiers}.
#' @inheritParams listmetadataformats
#' @return The value returned by \code{oaih_list_identifiers}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' temp <- listidentifiers()
#' temp[[1]]
#' listidentifiers(set = 'aaa:2007')
#' }
#' @export
listidentifiers <- function(
  from = NULL, until = NULL, set = NULL,
  prefix = 'oai_dc', token = NULL, transform = TRUE,
  url = "http://www.hindawi.com/oai-pmh/oai.aspx"
) {
  oaih_list_identifiers(
    url,
    from = from, until = until, set = set,
    prefix = prefix, transform = transform)
}
15 changes: 15 additions & 0 deletions R/listmetadataformats.r
@@ -0,0 +1,15 @@
#' List the metadata formats offered by the Hindawi Publishing Corporation repository.
#'
#' Thin wrapper that calls \code{oaih_list_metadata_formats} on the Hindawi
#' OAI-PMH endpoint and returns its result unchanged.
#'
#' @import OAIHarvester
#' @param transform transform metadata to list (TRUE/FALSE)
#' @param url the base url for Hindawi (leave to default)
#' @return The value returned by \code{oaih_list_metadata_formats}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' listmetadataformats()
#' }
#' @export
listmetadataformats <- function(
  transform = TRUE,
  url = "http://www.hindawi.com/oai-pmh/oai.aspx"
) {
  oaih_list_metadata_formats(url, transform = transform)
}
21 changes: 21 additions & 0 deletions R/listrecords.r
@@ -0,0 +1,21 @@
#' Harvest full records from a Hindawi Publishing Corporation repository.
#'
#' Thin wrapper that calls \code{oaih_list_records} on the Hindawi OAI-PMH
#' endpoint. The \code{token} argument is accepted but is not passed on to
#' \code{oaih_list_records} by this function.
#'
#' @import OAIHarvester
#' @inheritParams listidentifiers
#' @return The value returned by \code{oaih_list_records}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' listrecords(set = 'AMET:2012', transform = FALSE)
#' }
#' @export
listrecords <- function(
  from = NULL, until = NULL, set = NULL, prefix = 'oai_dc',
  token = NULL, transform = TRUE,
  url = "http://www.hindawi.com/oai-pmh/oai.aspx"
) {
  oaih_list_records(
    url,
    from = from, until = until, set = set,
    prefix = prefix, transform = transform)
}
15 changes: 15 additions & 0 deletions R/listsets.r
@@ -0,0 +1,15 @@
#' List the sets defined in the Hindawi Publishing Corporation repository.
#'
#' Thin wrapper that calls \code{oaih_list_sets} on the Hindawi OAI-PMH
#' endpoint and returns its result unchanged.
#'
#' @import OAIHarvester
#' @inheritParams listmetadataformats
#' @return The value returned by \code{oaih_list_sets}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' sets <- listsets()
#' head(sets)
#' }
#' @export
listsets <- function(
  transform = TRUE,
  url = "http://www.hindawi.com/oai-pmh/oai.aspx"
) {
  oaih_list_sets(url, transform = transform)
}
14 changes: 14 additions & 0 deletions R/subjects.r
@@ -0,0 +1,14 @@
#' List the subject areas of Hindawi journals.
#'
#' Parses \code{"~/hindwai/Subjects.xml"} and returns the text of every
#' \code{subject.title} element found anywhere in the document.
#'
#' @import XML
#' @return The result of \code{xpathSApply} applied with \code{xmlValue} to
#'    the matched \code{subject.title} nodes.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' subjects()
#' }
#' @export
subjects <- function() {
  subject_doc <- xmlParse("~/hindwai/Subjects.xml")
  xpathSApply(
    subject_doc,
    "//subject.title",
    xmlValue)
}
11 changes: 11 additions & 0 deletions R/unzipcorpus.r
@@ -0,0 +1,11 @@
#' Extract the corpus archive previously fetched with getcorpus().
#'
#' Unpacks \code{"~/hindwai_corpus.zip"} into the directory
#' \code{"~/hindwai"}.
#'
#' @return The value returned by \code{unzip}.
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}
#' @examples \dontrun{
#' unzipcorpus()
#' }
#' @export
unzipcorpus <- function() {
  unzip(
    zipfile = "~/hindwai_corpus.zip",
    exdir = "~/hindwai")
}
20 changes: 19 additions & 1 deletion README.md
@@ -1,4 +1,22 @@
rhindawi
========

R interface to the Hindawi Journals metadata and corpus.

Install using `install_github` from [Hadley's](https://github.com/hadley/devtools) devtools package.

```R
install.packages("devtools")
require(devtools)
install_github("rhindawi", "ropensci")
require(rhindawi)
```

This package will access 1) Hindawi Journals metadata, and 2) the full corpus. The full corpus is apparently updated daily. There is no API for the full text, but functions in this package (`getcorpus()`, `unzipcorpus()`, and `hindawi_search()`, which is still in development) allow easy download and searching of the full corpus.


+ [Hindawi Journals OAI-PMH metadata documentation](http://www.hindawi.com/oai-pmh/)
+ [Hindawi Journals corpus download](http://www.hindawi.com/corpus/)
+ [Documentation for OAI-PMH in general](http://www.openarchives.org/OAI/openarchivesprotocol.html)

You do not need an API key, or other authentication.

`rhindawi` is part of the rOpenSci project, visit http://ropensci.org to learn more.
18 changes: 18 additions & 0 deletions man/getcorpus.Rd
@@ -0,0 +1,18 @@
\name{getcorpus}
\alias{getcorpus}
\title{Download Hindawi corpus zip file to "~/".}
\usage{
getcorpus()
}
\description{
Download Hindawi corpus zip file to "~/".
}
\examples{
\dontrun{
getcorpus()
}
}
\author{
Scott Chamberlain \email{myrmecocystus@gmail.com}
}

24 changes: 24 additions & 0 deletions man/getrecord.Rd
@@ -0,0 +1,24 @@
\name{getrecord}
\alias{getrecord}
\title{Retrieve an individual metadata record from a Hindawi Publishing Corporation repository.}
\usage{
getrecord(id = NULL, transform = TRUE,
url = "http://www.hindawi.com/oai-pmh/oai.aspx")
}
\arguments{
\item{transform}{transform metadata to list (TRUE/FALSE)}

\item{url}{the base url for Hindawi (leave to default)}
}
\description{
Retrieve an individual metadata record from a Hindawi
Publishing Corporation repository.
}
\examples{
\dontrun{
out <- getrecord("10.1155/2011/391971", T)
out$datestamp # get $identifier, $datestamp, $setSpec, or $metadata
oaih_transform(out$metadata) # transform only metadata to a list
}
}

21 changes: 21 additions & 0 deletions man/identify.Rd
@@ -0,0 +1,21 @@
\name{identify}
\alias{identify}
\title{Retrieve information about the Hindawi repository.}
\usage{
identify(transform = TRUE,
url = "http://www.hindawi.com/oai-pmh/oai.aspx")
}
\arguments{
\item{transform}{transform metadata to list (TRUE/FALSE)}

\item{url}{the base url for Hindawi (leave to default)}
}
\description{
Learn about the Hindawi OAI-PMH service
}
\examples{
\dontrun{
identify()
}
}

18 changes: 18 additions & 0 deletions man/journals.Rd
@@ -0,0 +1,18 @@
\name{journals}
\alias{journals}
\title{Get information on each journal.}
\usage{
journals()
}
\description{
Get information on each journal.
}
\examples{
\dontrun{
journals()
}
}
\author{
Scott Chamberlain \email{myrmecocystus@gmail.com}
}

0 comments on commit 7b6dbde

Please sign in to comment.