Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 7518b3c
Showing
10 changed files
with
153 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
Package: musemeta | ||
Type: Package | ||
Title: R Client for Scraping Museum Metadata | ||
Description: Scrape museum metadata. | ||
Version: 0.0.1.99 | ||
Date: 2014-11-11 | ||
Authors@R: as.person(c("Scott Chamberlain <myrmecocystus@gmail.com> [aut,cre]")) | ||
License: MIT + file LICENSE | ||
URL: https://github.com/ropensci/musemeta | ||
BugReports: http://www.github.com/ropensci/musemeta/issues | ||
Imports: | ||
httr, | ||
jsonlite, | ||
XML | ||
Suggests: | ||
roxygen2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
YEAR: 2014 | ||
COPYRIGHT HOLDER: Scott Chamberlain |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Generated by roxygen2 (4.0.2): do not edit by hand | ||
|
||
S3method(print,muse) | ||
export(muse_get) | ||
importFrom(XML,htmlParse) | ||
importFrom(XML,xmlValue) | ||
importFrom(XML,xpathApply) | ||
importFrom(XML,xpathSApply) | ||
importFrom(httr,GET) | ||
importFrom(httr,content) | ||
importFrom(httr,stop_for_status) | ||
importFrom(jsonlite,fromJSON) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#' Get metadata for an object id.
#'
#' Builds the object's page URL from the base search URL and the id, fetches
#' the page, and parses it.
#'
#' @importFrom httr GET content stop_for_status
#' @importFrom jsonlite fromJSON
#' @importFrom XML xpathApply xpathSApply xmlValue htmlParse
#' @export
#'
#' @param id An object id
#' @param ... Curl args passed on to \code{\link[httr]{GET}}
#' @return An object of class \code{muse}: a list with elements \code{name}
#' and \code{values}.
#' @examples \donttest{
#' muse_get(559490)
#' muse_get(559490)$name
#' muse_get(559490)$values
#' muse_get(246562)
#' }
muse_get <- function(id, ...) {
  # The id is appended directly to the base search URL.
  object_url <- paste0(mmbase(), id)
  page_text <- musemeta_GET(object_url, ...)
  muse_parse(page_text)
}
|
||
#' @export
print.muse <- function(x, ...) {
  # Print a "muse" object: a header line with the object's name followed by
  # one "name: value" entry per element of x$values.
  #
  # x:   an object of class "muse" (a list with `name` and `values`).
  # ...: ignored; present for compatibility with the print() generic.
  # Returns `x` invisibly.
  cat(sprintf("<Museum metadata> %s", x$name), sep = "\n")
  for (field in x$values) {
    cat(sprintf(" %s: %s", field$name, field$value), sep = "\n")
  }
  # Print methods conventionally hand back their argument invisibly so the
  # call can be used in a pipeline or assignment without re-printing.
  invisible(x)
}
|
||
# Issue a GET request and return the response body as text.
#
# url:  address to request.
# args: list passed to GET() as the `query` argument.
# ...:  further arguments forwarded to GET().
musemeta_GET <- function(url, args = list(), ...) {
  response <- GET(url, query = args, ...)
  # Check the HTTP status before reading the body.
  stop_for_status(response)
  body_text <- content(response, "text")
  body_text
}
|
||
# Parse the HTML of an object page into a "muse" object.
#
# x: page content handed to htmlParse() (the text returned by musemeta_GET).
# Returns a list of class "muse" with
#   name   - text of the first <h2> child of the 'tombstone-container' div
#   values - one list(name = , value = ) per <div> child of the 'tombstone' div
# Stops with a descriptive error when one of those elements is missing,
# instead of failing with "subscript out of bounds".
muse_parse <- function(x) {
  doc <- htmlParse(x)

  containers <- xpathApply(doc, "//div[@class='tombstone-container']")
  if (length(containers) == 0) {
    stop("no div with class 'tombstone-container' found in page",
         call. = FALSE)
  }
  tcon <- containers[[1]]

  headings <- xpathApply(tcon, "h2")
  if (length(headings) == 0) {
    stop("no h2 element found in 'tombstone-container'", call. = FALSE)
  }
  name <- xmlValue(headings[[1]])

  # NOTE(review): an XPath starting with '//' is evaluated against the whole
  # document, not relative to `tcon`; confirm whether './/div' was intended.
  tombs <- xpathApply(tcon, "//div[@class='tombstone']")
  if (length(tombs) == 0) {
    stop("no div with class 'tombstone' found in page", call. = FALSE)
  }
  tomb <- tombs[[1]]

  tags <- lapply(xpathApply(tomb, "div"), function(node) {
    list(
      # Label: text of the <strong> child with colons removed.
      name = gsub(":", "", xpathSApply(node, "strong", xmlValue)),
      # Value: node text with everything up to a line break removed
      # (presumably the label part), then leading whitespace trimmed.
      value = gsub("^\\s+", "", gsub(".+\n|.+\r", "", xmlValue(node)))
    )
  })

  structure(list(name = name, values = tags), class = "muse")
}
|
||
# Base URL of the Met Museum online collection search pages.
mmbase <- function() {
  "http://www.metmuseum.org/collection/the-collection-online/search/"
}
|
||
# url <- 'http://www.metmuseum.org/collection/the-collection-online/search/559490?rpp=30&pg=1&rndkey=20140815&ft=*&what=Bronze&pos=1' | ||
# url <- 'http://www.metmuseum.org/collection/the-collection-online/search/559490' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#' R client for museum metadata | ||
#' | ||
#' @name musemeta-package | ||
#' @aliases musemeta | ||
#' @docType package | ||
#' @title R client for museum metadata | ||
#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com} | ||
#' @keywords package | ||
NULL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
% Generated by roxygen2 (4.0.2): do not edit by hand | ||
\name{muse_get} | ||
\alias{muse_get} | ||
\title{Get metadata for an object id.} | ||
\usage{ | ||
muse_get(id, ...) | ||
} | ||
\arguments{ | ||
\item{id}{An object id} | ||
|
||
\item{...}{Curl args passed on to \code{\link[httr]{GET}}} | ||
} | ||
\description{ | ||
Get metadata for an object id. | ||
} | ||
\examples{ | ||
\donttest{ | ||
muse_get(559490) | ||
muse_get(559490)$name | ||
muse_get(559490)$values | ||
muse_get(246562) | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
% Generated by roxygen2 (4.0.2): do not edit by hand | ||
\docType{package} | ||
\name{musemeta-package} | ||
\alias{musemeta} | ||
\alias{musemeta-package} | ||
\title{R client for museum metadata} | ||
\description{ | ||
R client for museum metadata | ||
} | ||
\author{ | ||
Scott Chamberlain \email{myrmecocystus@gmail.com} | ||
} | ||
\keyword{package} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: knitr | ||
LaTeX: pdfLaTeX | ||
|
||
AutoAppendNewline: Yes | ||
StripTrailingWhitespace: Yes | ||
|
||
BuildType: Package | ||
PackageUseDevtools: Yes | ||
PackageInstallArgs: --no-multiarch --with-keep.source |