This repository has been archived by the owner on May 10, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
reimplement from scratch based on new knowledge/spec
- Loading branch information
Showing
13 changed files
with
290 additions
and
226 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,5 @@ | |
^appveyor\.yml$ | ||
^\.travis\.yml$ | ||
^data$ | ||
^tests/testsuite-py$ | ||
^datapackage.json$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,6 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method("$",jeroen) | ||
S3method("[",jeroen) | ||
S3method("[[",jeroen) | ||
S3method(print,jeroen) | ||
export(data_package) | ||
importFrom(readr,write_csv) | ||
importFrom(readr,write_tsv) | ||
importFrom(tools,md5sum) | ||
S3method(print,datapkg_data) | ||
S3method(print,datapkg_resources) | ||
export(datapkg_read) | ||
import(readr) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
#' Open data-package
#'
#' Loads data and meta-data from a 'data-package' directory or URL.
#'
#' @import readr
#' @param path file path or URL to the data package directory (or to its
#'   \code{datapackage.json} file)
#' @rdname datapackage
#' @name datapackage
#' @aliases datapkg
#' @references \url{http://frictionlessdata.io/data-packages}, \url{https://github.com/datasets}
#' @export
#' @examples # Example data from https://github.com/datasets
#' datapkg_read("https://raw.githubusercontent.com/datasets/ex-tabular-multiple-resources-fk/master")
#' datapkg_read("https://raw.githubusercontent.com/datasets/gini-index/master")
#' datapkg_read("https://raw.githubusercontent.com/datasets/euribor/master")
datapkg_read <- function(path){
  # Normalize: strip a trailing "datapackage.json" (dot escaped — the
  # pattern is a regex) and any trailing slash so 'root' is the directory.
  root <- sub("datapackage\\.json$", "", path)
  root <- sub("/$", "", root)
  # file.path() inserts the separator itself; a leading "/" in the second
  # argument would yield a double slash in the path/URL.
  json_path <- file.path(root, "datapackage.json")
  json <- if(is_url(root)){
    con <- curl::curl(json_path, "r")
    on.exit(close(con))
    readLines(con, warn = FALSE)
  } else {
    readLines(normalizePath(json_path, mustWork = TRUE), warn = FALSE)
  }
  pkg_info <- jsonlite::fromJSON(json, simplifyVector = TRUE)
  if(is.data.frame(pkg_info$resources))
    class(pkg_info$resources) <- c("datapkg_resources", class(pkg_info$resources))
  if(is.data.frame(pkg_info$sources))
    class(pkg_info$sources) <- c("datapkg_sources", class(pkg_info$sources))
  # A package without a resource table simply has zero data sets; the
  # original called nrow() unconditionally, which errors on NULL.
  n_res <- if(is.data.frame(pkg_info$resources)) nrow(pkg_info$resources) else 0L
  # Preallocate one list slot per resource. The original list(rep(NA, n))
  # built a length-1 list holding an NA vector, not n slots.
  pkg_info$data <- vector("list", n_res)
  data_names <- pkg_info$resources$name
  for(i in seq_len(n_res)){
    target <- as.list(pkg_info$resources[i, ])
    pkg_info$data[[i]] <- read_data_package(get_data_path(target, root),
      dialect = as.list(target$dialect), hash = target$hash, target$schema$fields[[1]])
  }
  class(pkg_info$data) <- "datapkg_data"
  if(length(data_names))
    names(pkg_info$data) <- ifelse(is.na(data_names), "", data_names)
  pkg_info
}
|
||
# Resolve the local or remote location of a single resource.
#
# x:    a list describing one resource; may contain 'path' and/or 'url'.
# root: directory or base URL of the data package.
#
# Returns the resolved 'path' when it exists (or is itself a URL),
# otherwise falls back to 'url'; errors when no usable location remains.
get_data_path <- function(x, root){
  if(length(x$path)){
    data_path <- normalizePath(file.path(root, x$path), mustWork = FALSE)
    if(is_url(data_path) || file.exists(data_path)){
      return(data_path)
    }
    # Local file missing: prefer the declared fallback URL over failing.
    if(length(x$url)){
      message("File not found: ", data_path)
      return(x$url)
    }
    stop("File not found: ", data_path)
  }
  # The original fell through and returned NULL here, which made the
  # downstream reader fail obscurely. A resource may legitimately declare
  # only a 'url', so honour it before giving up.
  if(length(x$url)){
    return(x$url)
  }
  stop("Resource has neither 'path' nor 'url'")
}
|
||
# Test whether each element of 'x' looks like a URL, i.e. begins with a
# scheme such as "http://" or "ftp://". Vectorized; returns a logical
# vector the same length as 'x'.
is_url <- function(x){
  has_scheme <- grepl("^[a-zA-Z]+://", x)
  has_scheme
}
|
||
# Read a single tabular resource into a data frame.
#
# path:    file path or URL of the data file.
# dialect: CSV-dialect options, forwarded to parse_data_file().
# hash:    declared checksum of the file (accepted but not verified here).
# fields:  data frame of schema field descriptors; when absent, an empty
#          data frame is returned without touching 'path' at all.
read_data_package <- function(path, dialect = list(), hash = NULL, fields = NULL) {
  if(!length(fields))
    return(data.frame())
  # Build one readr column spec per schema field, preserving field order.
  col_types <- lapply(seq_len(nrow(fields)), function(row)
    do.call(make_field, as.list(fields[row, ])))
  reader_args <- c(list(file = path, col_types = col_types), dialect)
  do.call(parse_data_file, reader_args)
}
|
||
# Map one Table-Schema field descriptor onto a readr column spec.
#
# name:        field name (unused; accepted so a whole descriptor row can
#              be splatted in via do.call).
# type:        Table-Schema type; unknown types fall back to character.
# description: free-text description (unused).
# format:      date format string, optionally carrying a "fmt:" prefix.
# ...:         absorbs any other descriptor entries.
make_field <- function(name = "", type = "string", description = "", format = "%Y-%m-%d", ...){
  # Date formats may carry a legacy "fmt:" prefix; strip it before use.
  date_format <- sub("^fmt:", "", format)
  switch(type,
    string   = col_character(),
    object   = col_character(),
    array    = col_character(),
    number   = col_number(),
    integer  = col_integer(),
    boolean  = col_logical(),
    date     = col_date(date_format),
    datetime = col_datetime(),
    time     = col_time(),
    col_character()  # default for unrecognised types
  )
}
|
||
## Defaults from http://dataprotocols.org/csv-dialect/ | ||
parse_data_file <- function(file, col_types = NULL, delimiter = ",", doubleQuote = TRUE, | ||
lineTerminator = "\r\n", quoteChar = '"', escapeChar = "", skipInitialSpace = TRUE, | ||
header = TRUE, caseSensitiveHeader = FALSE){ | ||
|
||
# unused: lineTerminator, skipInitialSpace, caseSensitiveHeader | ||
message("Reading file ", file) | ||
readr::read_delim( | ||
col_types = col_types, | ||
file = file, | ||
delim = delimiter, | ||
escape_double = doubleQuote, | ||
quote = quoteChar, | ||
escape_backslash = identical(escapeChar, "\\"), | ||
col_names = header | ||
) | ||
} | ||
|
||
#' @export
print.datapkg_resources <- function(x, ...){
  # Show only the identifying columns of the resource table; any other
  # metadata columns are hidden from the printout.
  keep <- c("name", "path", "format")
  plain <- as.data.frame(x)
  print(plain[names(plain) %in% keep])
}
|
||
#' @export
print.datapkg_data <- function(x, ...){
  element_names <- names(x)
  for(i in seq_along(x)){
    label <- element_names[i]
    # Named elements are listed as " $name", unnamed ones as " [[i]]".
    if(length(label) && !is.na(label)){
      cat(" $", label, "\n", sep = "")
    } else {
      cat(" [[", i, "]]\n", sep = "")
    }
    tbl <- x[[i]]
    # One line per column: index, column name and its primary class.
    for(j in seq_along(tbl)){
      cat(" [", j, "] ", names(tbl)[j], " (", methods::is(tbl[[j]])[1], ")\n", sep = "")
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
Oops, something went wrong.