C R C++ Other
Latest commit b0997a5 Feb 4, 2017 @jeroenooms jeroenooms Fix Solaris

README.md

jqr

Build Status Build status Coverage Status rstudio mirror downloads cran version

R interface to jq, a JSON processor http://stedolan.github.io/jq/

jqr is currenlty using jq v1.5rc2 - v1.5 manual

jqr makes it easy to process large amounts of json without having to convert from json to R, or without using regular expressions. This means that the eventual loading into R can be quicker.

Introduction

Install

Stable version:

install.packages("jqr")

Development version:

devtools::install_github("ropensci/jqr")
library("jqr")

Interfaces

low level

There's a low level interface in which you can execute jq code just as you would on the command line:

str <- '[{
    "foo": 1,
    "bar": 2
  },
  {
    "foo": 3,
    "bar": 4
  },
  {
    "foo": 5,
    "bar": 6
}]'
jq(str, ".[]")
#> [
#>     {
#>         "foo": 1,
#>         "bar": 2
#>     },
#>     {
#>         "foo": 3,
#>         "bar": 4
#>     },
#>     {
#>         "foo": 5,
#>         "bar": 6
#>     }
#> ]
jq(str, "[.[] | {name: .foo} | keys]")
#> [
#>     [
#>         "name"
#>     ],
#>     [
#>         "name"
#>     ],
#>     [
#>         "name"
#>     ]
#> ]

high level

The other is higher level, and uses a suite of functions to construct queries. Queries are constucted, then excuted internally with jq() after the last piped command.

You don't have to use pipes though. See examples below.

Examples:

Index

x <- '[{"message": "hello", "name": "jenn"}, {"message": "world", "name": "beth"}]'
x %>% index()
#> [
#>     {
#>         "message": "hello",
#>         "name": "jenn"
#>     },
#>     {
#>         "message": "world",
#>         "name": "beth"
#>     }
#> ]

Sort

'[8,3,null,6]' %>% sortj
#> [
#>     null,
#>     3,
#>     6,
#>     8
#> ]

reverse order

'[1,2,3,4]' %>% reverse
#> [
#>     4,
#>     3,
#>     2,
#>     1
#> ]

Show the query to be used using peek()

FIXME - broken right now

'[1,2,3,4]' %>% reverse %>% peek

get multiple outputs for array w/ > 1 element

x <- '{"user":"stedolan","titles":["JQ Primer", "More JQ"]}'
jq(x, '{user, title: .titles[]}')
x %>% index()
x %>% select(user, title = `.titles[]`)
jq(x, '{user, title: .titles[]}') %>% jsonlite::toJSON() %>% jsonlite::validate()

string operations

join

'["a","b,c,d","e"]' %>% join
#> "a, b,c,d, e"
'["a","b,c,d","e"]' %>% join(`;`)
#> "a; b,c,d; e"

ltrimstr

'["fo", "foo", "barfoo", "foobar", "afoo"]' %>% index() %>% ltrimstr(foo)
#> [
#>     "fo",
#>     "",
#>     "barfoo",
#>     "bar",
#>     "afoo"
#> ]

rtrimstr

'["fo", "foo", "barfoo", "foobar", "foob"]' %>% index() %>% rtrimstr(foo)
#> [
#>     "fo",
#>     "",
#>     "bar",
#>     "foobar",
#>     "foob"
#> ]

startswith

'["fo", "foo", "barfoo", "foobar", "barfoob"]' %>% index %>% startswith(foo)
#> [
#>     false,
#>     true,
#>     false,
#>     true,
#>     false
#> ]

endswith

'["fo", "foo", "barfoo", "foobar", "barfoob"]' %>% index %>% endswith(foo)
#> [
#>     false,
#>     true,
#>     true,
#>     false,
#>     false
#> ]

tojson, fromjson, tostring

'[1, "foo", ["foo"]]' %>% index
#> [
#>     1,
#>     "foo",
#>     [
#>         "foo"
#>     ]
#> ]
'[1, "foo", ["foo"]]' %>% index %>% tostring
#> [
#>     "1",
#>     "foo",
#>     "[\"foo\"]"
#> ]
'[1, "foo", ["foo"]]' %>% index %>% tojson
#> [
#>     "1",
#>     "\"foo\"",
#>     "[\"foo\"]"
#> ]
'[1, "foo", ["foo"]]' %>% index %>% tojson %>% fromjson
#> [
#>     1,
#>     "foo",
#>     [
#>         "foo"
#>     ]
#> ]

contains

'"foobar"' %>% contains("bar")
#> true

unique

'[1,2,5,3,5,3,1,3]' %>% uniquej
#> [
#>     1,
#>     2,
#>     3,
#>     5
#> ]

types

get type information for each element

'[0, false, [], {}, null, "hello"]' %>% types
#> [
#>     "number",
#>     "boolean",
#>     "array",
#>     "object",
#>     "null",
#>     "string"
#> ]
'[0, false, [], {}, null, "hello", true, [1,2,3]]' %>% types
#> [
#>     "number",
#>     "boolean",
#>     "array",
#>     "object",
#>     "null",
#>     "string",
#>     "boolean",
#>     "array"
#> ]

select elements by type

'[0, false, [], {}, null, "hello"]' %>% index() %>% type(booleans)
#> false

key operations

get keys

str <- '{"foo": 5, "bar": 7}'
str %>% keys()
#> [
#>     "bar",
#>     "foo"
#> ]

delete by key name

str %>% del(bar)
#> {
#>     "foo": 5
#> }

check for key existence

str3 <- '[[0,1], ["a","b","c"]]'
str3 %>% haskey(2)
#> [
#>     false,
#>     true
#> ]
str3 %>% haskey(1,2)
#> [
#>     true,
#>     false,
#>     true,
#>     true
#> ]

Select variables by name, and rename

'{"foo": 5, "bar": 7}' %>% select(a = .foo)
#> {
#>     "a": 5
#> }

More complicated select(), using the included dataset githubcommits

githubcommits %>%
  index() %>%
  select(sha = .sha, name = .commit.committer.name)
#> [
#>     {
#>         "sha": [
#>             "110e009996e1359d25b8e99e71f83b96e5870790"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "a50e548cc5313c187483bc8fb1b95e1798e8ef65"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "4b258f7d31b34ff5d45fba431169e7fd4c995283"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     }
#> ]

Maths

'{"a": 7}' %>%  do(.a + 1)
#> 8
'{"a": [1,2], "b": [3,4]}' %>%  do(.a + .b)
#> [
#>     1,
#>     2,
#>     3,
#>     4
#> ]
'{"a": [1,2], "b": [3,4]}' %>%  do(.a - .b)
#> [
#>     1,
#>     2
#> ]
'{"a": 3}' %>%  do(4 - .a)
#> 1
'["xml", "yaml", "json"]' %>%  do('. - ["xml", "yaml"]')
#> ". - [\"xml\", \"yaml\"]"
'5' %>%  do(10 / . * 3)
#> 6

comparisons

'[5,4,2,7]' %>% index() %>% do(. < 4)
#> [
#>     false,
#>     false,
#>     true,
#>     false
#> ]
'[5,4,2,7]' %>% index() %>% do(. > 4)
#> [
#>     true,
#>     false,
#>     false,
#>     true
#> ]
'[5,4,2,7]' %>% index() %>% do(. <= 4)
#> [
#>     false,
#>     true,
#>     true,
#>     false
#> ]
'[5,4,2,7]' %>% index() %>% do(. >= 4)
#> [
#>     true,
#>     true,
#>     false,
#>     true
#> ]
'[5,4,2,7]' %>% index() %>% do(. == 4)
#> [
#>     false,
#>     true,
#>     false,
#>     false
#> ]
'[5,4,2,7]' %>% index() %>% do(. != 4)
#> [
#>     true,
#>     false,
#>     true,
#>     true
#> ]

length

'[[1,2], "string", {"a":2}, null]' %>% index %>% lengthj
#> [
#>     2,
#>     6,
#>     1,
#>     0
#> ]

sqrt

'9' %>% sqrtj
#> 3

floor

'3.14159' %>% floorj
#> 3

find minimum

'[5,4,2,7]' %>% minj
#> 2
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% minj
#> {
#>     "foo": 2,
#>     "bar": 3
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% minj(foo)
#> {
#>     "foo": 1,
#>     "bar": 14
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% minj(bar)
#> {
#>     "foo": 2,
#>     "bar": 3
#> }

find maximum

'[5,4,2,7]' %>% maxj
#> 7
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% maxj
#> {
#>     "foo": 1,
#>     "bar": 14
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% maxj(foo)
#> {
#>     "foo": 2,
#>     "bar": 3
#> }
'[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' %>% maxj(bar)
#> {
#>     "foo": 1,
#>     "bar": 14
#> }

Combine into valid JSON

jq sometimes creates pieces of JSON that are valid in themselves, but together are not. combine() is a way to make valid JSON.

This outputs a few pieces of JSON

(x <- githubcommits %>%
  index() %>%
  select(sha = .sha, name = .commit.committer.name))
#> [
#>     {
#>         "sha": [
#>             "110e009996e1359d25b8e99e71f83b96e5870790"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "a50e548cc5313c187483bc8fb1b95e1798e8ef65"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "4b258f7d31b34ff5d45fba431169e7fd4c995283"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     }
#> ]

Use combine() to put them together.

combine(x)
#> [
#>     {
#>         "sha": [
#>             "110e009996e1359d25b8e99e71f83b96e5870790"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "7b6a018dff623a4f13f6bcd52c7c56d9b4a4165f"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "a50e548cc5313c187483bc8fb1b95e1798e8ef65"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "4b258f7d31b34ff5d45fba431169e7fd4c995283"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     },
#>     {
#>         "sha": [
#>             "d1cb8ee0ad3ddf03a37394bfa899cfd3ddd007c5"
#>         ],
#>         "name": [
#>             "Nicolas Williams"
#>         ]
#>     }
#> ]

Meta

  • Please report any issues or bugs.
  • License: MIT
  • Get citation information for jqr in R doing citation(package = 'jqr')
  • Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.

rofooter