In [3]:
# Load the tidyverse, rvest and supporting packages
library(tidyverse)
library(rvest)
library(magrittr) # better handling of pipes
library(purrr) 
library(httr)
library(jsonlite)
# load ggplot2
library(ggplot2)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.5     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   2.0.0     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


Attaching package: ‘rvest’


The following object is masked from ‘package:readr’:

    guess_encoding



Attaching package: ‘magrittr’


The following object is masked from ‘package:purrr’:

    set_names


The following object is masked from ‘package:tidyr’:

    extract



Attaching package: ‘jsonlite’


The following object is masked from ‘package:pu

# Shipping Prototype

Set up the Stats NZ API to retrieve the shipping (Overseas Cargo) data.

Getting the catalogue of datasets:

In [4]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY

#' Retrieve the open-data catalogue from the API.
#'
#' Sends a GET request to `<service>/<endpoint>` with the subscription key in
#' the `Ocp-Apim-Subscription-Key` header, parses the JSON body and returns its
#' `dataset` element as a data frame with the `distribution` column unnested
#' into rows.
#'
#' @param service Base URL of the API service.
#' @param endpoint Catalogue endpoint appended to `service` (e.g. "data.json").
#' @param service_api_key Subscription key sent in the request header.
#' @return A data frame of catalogue entries carrying the comment attribute
#'   "Odata Catalogue".
get_odata_catalogue <- function(service, endpoint, service_api_key) {

  catalogue_url <- URLencode(paste0(service, "/", endpoint))

  # Add the proxy authentication
  config_proxy <- use_proxy(
    url = curl::ie_get_proxy_for_url(service),
    auth = "any",
    username = ""
  )

  # Look at the available tables
  result <- GET(
    url = catalogue_url,
    config_proxy,
    add_headers(.headers = c("Cache-Control" = "no-cache",
                             "Ocp-Apim-Subscription-Key" = service_api_key)),
    timeout(60)
  )

  # Report HTTP failures (e.g. a rejected key) here, with the status and URL,
  # instead of letting them surface later as an obscure parsing error.
  if (http_error(result)) {
    stop(
      sprintf(
        "The request failed - %s \n%s ",
        http_status(result)$message,
        catalogue_url
      ),
      call. = FALSE
    )
  }

  # Decode explicitly as UTF-8 so httr does not have to guess the encoding.
  opendata_catalogue <- fromJSON(
    content(result, as = "text", encoding = "UTF-8")
  )

  opendata_catalogue <- as.data.frame(opendata_catalogue$dataset) %>%
    unnest_longer(distribution)

  structure(opendata_catalogue,
            comment = "Odata Catalogue")

}

Calling catalogue function with specific arguments

In [5]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY
# The subscription key can be supplied through the STATSNZ_API_KEY environment
# variable. When it is unset or empty, the key that was previously hard-coded
# here is used so the notebook keeps running unchanged.
# NOTE(review): a live API key is committed in this file - rotate it and remove
# the fallback once every user sets STATSNZ_API_KEY.
statsnz_api_key <- Sys.getenv("STATSNZ_API_KEY")
if (!nzchar(statsnz_api_key)) {
  statsnz_api_key <- "e71d9d8bcc7a430b991e2e4be88d3c43" ### Angus STATSNZ API key
}

# Download the catalogue of available datasets.
Catalogue <- get_odata_catalogue(
  service = "https://api.stats.govt.nz/opendata/v1",
  endpoint = "data.json",
  service_api_key = statsnz_api_key
)

# Inspect the storage type and the column structure of the catalogue.
typeof(Catalogue)
glimpse(Catalogue)

Rows: 4
Columns: 16
$ title              <chr> "Employment Indicators", "Overseas Cargo", "Covid19…
$ description        <chr> "This employment indicator series is intended to pr…
$ identifier         <chr> "https://api.stats.govt.nz/odata/v1/EmploymentIndic…
$ license            <chr> "https://creativecommons.org/licenses/by/4.0/", "ht…
$ keyword            <list> <"Employment", "ResourceID MEI1.1: Filled jobs and…
$ issued             <chr> "2020-08-11T19:45:06Z", "2020-08-06T23:31:28Z", "20…
$ modified           <chr> "2020-12-10T22:39:15Z", "2020-12-10T22:51:17Z", "20…
$ publisher          <df[,2]> <data.frame[4 x 2]>
$ contactPoint       <df[,3]> <data.frame[4 x 3]>
$ landingPage        <chr> "https://www.stats.govt.nz", "https://www.stats.…
$ language           <list> "en", 

Getting resource (info about datasets) and observation data (actual data in datasets)

In [6]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY
#' Retrieve an entity set from an OData endpoint, following paging links.
#'
#' Requests `<service>/<endpoint>/<entity>?<query_option>` and keeps requesting
#' the `@odata.nextLink` URL of each response until none is returned.
#'
#' @param service Base URL of the API service.
#' @param endpoint Dataset endpoint (e.g. "OverseasCargo").
#' @param entity Entity set to request (e.g. "Resources", "Observations").
#' @param query_option OData query string appended after "?". If it contains
#'   "$top", only the first page is fetched.
#' @param service_api_key Subscription key sent in the
#'   `Ocp-Apim-Subscription-Key` header.
#' @return The row-bound `value` payloads of all fetched pages (most recently
#'   fetched page first), carrying the comment attribute "Odata response".
#'   Stops with an error if a request fails or the API does not return JSON.
get_odata <- function(service, endpoint, entity, query_option, service_api_key) {

  config_proxy <- use_proxy(
    url = curl::ie_get_proxy_for_url(service),
    auth = "any",
    username = ""
  )

  odata_url <- URLencode(paste0(service, "/", endpoint, "/", entity, "?", query_option))
  top_query <- grepl("$top", query_option, fixed = TRUE)

  # Pages are collected in a local list and bound once at the end. The earlier
  # `exists("response")` test also searched the global environment, so an
  # unrelated global object called `response` ended up in the result.
  pages <- list()
  n_obs <- 0L

  # continue getting results while there are additional pages

  while (!is.null(odata_url)) {

    result <- GET(odata_url,
                  config_proxy,
                  add_headers(.headers = c("Content-Type" = "application/json;charset=UTF-8",
                                           "Ocp-Apim-Subscription-Key" = service_api_key)),
                  timeout(60)
    )

    returned_json <- http_type(result) == "application/json"

    # catch errors: check the HTTP status first so that a failed request is
    # reported with its status even when the error body is not JSON

    if (http_error(result)) {

      # Error bodies do not always carry a `value` field. Passing NULL to
      # sprintf() yields character(0) and therefore an empty error message,
      # so fall back to `message` and finally to an empty string.
      error_detail <- ""
      if (returned_json) {
        error_body <- tryCatch(
          fromJSON(content(result, "text", encoding = "UTF-8")),
          error = function(e) NULL
        )
        if (is.list(error_body)) {
          candidate <- c(error_body[["value"]], error_body[["message"]])
          if (is.character(candidate) && length(candidate) > 0) {
            error_detail <- paste(candidate, collapse = " ")
          }
        }
      }

      stop(
        sprintf(
          "The request failed - %s \n%s \n%s ",
          http_status(result)$message,
          error_detail,
          odata_url
        ),
        call. = FALSE
      )
    }

    if (!returned_json) {
      stop("API did not return json", call. = FALSE)
    }


    # parse the page while retaining UTF-8 encoded characters

    parsed <- jsonlite::fromJSON(content(result, "text", encoding = "UTF-8"), flatten = TRUE)
    pages <- c(pages, list(parsed$value))
    n_obs <- n_obs + NROW(parsed$value)
    odata_url <- parsed[["@odata.nextLink"]]


    cat("\r", n_obs, "obs retrieved")

    # break when top(n) obs are specified

    if (top_query) {
      break
    }

  }

  # Bind newest page first: this matches the row order this function has
  # always produced (each new page was placed above the earlier ones).
  response <- do.call(rbind, rev(pages))

  structure(response,
            comment = "Odata response")

}

Getting the Overseas Cargo dataset (resource OSC1.1) from the API

In [17]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY 
# Request settings shared by both Overseas Cargo calls below.
statsnz_service <- "https://api.stats.govt.nz/opendata/v1"
osc_filter <- "$filter=(ResourceID eq 'OSC1.1')"

# The subscription key can be supplied through the STATSNZ_API_KEY environment
# variable. When it is unset or empty, the key that was previously hard-coded
# here is used so the notebook keeps running unchanged.
# NOTE(review): a live API key is committed in this file - rotate it and remove
# the fallback once every user sets STATSNZ_API_KEY.
statsnz_api_key <- Sys.getenv("STATSNZ_API_KEY")
if (!nzchar(statsnz_api_key)) {
  statsnz_api_key <- "e71d9d8bcc7a430b991e2e4be88d3c43" ### Angus STATSNZ API key
}

# Keep only the columns that hold at least one non-missing value.
drop_empty_columns <- function(df) {
  Filter(function(x) !all(is.na(x)), df)
}

# Resource metadata (information about the dataset) for OSC1.1.
Overseas_Cargo_Resources <- drop_empty_columns(
  get_odata(
    service = statsnz_service,
    endpoint = "OverseasCargo",
    entity = "Resources",
    query_option = osc_filter,
    service_api_key = statsnz_api_key
  )
)

# Observations (the actual data) for OSC1.1.
Overseas_Cargo_Observations <- drop_empty_columns(
  get_odata(
    service = statsnz_service,
    endpoint = "OverseasCargo",
    entity = "Observations",
    query_option = osc_filter,
    service_api_key = statsnz_api_key
  )
)




 39592 obs retrieved

In [18]:
# Combine the resource metadata with the observations, matching rows on
# ResourceID and keeping rows from both tables.
Overseas_Cargo_dataset <- full_join(
  Overseas_Cargo_Resources,
  Overseas_Cargo_Observations,
  by = "ResourceID"
)

In [20]:
# Save the combined dataset as a CSV file in the working directory.
write.csv(x = Overseas_Cargo_dataset, file = "Overseas_Cargo_dataset.csv")