In [104]:
# Load tidyverse, rvest and the supporting HTTP/JSON packages
library(tidyverse)
library(rvest)
library(magrittr) # better handling of pipes
library(purrr) 
library(httr)
library(jsonlite)
# load ggplot2
library(ggplot2)

# COVID-19 Impacts Prototype

Setting up the Stats NZ API to retrieve COVID-19 data

Getting the catalogue of datasets:

In [17]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY

#' Fetch the open-data catalogue from a service and return it as a data frame.
#'
#' @param service Base URL of the service.
#' @param endpoint Catalogue endpoint appended to `service` after a "/"
#'   (for example "data.json").
#' @param service_api_key Subscription key sent in the
#'   "Ocp-Apim-Subscription-Key" request header.
#' @return The `dataset` element of the parsed JSON response as a data frame,
#'   with its `distribution` column unnested to one row per element and a
#'   "comment" attribute of "Odata Catalogue".
#' @details Stops with an error when the HTTP request fails or when the
#'   response contains no `dataset` element.
get_odata_catalogue <- function(service, endpoint, service_api_key) {

  catalogue_url <- URLencode(paste0(service, "/", endpoint))

  # Add the proxy authentication (proxy looked up for the service URL)
  config_proxy <- use_proxy(
    url = curl::ie_get_proxy_for_url(service),
    auth = "any",
    username = ""
  )

  # Look at the available tables
  result <- GET(
    url = catalogue_url,
    config_proxy,
    add_headers(.headers = c("Cache-Control" = "no-cache",
                             "Ocp-Apim-Subscription-Key" = service_api_key)),
    timeout(60)
  )

  # Fail early with the HTTP status instead of trying to parse an error body
  # as if it were a catalogue.
  if (http_error(result)) {
    stop(
      sprintf(
        "The catalogue request failed - %s \n%s ",
        http_status(result)$message,
        catalogue_url
      ),
      call. = FALSE
    )
  }

  # Decode explicitly as UTF-8, matching how get_odata() reads its responses.
  parsed <- fromJSON(content(result, as = "text", encoding = "UTF-8"))

  if (!is.list(parsed) || is.null(parsed[["dataset"]])) {
    stop(
      sprintf("The catalogue response has no 'dataset' element \n%s ",
              catalogue_url),
      call. = FALSE
    )
  }

  opendata_catalogue <- as.data.frame(parsed[["dataset"]]) %>%
    unnest_longer(distribution)

  structure(opendata_catalogue,
            comment = "Odata Catalogue")

}

Calling catalogue function with specific arguments

In [18]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY
# Resolve the Stats NZ subscription key. Prefer the STATSNZ_API_KEY environment
# variable so the key does not have to live in the notebook; the literal below
# is kept only as a fallback so existing runs keep working.
# NOTE(review): a key committed to source should be rotated and this fallback
# removed once the environment variable is configured.
statsnz_api_key <- Sys.getenv("STATSNZ_API_KEY")
if (!nzchar(statsnz_api_key)) {
  statsnz_api_key <- "e71d9d8bcc7a430b991e2e4be88d3c43" ### Angus STATSNZ API key
}

# Download the catalogue of available datasets and inspect its structure.
Catalogue <- get_odata_catalogue(
  service = "https://api.stats.govt.nz/opendata/v1",
  endpoint = "data.json",
  service_api_key = statsnz_api_key
)
Catalogue %>% typeof()
Catalogue %>% glimpse()

Rows: 4
Columns: 16
$ title              <chr> "Employment Indicators", "Overseas Cargo", "Covid19…
$ description        <chr> "This employment indicator series is intended to pr…
$ identifier         <chr> "https://api.stats.govt.nz/odata/v1/EmploymentIndic…
$ license            <chr> "https://creativecommons.org/licenses/by/4.0/", "ht…
$ keyword            <list> <"Employment", "ResourceID MEI1.1: Filled jobs and…
$ issued             <chr> "2020-08-11T19:45:06Z", "2020-08-06T23:31:28Z", "20…
$ modified           <chr> "2020-12-10T22:39:15Z", "2020-12-10T22:51:17Z", "20…
$ publisher          <df[,2]> <data.frame[4 x 2]>
$ contactPoint       <df[,3]> <data.frame[4 x 3]>
$ landingPage        <chr> "https://www.stats.govt.nz", "https://www.stats.…
$ language           <list> "en", "en", "en", "en"
$ accrualPeriodicity <chr> "Monthly", "Monthly", "Weekly", "Monthly"
$ theme              <chr> "Employment", "Commerce, trade and industry", ""…
$ temporal           <chr> "1999-04-30/", "198

Getting resource (info about datasets) and observation data (actual data in datasets)

In [103]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY
#' Retrieve an entity set from an OData service, following pagination links.
#'
#' @param service Base URL of the OData service.
#' @param endpoint Endpoint name appended to `service` after a "/".
#' @param entity Entity set appended after `endpoint` (for example
#'   "Resources" or "Observations").
#' @param query_option OData query string appended after "?". When it contains
#'   the literal text "$top", only the first page is requested.
#' @param service_api_key Subscription key sent in the
#'   "Ocp-Apim-Subscription-Key" request header.
#' @return The row-bound `value` elements of every page retrieved, carrying a
#'   "comment" attribute of "Odata response". Each newly fetched page is bound
#'   in front of the pages fetched before it.
#' @details Stops with an error when a request fails or when a successful
#'   response is not of type "application/json". Progress is printed with
#'   `cat()` after each page.
get_odata <- function(service, endpoint, entity, query_option, service_api_key) {

  config_proxy <- use_proxy(
    url = curl::ie_get_proxy_for_url(service),
    auth = "any",
    username = ""
  )

  odata_url <- URLencode(paste0(service, "/", endpoint, "/", entity, "?", query_option))
  top_query <- grepl("$top", query_option, fixed = TRUE)

  # Best-effort description of a failed response: the JSON `value` or `message`
  # field when present, otherwise the raw body text. Always returns a single
  # string so the sprintf() below never collapses to an empty error message.
  describe_failure <- function(result) {
    body_text <- tryCatch(
      content(result, "text", encoding = "UTF-8"),
      error = function(e) NULL
    )
    body <- tryCatch(fromJSON(body_text), error = function(e) NULL)
    detail <- NULL
    if (is.list(body)) {
      detail <- body[["value"]]
      if (is.null(detail)) {
        detail <- body[["message"]]
      }
    }
    if (is.null(detail)) {
      detail <- body_text
    }
    paste(as.character(unlist(detail)), collapse = " ")
  }

  # Local accumulator. Testing exists("response") instead would also find an
  # object named `response` in the calling or global environment and bind it
  # into the result.
  response <- NULL

  # continue getting results while there are additional pages

  while (!is.null(odata_url)) {

    result <- GET(odata_url,
                  config_proxy,
                  add_headers(.headers = c("Content-Type" = "application/json;charset=UTF-8",
                                           "Ocp-Apim-Subscription-Key" = service_api_key)),
                  timeout(60)
    )

    # catch errors: check the HTTP status first so a non-JSON error page is
    # reported with its real status rather than as a content-type problem

    if (http_error(result)) {
      stop(
        sprintf(
          "The request failed - %s \n%s \n%s ",
          http_status(result)$message,
          describe_failure(result),
          odata_url
        ),
        call. = FALSE
      )
    }

    if (http_type(result) != "application/json") {
      stop("API did not return json", call. = FALSE)
    }

    # parse and concatenate result while retaining UTF-8 encoded characters

    parsed <- jsonlite::fromJSON(content(result, "text", encoding = "UTF-8"), flatten = TRUE)
    response <- rbind(parsed$value, response)
    odata_url <- parsed[["@odata.nextLink"]]

    cat("\r", nrow(response), "obs retrieved")

    # break when top(n) obs are specified

    if (top_query) {
      break
    }

  }

  structure(response,
            comment = "Odata response")

}

Getting the COVID-19 case-number dataset from the API

In [102]:
# SOURCE DECLARATION: FOLLOWING CODE BLOCK IS WRITTEN FROM STATSNZ AND WE HAVE MADE SMALL CHANGES TO GET THE REQUIRED DATA
# ALSO USING PERSONAL API KEY 
# Resolve the Stats NZ subscription key. Prefer the STATSNZ_API_KEY environment
# variable so the key does not have to live in the notebook; the literal below
# is kept only as a fallback so existing runs keep working.
# NOTE(review): a key committed to source should be rotated and this fallback
# removed once the environment variable is configured.
statsnz_api_key <- Sys.getenv("STATSNZ_API_KEY")
if (!nzchar(statsnz_api_key)) {
  statsnz_api_key <- "e71d9d8bcc7a430b991e2e4be88d3c43" ### Angus STATSNZ API key
}

# Fetch one entity set for resource CPCOV2 from the Covid-19Indicators endpoint
# and drop every column that contains only NA values. Both downloads below
# share the same service, endpoint, filter and key, so they go through this
# single helper.
fetch_cpcov2 <- function(entity) {
  Filter(
    function(x) !all(is.na(x)),
    get_odata(
      service = "https://api.stats.govt.nz/opendata/v1",
      endpoint = "Covid-19Indicators",
      entity = entity,
      query_option = "$filter=(ResourceID eq 'CPCOV2')",
      service_api_key = statsnz_api_key
    )
  )
}

# Resource metadata (information about the dataset)
Number_of_cases_Resources <- fetch_cpcov2("Resources")

# Observations (the actual data in the dataset)
Number_of_cases__Observations <- fetch_cpcov2("Observations")

Number_of_cases_Resources %>% glimpse()
Number_of_cases__Observations %>% glimpse()


 1554 obs retrievedRows: 1
Columns: 9
$ ResourceID  <chr> "CPCOV2"
$ Subject     <chr> "COVID-19"
$ Title       <chr> "Number of Cases"
$ Description <chr> "COVID-19 cases in New Zealand (cumulative)"
$ Source      <chr> "Ministry of Health"
$ SourceURL   <chr> "https://www.health.govt.nz/our-work/diseases-and-conditio…
$ Modified    <chr> "2021-10-06T11:00:00Z"
$ Frequency   <chr> "Daily"
$ Var1        <chr> "Case status"
Rows: 1,554
Columns: 9
$ id         <chr> "3934f4da-e3ed-43c4-8656-441f49f9c7ca", "2a4fa96a-27a1-423b…
$ ResourceID <chr> "CPCOV2", "CPCOV2", "CPCOV2", "CPCOV2", "CPCOV2", "CPCOV2",…
$ Period     <chr> "2020-02-28", "2020-02-29", "2020-03-01", "2020-03-02", "20…
$ Duration   <chr> "P1D", "P1D", "P1D", "P1D", "P1D", "P1D", "P1D", "P1D", "P1…
$ Label1     <chr> "Deceased", "Deceased", "Deceased", "Deceased", "Deceased",…
$ Value      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ Unit       <chr> "Number", "Number", "Number", "Number", "Number", "

In [100]:
# Combine the resource metadata with the observations, matching rows on the
# shared ResourceID column and keeping unmatched rows from either side.
Number_of_cases_dataset <- full_join(
  Number_of_cases_Resources,
  Number_of_cases__Observations,
  by = "ResourceID"
)




In [105]:
# Save the combined dataset as a CSV file in the current working directory.
# write.csv() keeps its default of writing row names as the first column.
write.csv(x = Number_of_cases_dataset, file = "Number_of_cases.csv")