In [8]:
# Install the required packages (uncomment and run once)

# using Pkg
# Pkg.add("HTTP")
# Pkg.add("JSON3")
# Pkg.add("JSON")
# Pkg.add("Chain")
# Pkg.add("DataFrames")
# Pkg.add("CSV")
# Pkg.add("JSONTables")

In [9]:
# Load the packages
using HTTP
using JSON3
using Chain
using DataFrames
using JSONTables
using CSV


In [10]:
# Goal: CAS crash records for the Canterbury and Otago regions, crash years 2010 to 2020.
# The ArcGIS service limits the number of records per request to 32000, so the Canterbury
# data is fetched in two REST API calls (2010-2018 and 2019-2020); the Otago data fits in
# a single call, giving three calls in total.
# The query format is taken from the Crash Analysis System Data API website:
# https://opendata-nzta.opendata.arcgis.com/datasets/NZTA::crash-analysis-system-cas-data-1/api

# Build the query URL for one region and an inclusive range of crash years.
# `region` must already be percent-encoded (e.g. "OTAGO%20REGION").
function cas_query_url(region, firstyear, lastyear)
    return string(
        "https://services.arcgis.com/CXBb7LAjgIIdcsPt/arcgis/rest/services/",
        "CAS_Data_Public/FeatureServer/0/query",
        "?where=region%20=%20'", region, "'",
        "%20AND%20%20(crashYear%20%3E=%20", firstyear,
        "%20AND%20crashYear%20%3C=%20", lastyear, ")%20",
        "&resultType=standard&outFields=*&outSR=4326&f=json",
    )
end

patch1_url = cas_query_url("CANTERBURY%20REGION", 2010, 2018)
patch2_url = cas_query_url("CANTERBURY%20REGION", 2019, 2020)
patch3_url = cas_query_url("OTAGO%20REGION", 2010, 2020)
urls = [patch1_url, patch2_url, patch3_url]

3-element Vector{String}:
 "https://services.arcgis.com/CXB" ⋯ 205 bytes ⋯ "&outFields=*&outSR=4326&f=json"
 "https://services.arcgis.com/CXB" ⋯ 205 bytes ⋯ "&outFields=*&outSR=4326&f=json"
 "https://services.arcgis.com/CXB" ⋯ 200 bytes ⋯ "&outFields=*&outSR=4326&f=json"

In [11]:
# Column layout of the table that will hold the feature data from ArcGIS:
# the point coordinates ("X", "Y") first, followed by one column per attribute.
colnames = [
    "X", "Y", "OBJECTID", "advisorySpeed", "areaUnitID", "bicycle", "bridge", "bus",
    "carStationWagon", "cliffBank", "crashDirectionDescription", "crashFinancialYear",
    "crashLocation1", "crashLocation2", "crashRoadSideRoad", "crashSeverity",
    "crashSHDescription", "crashYear", "debris", "directionRoleDescription", "ditch",
    "fatalCount", "fence", "flatHill", "guardRail", "holiday", "houseOrBuilding",
    "intersection", "kerb", "light", "meshblockId", "minorInjuryCount", "moped",
    "motorcycle", "NumberOfLanes", "objectThrownOrDropped", "otherObject",
    "otherVehicleType", "overBank", "parkedVehicle", "pedestrian", "phoneBoxEtc",
    "postOrPole", "region", "roadCharacter", "roadLane", "roadSurface", "roadworks",
    "schoolBus", "seriousInjuryCount", "slipOrFlood", "speedLimit", "strayAnimal",
    "streetLight", "suv", "taxi", "temporarySpeedLimit", "tlaId", "tlaName",
    "trafficControl", "trafficIsland", "trafficSign", "train", "tree", "truck",
    "unknownVehicleType", "urban", "vanOrUtility", "vehicle", "waterRiver", "weatherA",
    "weatherB",
]

# Start with zero rows: one empty `Any`-typed vector per column name.
CASdf = DataFrame([Any[] for _ in colnames], colnames)

Unnamed: 0_level_0,X,Y,OBJECTID,advisorySpeed,areaUnitID,bicycle,bridge,bus,carStationWagon
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any


In [12]:
# API calls: download every query in `urls` and push one row per feature to `CASdf`.

# Fetch one ArcGIS query and return the "features" array of its JSON response.
# Raises an error when the response body carries an "error" member instead of
# data, and warns when the response is flagged with `exceededTransferLimit`,
# i.e. the service returned fewer records than the query matches.
function fetch_features(url)
    data = JSON3.read(HTTP.get(url).body)
    if haskey(data, :error)
        error("ArcGIS query failed for $url: $(data[:error])")
    end
    if get(data, :exceededTransferLimit, false) === true
        @warn "ArcGIS truncated this result; split the query into smaller ranges" url
    end
    return data["features"]
end

# Build the row for one feature, one value per entry of `columns` and in the
# same order: "X"/"Y" come from the geometry, everything else from the
# attribute of the same name.
function feature_row(feature, columns)
    atr = feature["attributes"]
    geo = feature["geometry"]
    return Any[c == "X" ? geo["x"] : c == "Y" ? geo["y"] : atr[c] for c in columns]
end

# Taking the column list from the frame itself keeps every pushed row aligned
# with the frame, with no second hand-written list of field names to maintain.
columns = names(CASdf)

for url in urls
    for feature in fetch_features(url)
        push!(CASdf, feature_row(feature, columns))
    end
end

In [13]:
# Wrangle the dataframe.

# In every column, turn `nothing` values and the strings "null", "Null" and
# "Nil" into the string "NA".
na_rules = (nothing => "NA", "null" => "NA", "Null" => "NA", "Nil" => "NA")
for col in names(CASdf)
    CASdf[!, col] = replace(CASdf[!, col], na_rules...)
end

# Remove the word "Region" from the values of the region variable.
CASdf.region = replace(
    CASdf.region,
    "Canterbury Region" => "Canterbury",
    "Otago Region" => "Otago",
)


57459-element Vector{Any}:
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 "Canterbury"
 ⋮
 "Otago"
 "Otago"
 "Otago"
 "Otago"
 "Otago"
 "Otago"
 "Otago"
 "Otago"
 "Otago"

In [14]:
# Save to CAS.csv; any cell that is still `nothing` is turned into `missing` first.
CSV.write("CAS.csv", something.(CASdf, missing))

"CAS.csv"