# Load Western Australia Shipwreck Data
Loads WAM-002 Shipwrecks from https://catalogue.data.wa.gov.au/dataset/shipwrecks
[Direct Link](https://data-downloads.slip.wa.gov.au/WAM-002/GeoJSON) and enriches with BOM Marine Zones IDM000003 http://reg.bom.gov.au/catalogue/spatialdata.pdf

WAM-002 requires a free SLIP account.
CC BY 4.0 

BOM Marine Zones are available by anonymous FTP. http://www.bom.gov.au/catalogue/data-feeds.shtml


In [None]:
# To run this from a pipeline, add geopandas (from PyPI) to the workspace libraries;
# pipelines do not support pip / conda installs.
#
# For interactive / notebook development use:
#%pip install geopandas

In [None]:
import requests, json, zipfile, pathlib, urllib
from re import sub

def downloadSLIPFile(slipPath, slipFile, saveFolder, userId, password) :
    """
    Downloads a file from West Australian government Shared Location Information Platform (SLIP)

    Code based on https://toolkit.data.wa.gov.au/hc/en-gb/articles/115000962734

    Requests an OAuth2 access token with the supplied credentials, downloads the
    zip archive into saveFolder (created if missing), extracts it there and
    returns the path of the first .geojson member of the archive.

    Parameters:
        slipPath:   folder segment of the SLIP download URL
        slipFile:   name of the zip archive to download
        saveFolder: local folder the archive is saved to and extracted in
        userId:     SLIP account user name
        password:   SLIP account password

    Returns:
        "<saveFolder>/<geojson member>" on success, or None when the token
        request or the download does not return HTTP 200 (the error is printed).

    Raises:
        IndexError if the archive contains no .geojson member.
    """

    saveFile = f"{saveFolder}/{slipFile}"
    pathlib.Path(saveFolder).mkdir(exist_ok=True, parents=True)

    dataDownloadRequestUrl = "https://direct-download.slip.wa.gov.au/datadownload/{0}/{1}".format(slipPath, slipFile)

    tokenRequestUrl = "https://sso.slip.wa.gov.au/as/token.oauth2"
    # The Basic value is the base64 encoding of "direct-download"
    tokenRequestHeaders = { 'Authorization' : 'Basic ZGlyZWN0LWRvd25sb2Fk'}
    tokenRequestForm = {"grant_type": "password", "username": userId, "password": password}
    tokenResponse = requests.post(tokenRequestUrl, data=tokenRequestForm, headers=tokenRequestHeaders)

    # Check the status BEFORE touching the payload: an error response has no
    # "access_token" key, so parsing first would raise instead of reporting.
    if tokenResponse.status_code != 200:
        print("Error getting token: " + str(tokenResponse.status_code) + "-" + tokenResponse.text)
        return None

    accessToken = json.loads(tokenResponse.text)["access_token"]

    print(f"Downloading file from URL: {dataDownloadRequestUrl} to {saveFolder}")
    dataDownloadRequestHeaders = { 'Authorization' : 'Bearer ' + accessToken}
    dataDownloadResponse = requests.get(dataDownloadRequestUrl, headers=dataDownloadRequestHeaders)
    if dataDownloadResponse.status_code != 200:
        print("Error download file with error " + str(dataDownloadResponse.status_code) + "-" + dataDownloadResponse.text)
        return None

    with open(saveFile, 'wb') as f:
        f.write(dataDownloadResponse.content)

    with zipfile.ZipFile(saveFile, 'r') as zipref:
        # First .geojson member is taken to be the dataset
        geojsonfile = [filename for filename in zipref.namelist() if filename.endswith('.geojson')][0]
        zipref.extractall(saveFolder)
        return f"{saveFolder}/{geojsonfile}"



def downloadBOMFile(BOMServerPath, BOMServerFile, DownloadFolder):
    """
    Download a file from the Australian Bureau of Meteorology anonymous FTP server

    The zip archive is saved into DownloadFolder (created if missing), extracted
    there, and the path of the first .shp member of the archive is returned.

    Parameters:
        BOMServerPath:  folder on ftp.bom.gov.au, starting with "/"
        BOMServerFile:  name of the zip archive to download
        DownloadFolder: local folder the archive is saved to and extracted in

    Returns:
        "<DownloadFolder>/<shp member>"

    Raises:
        IndexError if the archive contains no .shp member; errors from
        urllib / zipfile are not caught here.
    """
    # "import urllib" alone does not load the request submodule; import it
    # explicitly instead of relying on another library having loaded it.
    import urllib.request

    saveFile = f"{DownloadFolder}/{BOMServerFile}"
    pathlib.Path(DownloadFolder).mkdir(exist_ok=True, parents=True)

    # No leading whitespace in the URL (the original literal started with a space)
    url = f"ftp://anonymous@ftp.bom.gov.au{BOMServerPath}/{BOMServerFile}"
    print(f"Downloading file from URL: {url} to {DownloadFolder}")
    urllib.request.urlretrieve(url, saveFile)

    with zipfile.ZipFile(saveFile, 'r') as zipref:
        # First .shp member is taken to be the shapefile to load
        shpfile = [filename for filename in zipref.namelist() if filename.endswith('.shp')][0]
        zipref.extractall(DownloadFolder)
        return f"{DownloadFolder}/{shpfile}"


def toPascalCase(s):
    """Return s in PascalCase: runs of "_" / "-" split words, "*" is removed."""
    # Turn every run of separators into a single space so title() sees words
    spaced = sub(r"(_|-)+", " ", s)
    # title() upper-cases the first letter of each word and lower-cases the rest
    capitalised = spaced.title()
    without_spaces = capitalised.replace(" ", "")
    return without_spaces.replace("*", "")

In [None]:
import geopandas as gp
import pandas as pd
import pyspark.sql.functions as f

# Spark session settings for this notebook
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "false")
spark.conf.set("spark.sql.parquet.vorder.enabled", "true") # Enable VOrder write (key was misspelled "sprk...")
spark.conf.set("spark.microsoft.delta.optimizeWrite.enabled", "true") # Enable automatic delta optimized write

# SLIP username and password
# Storing creds in a notebook is never a good idea, use Key Vault
# mssparkutils.credentials.getSecret('https://SomeKeyVault.vault.azure.net/','SomeSecret')
#
# https://learn.microsoft.com/en-us/azure/synapse-analytics/spark/microsoft-spark-utilities?pivots=programming-language-python

SLIPUsername = ""
SLIPPassword = ""

#The WAM-002 Shipwrecks data
#https://direct-download.slip.wa.gov.au/datadownload/People_and_Society/Shipwrecks_WAM_002_WA_GDA94_Public_GeoJSON.zip
SLIPFolder="People_and_Society"
SLIPFile="Shipwrecks_WAM_002_WA_GDA94_Public_GeoJSON.zip"
WAMsaveFolder = "/lakehouse/default/Files/WAM"

#BOM IDM000003 - marine forecast zones
#see http://www.bom.gov.au/catalogue/data-feeds.shtml
BOMFolder = "/anon/home/adfd/spatial"
BOMFile = "IDM00003.zip"
BOMsaveFolder = "/lakehouse/default/Files/BOM/IDM000003"

#Download files
shipwrecks = downloadSLIPFile(SLIPFolder, SLIPFile, WAMsaveFolder, SLIPUsername, SLIPPassword)
if shipwrecks is None:
    # downloadSLIPFile returns None after printing the HTTP error; stop here
    # instead of handing None to geopandas.
    raise RuntimeError("Failed to download the WAM-002 shipwrecks file from SLIP")
marineZones = downloadBOMFile(BOMFolder, BOMFile, BOMsaveFolder)

#Read files, normalise CRS (marine zones are reprojected to the shipwrecks CRS)
df_shipwrecks = gp.read_file(shipwrecks)
df_marineZones = gp.read_file(marineZones).to_crs(df_shipwrecks.crs)

#Filter / clean: keep WA zones only, replace missing values with None
df_marineZones = df_marineZones[df_marineZones.STATE_CODE == "WA"]
df_marineZones = df_marineZones.where(df_marineZones.notna(), None)


#Spatial Join shipwrecks and marine zones (left join keeps every shipwreck)
df_joined = df_shipwrecks.sjoin(df_marineZones, how="left", predicate='intersects')

#Clean up results: PascalCase column names, drop unwanted columns, fix up a few names
df_joined.rename(columns=toPascalCase, inplace=True)
columnsToDrop = [
    'Geometry', 'DistNo', 'StateCode', 'Type', 'DateDepth', 'TimeDepth',
    'MaxDepth', 'MinDepth', 'BearingTo', 'LengthOf', 'ObjectId', 'UniqueNum',
    'IndexRight', 'Pt1Name', 'Pt2Name',
]
df_joined.drop(columns=columnsToDrop, inplace=True)
df_joined.rename(columns={'TypeOfSi': 'Type', 'DateInspe': 'DateInspected', 'Aac': 'AAC'}, inplace=True)
df_joined = df_joined.where(df_joined.notna(), None)

#Save to Lakehouse as a Delta table, replacing any existing data and schema
saveTable = "Shipwrecks"
spark.createDataFrame(df_joined).write.mode("overwrite").option("overwriteSchema", "true").format("delta").save(f"Tables/{saveTable}")
