## Loading data products

- Load remote data for desired moorings
- Write data locally

Reproduce the contents of `ExampleCodeFromOHW22.ipynb` using functions in the `sa_upwelling` package.

In [1]:
import sys
from glob import glob
from pathlib import Path

sys.path.insert(0, "../sa_upwelling")   # Allows using the files in the Python package
import utils


from matplotlib import pyplot as plt

SA coastal moorings that are active: 
* SAM8SG
* SAM5CB

National Reference Station Kangaroo Island
* NRSKAI

SA coastal moorings that are not active but have data available:
* SAM2CP
* SAM6IS
* SAM3MS
* SAM7DS

**Notes:**
* Look at TS for details: https://imos.org.au/fileadmin/user_upload/shared/ANMN/SA_Moorings/06c._Southern_Australia_Moorings_-_TS.pdf
* The aggregated product has data only from thermistors
* Avoid CTD-derived data (look out for anomalies)


In [2]:
# (region, mooring) pairs. Each pair selects the S3 prefix
# IMOS/ANMN/<region>/<mooring>/ and the matching ../Datasets/<region>/<mooring>/ folder.
moorings = [
    ("NRS", "NRSKAI"),  # National Reference Station Kangaroo Island
    *(
        ("SA", site)
        for site in (
            "SAM8SG", "SAM5CB",                      # active SA coastal moorings
            "SAM2CP", "SAM6IS", "SAM3MS", "SAM7DS",  # inactive, data still available
        )
    ),
]

### Hourly time-series

In [3]:
data_type = "hourly-timeseries"
timeseries_files, timeseries_ds = dict(), dict()

# For every mooring: reuse the local copy of the hourly product when one exists,
# otherwise look it up on S3, open it remotely and cache it under ../Datasets.
for region, mooring in moorings:
    local_dir = f"../Datasets/{region}/{mooring}"

    # glob returns matches in arbitrary order; sorting makes the choice of
    # file repeatable when more than one local file matches.
    glob_path = sorted(glob(f"{local_dir}/*{data_type}*"))
    local = len(glob_path) > 0

    if local:
        print(f"Loading local {data_type} data for mooring '{mooring}'.")
        file_url = glob_path[0]
        # The glob match already is the local path; no need to rebuild it
        # (splitting on "/" would break on platforms using another separator).
        outfile = file_url
    else:
        print(f"Geting URLs of {data_type} for mooring '{mooring}'.")
        path = f"s3://imos-data/IMOS/ANMN/{region}/{mooring}/{data_type.replace('-', '_')}/"
        # Assumes utils.load_file_urls returns an indexable collection of URLs
        # (the first one is used) — TODO confirm against the package.
        urls = utils.load_file_urls(path, pattern=f"*_{data_type}_*.nc")
        if len(urls) == 0:
            raise FileNotFoundError(
                f"No {data_type} file found for mooring '{mooring}' under {path}"
            )
        file_url = urls[0]
        # Cache the file locally under the same base name it has on S3.
        outfile = f"{local_dir}/" + file_url.split("/")[-1]

    timeseries_files[mooring] = file_url
    timeseries_ds[mooring] = utils.open_nc(outfile if local else file_url, remote=not local)

    # Write the freshly downloaded dataset locally so the next run loads it from disk.
    if not local:
        # to_netcdf does not create missing parent folders.
        Path(local_dir).mkdir(parents=True, exist_ok=True)
        timeseries_ds[mooring].to_netcdf(outfile)



Loading local hourly-timeseries data for mooring 'NRSKAI'.
Loading local hourly-timeseries data for mooring 'SAM8SG'.
Loading local hourly-timeseries data for mooring 'SAM5CB'.
Loading local hourly-timeseries data for mooring 'SAM2CP'.
Loading local hourly-timeseries data for mooring 'SAM6IS'.
Loading local hourly-timeseries data for mooring 'SAM3MS'.
Loading local hourly-timeseries data for mooring 'SAM7DS'.


In [4]:
data_type = "aggregated-timeseries"
files, ds = dict(), dict()

# For every mooring: reuse the local copy of the aggregated product when one exists,
# otherwise look up the TEMP aggregate on S3, open it remotely and cache it under ../Datasets.
for region, mooring in moorings:
    local_dir = f"../Datasets/{region}/{mooring}"

    # glob returns matches in arbitrary order; sorting makes the choice of
    # file repeatable when more than one local file matches.
    # NOTE(review): this glob matches any aggregated product, while the remote
    # lookup below is restricted to TEMP — confirm that no other variable's
    # aggregated file can sit in this folder.
    glob_path = sorted(glob(f"{local_dir}/*{data_type}*"))
    local = len(glob_path) > 0

    if local:
        print(f"Loading local {data_type} data for mooring '{mooring}'.")
        file_url = glob_path[0]
        # The glob match already is the local path; no need to rebuild it
        # (splitting on "/" would break on platforms using another separator).
        outfile = file_url
    else:
        print(f"Geting URLs of {data_type} for mooring '{mooring}'.")
        path = f"s3://imos-data/IMOS/ANMN/{region}/{mooring}/{data_type.replace('-', '_')}/"
        pattern = f"*TEMP-{data_type}_*.nc"
        # Assumes utils.load_file_urls returns an indexable collection of URLs
        # (the first one is used) — TODO confirm against the package.
        urls = utils.load_file_urls(path, pattern)
        if len(urls) == 0:
            raise FileNotFoundError(
                f"No {data_type} file found for mooring '{mooring}' under {path}"
            )
        file_url = urls[0]
        # Cache the file locally under the same base name it has on S3.
        outfile = f"{local_dir}/" + file_url.split("/")[-1]

    files[mooring] = file_url
    ds[mooring] = utils.open_nc(outfile if local else file_url, remote=not local)

    # Write the freshly downloaded dataset locally so the next run loads it from disk.
    if not local:
        # to_netcdf does not create missing parent folders.
        Path(local_dir).mkdir(parents=True, exist_ok=True)
        ds[mooring].to_netcdf(outfile)

Loading local aggregated-timeseries data for mooring 'NRSKAI'.
Loading local aggregated-timeseries data for mooring 'SAM8SG'.
Loading local aggregated-timeseries data for mooring 'SAM5CB'.
Loading local aggregated-timeseries data for mooring 'SAM2CP'.
Loading local aggregated-timeseries data for mooring 'SAM6IS'.
Loading local aggregated-timeseries data for mooring 'SAM3MS'.
Loading local aggregated-timeseries data for mooring 'SAM7DS'.
