# Hycom Download Demo

## Requirements
* For all cells to properly execute install and run the `py-hycom_1_0_0` kernel.


## Download Hindcast Data

### 0. Populate Hindcast Entries
- To ne an `afterCreate` method for HycomDataset

In [2]:
import requests
import xmltodict
import os
import datetime
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")
def upsertHindcasts(this):
        
    with requests.get(this.catalogUrl) as r:
            doc = xmltodict.parse(r.text)

    hindcasts = []
    # Traverse the set of hincast collections in the catalog
    # Skip the FMRC and all collections
    for ds in doc['catalog']['dataset']['dataset']['dataset']:
        if 'ALL' in ds['@name'] or 'FMRC' in ds['@name']: # restrict to Hindcast
            continue
        # Parse the year off the end of the path and convert to integer 
        year = int(os.path.basename(ds["@urlPath"]))
        
        hc = c3.HycomHindcast(
            **{
                "id": ds['@ID'],
                "dataset": {"id": "GOMu0.04_901m000_FMRC_1.0.1"},
                "name":ds['@ID'],
                "description": ds['@name'],
                "urlPath": ds['@urlPath'],
                "timeCoverage": c3.TimeRange(
                    **{
                        "start": datetime.datetime(year,1,1,12),
                        "end": datetime.datetime(year+1,1,1,11)
                    }
                ).toJson()

              }
        )
        hindcasts.append(hc)
        c3.HycomHindcast.upsertBatch(hindcasts)
    return hindcasts
hindcasts = upsertHindcasts(gom_dataset)
hindcasts

[c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2021',
 name='GOMu0.04-expt_90.1m000-2021',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2021) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2021',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2021, 1, 1, 12, 0),
                end=datetime.datetime(2022, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2020',
 name='GOMu0.04-expt_90.1m000-2020',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2020) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2020',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2020, 1, 1, 12, 0),
                end=datetime.datetime(2021, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2019',
 name='GOMu0.04-expt_90.1m000-2019',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2019) Hindcast Data (1-hrly)',
 ur

### 1. Create Archive Entry
- A `HindcastArchive` describes a set of Hindcast files to be retrived using a particular set of `HycomSubsetOption`s and `HycomDownloadOption`s.


In [3]:
import datetime
# Example: Define archive for surface `water_u` and `water_v` on a lat-long subset for the month of September 2021
hcast = c3.HycomHindcast.get("GOMu0.04-expt_90.1m000-2021")
archv = c3.HindcastArchive(
    **{
        "hindcast": hcast,
        "description": "Test subset from 9/2021",
        "subsetOptions": c3.HycomSubsetOptions(
            **{
                "timeRange": {
                    "start": datetime.datetime(2021,9,1,12),
                    "end": datetime.datetime(2021,10,1,11)
                },
                "vars": "water_u,water_v",
                "disableLLSubset": "off",
                "geospatialCoverage": {
                    "start": {
                        "latitude": 22.3,
                        "longitude": -96.5
                        },
                    "end": {
                        "latitude": 28.6,
                        "longitude": -85.5
                        }
                    }
            }
        ).toJson(),
        "downloadOptions": c3.HycomDownloadOptions(
            **{
                'externalDir': 'hycom-test'
            }
        ).toJson()
        
    }
)
archv.upsert()
archv

c3.HindcastArchive(
 hindcast=c3.HycomHindcast(
            id='GOMu0.04-expt_90.1m000-2021',
            name='GOMu0.04-expt_90.1m000-2021',
            meta=c3.Meta(
                   tenantTagId=150,
                   tenant='dev',
                   tag='tc01d',
                   created=datetime.datetime(2021, 10, 13, 23, 56, 36, tzinfo=datetime.timezone.utc),
                   createdBy='dadams@illinois.edu',
                   updated=datetime.datetime(2021, 10, 13, 23, 56, 36, tzinfo=datetime.timezone.utc),
                   updatedBy='dadams@illinois.edu',
                   timestamp=datetime.datetime(2021, 10, 13, 23, 56, 36, tzinfo=datetime.timezone.utc),
                   fetchInclude='[]',
                   fetchType='HycomHindcast'),
            version=1,
            dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
            description='(2021) Hindcast Data (1-hrly)',
            urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2021',
            timeC

### 2. Create `HindcastFile` entries
- To be automated in `afterCreate` and `afterUpdate`

In [1]:

archv = c3.HindcastArchive.get("3d4d45b7-e420-40dd-aab2-eb35db744147")
stat = archv.stageFiles()
stat

c3.ObjList<HindcastFile>(
 stats=c3.Stats(
         createdObjCount=720,
         updatedObjCount=0,
         removedObjCount=0,
         failedObjCount=0,
         objCount=720,
         dbWait=0.0,
         time=1.764,
         dbTime=0.146635657,
         objsPerSecond=408.16326530612247,
         dbObjsPerSecond=4910.129055445225),
 objs=c3.Arry<HindcastFile>([c3.HindcastFile(
         id='c41de1e6-90f4-4d55-b246-5d0f851b524b',
         name='GOMu0.04-expt_90.1m000-2021-2021-09-01T12:00:00Z.nc',
         meta=c3.Meta(
                created=datetime.datetime(2021, 10, 14, 0, 57, 56, tzinfo=datetime.timezone.utc),
                updated=datetime.datetime(2021, 10, 14, 0, 57, 56, tzinfo=datetime.timezone.utc),
                timestamp=datetime.datetime(2021, 10, 14, 0, 57, 56, tzinfo=datetime.timezone.utc)),
         version=1),
        c3.HindcastFile(
         id='c8b1d034-4899-4fd8-8ec1-827188381d1e',
         name='GOMu0.04-expt_90.1m000-2021-2021-09-01T13:00:00Z.nc',
        

In [3]:
file_id = "0033cdfe-6b50-4c9f-a5da-76381220ec38"
fr = c3.HindcastFile.get(file_id)
file = fr.download()
file

c3.File(
 contentLength=3240897,
 contentLocation='fs/dti/mpodolsky/hycom-test/hindcast/3d4d45b7-e420-40dd-aab2-eb35db744147/GOMu0.04-expt_90.1m000-2021-2021-09-25T01:00:00Z.nc',
 contentType='application/json',
 eTag='"0x8D98F021466F425"',
 contentMD5='BZsYMlsElLnelKy78vIQuA==',
 hasMetadata=True,
 url='hycom-test/hindcast/3d4d45b7-e420-40dd-aab2-eb35db744147/GOMu0.04-expt_90.1m000-2021-2021-09-25T01:00:00Z.nc')

In [7]:
ds = c3.HycomUtil.nc_open(file.url,'/tmp')
print(ds)
print(ds.variables['time'][:])
c3.HycomUtil.nc_close(ds, file.url, '/tmp')

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    classification_level: UNCLASSIFIED
    distribution_statement: Approved for public release. Distribution unlimited.
    downgrade_date: not applicable
    classification_authority: not applicable
    institution: Naval Oceanographic Office
    source: HYCOM archive file
    history: archv2ncdf3z
    field_type: instantaneous
    Conventions: CF-1.6 NAVO_netcdf_v1.1
    History: Translated to CF-1.0 Conventions by Netcdf-Java CDM (CFGridWriter2)
Original Dataset = GOMu0.04/expt_90.1m000/data/hindcasts/2021; Translation Date = 2021-10-14T11:02:12.029Z
    geospatial_lat_min: 22.280000686645508
    geospatial_lat_max: 28.600000381469727
    geospatial_lon_min: -96.47998046875
    geospatial_lon_max: -85.47998046875
    dimensions(sizes): time(1), depth(40), lat(159), lon(276)
    variables(dimensions): int16 [4mwater_u[0m(time,depth,lat,lon), float64 [4mtime[0m(time), float64 [4mdepth[0m(depth)

1

In [20]:
file = c3.HindcastFile.get("053a31bf-fea6-4295-9f16-6be21233c78e")
def deleteFile(this):
    updated = c3.HindcastFile(**{'id':this.id})
    updated.status = 'not_downloaded'
    this.file.delete()
    this.file.clearMetadata()
    this.file.clearContentMetadata()
    updated.merge()
deleteFile(file)
file = c3.HindcastFile.get("053a31bf-fea6-4295-9f16-6be21233c78e")
file

c3.HindcastFile(
 id='053a31bf-fea6-4295-9f16-6be21233c78e',
 name='GOMu0.04-expt_90.1m000-2021-2021-09-06T09:00:00Z.nc',
 meta=c3.Meta(
        tenantTagId=150,
        tenant='dev',
        tag='tc01d',
        created=datetime.datetime(2021, 10, 13, 18, 31, 27, tzinfo=datetime.timezone.utc),
        createdBy='dadams@illinois.edu',
        updated=datetime.datetime(2021, 10, 13, 21, 37, 57, tzinfo=datetime.timezone.utc),
        updatedBy='dadams@illinois.edu',
        timestamp=datetime.datetime(2021, 10, 13, 21, 37, 57, tzinfo=datetime.timezone.utc),
        fetchInclude='[]',
        fetchType='HindcastFile'),
 version=4,
 hindcastArchive=c3.HindcastArchive(id='fa69b83f-3584-4899-8118-6420929e0ca4'),
 subsetOptions=c3.HycomSubsetOptions(
                 timeRange=c3.TimeRange(
                             start=datetime.datetime(2021, 9, 6, 9, 0),
                             end=datetime.datetime(2021, 9, 6, 9, 0)),
                 timeStride=1,
                 vars='water_u,

In [25]:
files = c3.FileSystem.inst().listFiles("hycom-data/hindcast")
files

c3.ListFilesResult()

## Download a batch of FMRC Files
Once a `HycomDataset` entry has been defined from the Hycom catalog, the `updateFMRCData` method takes care of identifying avaiable FMRCs as well as staging and downloading the Data files.
Below is a demonstration using default options except for `limit` which is set to 12 to avoid creating an uncessasary server load.  
Notes:  
* The `HycomDataset` is the Parent type at the top of the hierarchy.  The original concept is for each record in HycomDataset to define a dataset from anywhere on the hycom server (hindcast, etc.)
  - However, much initial emphisis was placed on FMRC data, so some retooling may be in order to more generally support Hindcast downloads.
  - For example the `upsertFMRC` method should really be generic and determine what data is being "upserted basec on the record
* The data archiving step, illustrated here, is preliminary to processing of the data within the netCDF file into a C3 timeseries datastore. 

### Explore Options For Download Jobs

A series of types have been created to specify options:  
`HycomSubsetOptions` - Hycom related options for the Thredds server   
`FMRCDownloadOptions` - Options realted to how to request files from the server  
`FMRCDownloadJobOptions` - C3 Batch Job Options for managing the downloads  
The next cell displays the default settings for each option type.

In [6]:
# Default subset options
print(c3.HycomSubsetOptions())
# Default download options
print(c3.FMRCDownloadOptions())
# Use default download options
print(c3.FMRCDownloadJobOptions())
data_dir = 'hycom-test'

c3.HycomSubsetOptions(
 timeStride=1,
 vars='surf_el,salinity,water_temp,water_u,water_v',
 disableLLSubset='on',
 geospatialCoverage=c3.GeospatialCoverage(
                      start=c3.LatLong(
                              latitude=18.1200008392334,
                              longitude=-98.0),
                      end=c3.LatLong(
                            latitude=31.92000007629394,
                            longitude=-76.4000244140625)),
 disableProjSubset='on',
 horizStride=1,
 vertStride=1,
 addLatLon='false',
 accept='netcdf4')
c3.FMRCDownloadOptions(
 externalDir='hycom-data',
 maxTimesPerFile=1,
 maxForecastDepth=-1,
 defaultHycomDatasetId='GOMu0.04_901m000_FMRC_1.0.1')
c3.FMRCDownloadJobOptions(batchSize=4, limit=-1)


### Update FMRC's and submit the Download Job

In [2]:
# Ensure we have a Dataset entry for the desired catalog
#cat_url = "https://tds.hycom.org/thredds/catalog/GOMu0.04/expt_90.1m000/FMRC/runs/catalog.xml"
#gom_dataset = c3.HycomDataset.upsertHycomDatasetFromCatalog(url = cat_url)

# Create an updateFMRCData job.
# Use test location to store files
# Limit # of files downloaded for demo

gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")
job = gom_dataset.updateFMRCData(
    hycomSubsetOptions = c3.HycomSubsetOptions(),
    fmrcDownloadOptions = c3.FMRCDownloadOptions(
        **{
            'externalDir': data_dir
        }
    ),
    fmrcDownloadJobOptions = c3.FMRCDownloadJobOptions(
        **{
            'batchSize': 4,
            'limit': 4
        }
    )
)
job.status()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 10, 11, 14, 22, 17, tzinfo=datetime.timezone.utc),
 startedby='dti-devops@lists.berkeley.edu',
 status='running')

In [3]:
# Monitor the job status until completed and display total # of files downloaded
import time
from IPython.display import clear_output
status = job.status()
while status.status == 'running':
    time.sleep(5)
    clear_output()
    status = job.status()
    gom_dataset = c3.HycomDataset.fetch(spec={'include':"this,fmrcArchiveSize"}).objs[0]
    print (gom_dataset)
    filecount = c3.FMRCFile.fetchCount(spec={'filter':"status=='downloaded'"})
    print(f"FMRC Archive Size: {round(gom_dataset.fmrcArchiveSize/(1024**3),2)} GiB")
    print(f"Download count: {filecount}")
    print (status)

c3.HycomDataset(
 id='GOMu0.04/expt_90.1m000',
 name='GOMu0.04/expt_90.1m000',
 meta=c3.Meta(
        tenantTagId=150,
        tenant='dev',
        tag='tc01d',
        created=datetime.datetime(2021, 10, 7, 23, 10, 40, tzinfo=datetime.timezone.utc),
        createdBy='provisioner',
        updated=datetime.datetime(2021, 10, 7, 23, 20, 22, tzinfo=datetime.timezone.utc),
        updatedBy='provisioner',
        timestamp=datetime.datetime(2021, 10, 11, 14, 23, 4, tzinfo=datetime.timezone.utc),
        fetchInclude='[]',
        fetchType='HycomDataset'),
 version=1900546,
 hycomVersion='1.0.1',
 description='HYCOM + NCODA Gulf of Mexico 1/25° Analysis (NRL)',
 geospatialCoverage=c3.GeospatialCoverage(
                      start=c3.LatLong(
                              latitude=18.1200008392334,
                              longitude=-98.0),
                      end=c3.LatLong(
                            latitude=31.920000076293945,
                            longitude=-76.400024

In [4]:
gom_dataset

c3.HycomDataset(
 id='GOMu0.04/expt_90.1m000',
 name='GOMu0.04/expt_90.1m000',
 meta=c3.Meta(
        tenantTagId=150,
        tenant='dev',
        tag='tc01d',
        created=datetime.datetime(2021, 10, 7, 23, 10, 40, tzinfo=datetime.timezone.utc),
        createdBy='provisioner',
        updated=datetime.datetime(2021, 10, 7, 23, 20, 22, tzinfo=datetime.timezone.utc),
        updatedBy='provisioner',
        timestamp=datetime.datetime(2021, 10, 9, 16, 29, 52, tzinfo=datetime.timezone.utc),
        fetchInclude='[]',
        fetchType='HycomDataset'),
 version=1376258,
 hycomVersion='1.0.1',
 description='HYCOM + NCODA Gulf of Mexico 1/25° Analysis (NRL)',
 geospatialCoverage=c3.GeospatialCoverage(
                      start=c3.LatLong(
                              latitude=18.1200008392334,
                              longitude=-98.0),
                      end=c3.LatLong(
                            latitude=31.920000076293945,
                            longitude=-76.400024

In [4]:
job.status()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 10, 11, 13, 30, 18, tzinfo=datetime.timezone.utc),
 startedby='dti-devops@lists.berkeley.edu',
 completed=datetime.datetime(2021, 10, 11, 13, 34, 43, tzinfo=datetime.timezone.utc),
 status='completed',
 newBatchSubmitted=False)

In [8]:
pd.DataFrame(c3.FileSystem.inst().listFiles(data_dir).toJson())

NameError: name 'pd' is not defined

## Quick check: Open NetCDF files from Archive

In [51]:
# Pick the first avalable file, open it and print the NetCDF metadata
file = c3.FMRCFile.fetch().objs[1]
ds = c3.HycomUtil.nc_open(file.file.url,'/tmp')
print(ds)
c3.HycomUtil.nc_close(ds, file.file.url, '/tmp')

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    classification_level: UNCLASSIFIED
    distribution_statement: Approved for public release. Distribution unlimited.
    downgrade_date: not applicable
    classification_authority: not applicable
    institution: Naval Oceanographic Office
    source: HYCOM archive file
    history: archv2ncdf3z ;
FMRC Run 2021-09-22T12:00:00Z Dataset
    field_type: instantaneous
    Conventions: CF-1.4, NAVO_netcdf_v1.1
    cdm_data_type: GRID
    featureType: GRID
    location: Proto fmrc:GOMu0.04_901m000_FMRC
    History: Translated to CF-1.0 Conventions by Netcdf-Java CDM (CFGridWriter2)
Original Dataset = fmrc:GOMu0.04_901m000_FMRC; Translation Date = 2021-09-29T20:34:03.021Z
    geospatial_lat_min: 18.1200008392334
    geospatial_lat_max: 31.920000076293945
    geospatial_lon_min: -98.0
    geospatial_lon_max: -76.4000244140625
    dimensions(sizes): time(1), lat(346), lon(541), depth(40)
    variables(dime

1

## More Detail: Dig into the C3 Types and data Records


In [38]:
help(c3.HycomFMRC)

In [37]:
# Look at all of the currently recorded FMRC data entries
# Each record in this table corresponds to a catalog entry from the xml
import pandas as pd
myFetchResult = c3.HycomFMRC.fetch(spec={
#                                   'limit': 15,
                                  #'include': 'this, myField',
                                  #'offset': 5,
                                  #'order': "ascending(myField)",
                                  #'filter': "myField == 'myString' && myOtherField == 'myOtherString'"
                                 })

pd.DataFrame(myFetchResult.objs.toJson())

Unnamed: 0,type,id,meta,version,dataset,run,urlPath,timeCoverage,geospatialCoverage,expired
0,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",2,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-22T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",True
1,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-23T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-23T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
2,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-24T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-24T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
3,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-25T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-25T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
4,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-26T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-26T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
5,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-27T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-27T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
6,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-28T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-28T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
7,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-29T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False


In [39]:
help(c3.FMRCDataArchive)

In [40]:
myFetchResult = c3.FMRCDataArchive.fetch(spec={
#                                   'limit': 15,
                                  #'include': 'this, myField',
                                  #'offset': 5,
                                  #'order': "ascending(myField)",
                                  #'filter': "myField == 'myString' && myOtherField == 'myOtherString'"
                                 })

pd.DataFrame(myFetchResult.objs.toJson())

Unnamed: 0,type,id,meta,version,fmrc,subsetOptions,downloadOptions,archiveSize
0,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",2555905,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",1615932000.0
1,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3604481,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",2147484000.0
2,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
3,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
4,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
5,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
6,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
7,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",


In [42]:
help(c3.FMRCFile)

In [41]:
myFetchResult = c3.FMRCFile.fetch(spec={
#                                   'limit': 15,
                                  #'include': 'this, myField',
                                  #'offset': 5,
                                  #'order': "ascending(myField)",
                                  #'filter': "myField == 'myString' && myOtherField == 'myOtherString'"
                                 })

pd.DataFrame(myFetchResult.objs.toJson())

Unnamed: 0,type,id,meta,version,dataArchive,fileName,timeCoverage,timeStride,timeStrideUOM,geospatialCoverage,vars,fileType,status,file
0,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T12:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39457863, 'c..."
1,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T13:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39448241, 'c..."
2,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T14:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39450453, 'c..."
3,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T15:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39456732, 'c..."
4,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T16:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39462205, 'c..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1059,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T20:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,
1060,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T21:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,
1061,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T22:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,
1062,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T23:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,


## Cleanup Records and Files Created for this Demo

In [1]:
# Cleanup
#print(f"Removed {c3.HycomFMRCFile.removeAll()} HycomFMRCFile records.")
print(f"Removed {c3.FMRCFile.removeAll()} FMRCFile records.")
#print(f"Removed {c3.FMRCDataArchive.removeAll()} FMRCDataArchive records.")
print(f"Removed {c3.HycomFMRC.removeAll()} HycomFMRC records.")
#print(f"Removed {c3.HycomDataset.removeAll()} HycomDataset records")
print(f"Removed {c3.HindcastFile.removeAll()} HindcastFile records.")
print(f"Removed {c3.HindcastArchive.removeAll()} HindcastArchive records.")
print(f"Removed {c3.HycomHindcast.removeAll()} HycomHindCast records.")

files = c3.FileSystem.inst().listFiles(data_dir+'/hindcast')
if files.files:
    print(f"Deleting {len(files.files)} files")
    c3.FileSystem.inst().deleteFilesBatch(files.files)
print("Done.")

Removed 0 FMRCFile records.
Removed 0 HycomFMRC records.
Removed 3 HycomHindCast records.
Removed 1 HindcastArchive records.
Removed 720 HindcastFile records.
Deleting 1 files
Done.


## Testing

In [None]:
# Check for errors
errors = c3.FMRCFile.fetchCount(spec={'filter':"status=='error'"})
print(errors)

In [None]:
# Grab a dataArchive record
da = c3.FMRCDataArchive.fetch(spec = {'include': 'this,dataFiles'}).objs[0]
da

In [None]:
# Check the total size of the files in this archive
print(f"Archive Size: {round(da.archiveSize/(1024**3),2)} GiB")

In [None]:
# Check the total Size of All FMRC Data
gom_dataset = c3.HycomDataset.fetch().objs[0] # The HycomFMRC is the first on only record here, now.
print(f"FMRC Archive Size: {round(gom_dataset.fmrcArchiveSize/(1024**3),2)} GiB")

In [18]:
import requests
import xmltodict
import os
import datetime
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")
def upsertHindcasts(this):
        
    with requests.get(this.catalogUrl) as r:
            doc = xmltodict.parse(r.text)

    hindcasts = []
    # Traverse the set of hincast collections in the catalog
    # Skip the FMRC and all collections
    for ds in doc['catalog']['dataset']['dataset']['dataset']:
        if 'ALL' in ds['@name'] or 'FMRC' in ds['@name']: # restrict to Hindcast
            continue
        # Parse the year off the end of the path and convert to integer 
        year = int(os.path.basename(ds["@urlPath"]))
        
        hc = c3.HycomHindcast(
            **{
                "id": ds['@ID'],
                "dataset": {"id": "GOMu0.04_901m000_FMRC_1.0.1"},
                "name":ds['@ID'],
                "description": ds['@name'],
                "urlPath": ds['@urlPath'],
                "timeCoverage": c3.TimeRange(
                    **{
                        "start": datetime.datetime(year,1,1,12),
                        "end": datetime.datetime(year+1,1,1,11)
                    }
                ).toJson()

              }
        )
        hindcasts.append(hc)
        
    
    return hindcasts
hindcasts = upsertHindcasts(gom_dataset)
hindcasts

[c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2021',
 name='GOMu0.04-expt_90.1m000-2021',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2021) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2021',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2021, 1, 1, 12, 0),
                end=datetime.datetime(2022, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2020',
 name='GOMu0.04-expt_90.1m000-2020',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2020) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2020',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2020, 1, 1, 12, 0),
                end=datetime.datetime(2021, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2019',
 name='GOMu0.04-expt_90.1m000-2019',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2019) Hindcast Data (1-hrly)',
 ur

In [15]:
datetime.datetime(2021,1,1)

datetime.datetime(2021, 1, 1, 0, 0)

## Create Lat-Long Pair Records

In [4]:
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")

# This function can become a member function of `HycomDataset`
def createLatLongPairs(this):
    xgrid_objs = c3.HycomXGrid.fetch(spec={
        'include': "longitude,index",
        'order': "ascending(index)"
    }).objs

    ygrid_objs = c3.HycomYGrid.fetch(spec={
        'include': "latitude,index",
        'order': "ascending(index)"
    }).objs


    pairs = [
        c3.HycomLatLongPair(
            **{
                'id' : this.region + '_' + str(i) + '-' + str(j),
                'i': i,
                'j': j,
                'lat': ygrid_objs[j].latitude,
                'lon': xgrid_objs[i].longitude,
                'pair': c3.LatLong(
                    **{
                        'latitude': ygrid_objs[j].latitude,
                        'longitude': xgrid_objs[i].longitude
                    }
                )
            }
        )
        for i in range(0,len(xgrid_objs)-1)
        for j in range(0,len(ygrid_objs)-1)
    ]
    c3. HycomLatLongPair.upsertBatch(pairs)
    return pairs
pairs = createLatLongPairs(gom_dataset)
pairs

[c3.HycomLatLongPair(
 id='GOMu0.04_0-0',
 i=0,
 j=0,
 lat=18.1200008392334,
 lon=-98.0,
 pair=c3.LatLong(latitude=18.1200008392334, longitude=-98.0)),
 c3.HycomLatLongPair(
 id='GOMu0.04_0-1',
 i=0,
 j=1,
 lat=18.15999984741211,
 lon=-98.0,
 pair=c3.LatLong(latitude=18.15999984741211, longitude=-98.0)),
 c3.HycomLatLongPair(
 id='GOMu0.04_0-2',
 i=0,
 j=2,
 lat=18.200000762939453,
 lon=-98.0,
 pair=c3.LatLong(latitude=18.200000762939453, longitude=-98.0)),
 c3.HycomLatLongPair(
 id='GOMu0.04_0-3',
 i=0,
 j=3,
 lat=18.239999771118164,
 lon=-98.0,
 pair=c3.LatLong(latitude=18.239999771118164, longitude=-98.0)),
 c3.HycomLatLongPair(
 id='GOMu0.04_0-4',
 i=0,
 j=4,
 lat=18.280000686645508,
 lon=-98.0,
 pair=c3.LatLong(latitude=18.280000686645508, longitude=-98.0)),
 c3.HycomLatLongPair(
 id='GOMu0.04_0-5',
 i=0,
 j=5,
 lat=18.31999969482422,
 lon=-98.0,
 pair=c3.LatLong(latitude=18.31999969482422, longitude=-98.0)),
 c3.HycomLatLongPair(
 id='GOMu0.04_0-6',
 i=0,
 j=6,
 lat=18.3600006103