# Hycom Download Demo

## Requirements
* For all cells to properly execute install and run the `py-hycom_1_0_0` kernel.


In [2]:
data_dir = 'hycom-test'

## Download Hindcast Data

### 0. Populate Hindcast Entries
- To ne an `afterCreate` method for HycomDataset

In [2]:
import requests
import xmltodict
import os
import datetime
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")
def upsertHindcasts(this):
        
    with requests.get(this.catalogUrl) as r:
            doc = xmltodict.parse(r.text)

    hindcasts = []
    # Traverse the set of hincast collections in the catalog
    # Skip the FMRC and all collections
    for ds in doc['catalog']['dataset']['dataset']['dataset']:
        if 'ALL' in ds['@name'] or 'FMRC' in ds['@name']: # restrict to Hindcast
            continue
        # Parse the year off the end of the path and convert to integer 
        year = int(os.path.basename(ds["@urlPath"]))
        
        hc = c3.HycomHindcast(
            **{
                "id": ds['@ID'],
                "dataset": {"id": "GOMu0.04_901m000_FMRC_1.0.1"},
                "name":ds['@ID'],
                "description": ds['@name'],
                "urlPath": ds['@urlPath'],
                "timeCoverage": c3.TimeRange(
                    **{
                        "start": datetime.datetime(year,1,1,12),
                        "end": datetime.datetime(year+1,1,1,11)
                    }
                ).toJson()

              }
        )
        hindcasts.append(hc)
        c3.HycomHindcast.upsertBatch(hindcasts)
    return hindcasts
hindcasts = upsertHindcasts(gom_dataset)
hindcasts

[c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2021',
 name='GOMu0.04-expt_90.1m000-2021',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2021) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2021',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2021, 1, 1, 12, 0),
                end=datetime.datetime(2022, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2020',
 name='GOMu0.04-expt_90.1m000-2020',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2020) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2020',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2020, 1, 1, 12, 0),
                end=datetime.datetime(2021, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2019',
 name='GOMu0.04-expt_90.1m000-2019',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2019) Hindcast Data (1-hrly)',
 ur

### 1. Create Archive Entry
- A `HindcastArchive` describes a set of Hindcast files to be retrived using a particular set of `HycomSubsetOption`s and `HycomDownloadOption`s.


In [1]:
import datetime
# Example: Define archive for surface `water_u` and `water_v` on a lat-long subset for the month of September 2021
hcast = c3.HycomHindcast.get("GOMu0.04-expt_90.1m000-2021")
archv = c3.HindcastArchive(
    **{
        "hindcast": hcast,
        "description": "Test subset from 9/2021 for Depth=4",
        "subsetOptions": c3.HycomSubsetOptions(
            **{
                "timeRange": {
                    "start": datetime.datetime(2021,9,1,0),
                    "end": datetime.datetime(2021,10,1,0)
                },
                "vars": "water_u,water_v",
                "disableLLSubset": "on",
                "vertCoord": 4
            }
        ).toJson(),
        "downloadOptions": c3.HycomDownloadOptions(
            **{
                'externalDir': 'hycom-test',
                'maxTimesPerFile': 24
            }
        ).toJson()
        
    }
)
archv.upsert()
archv

c3.HindcastArchive(
 hindcast=c3.HycomHindcast(
            id='GOMu0.04-expt_90.1m000-2021',
            name='GOMu0.04-expt_90.1m000-2021',
            meta=c3.Meta(
                   tenantTagId=150,
                   tenant='dev',
                   tag='tc01d',
                   created=datetime.datetime(2021, 10, 13, 23, 56, 36, tzinfo=datetime.timezone.utc),
                   createdBy='dadams@illinois.edu',
                   updated=datetime.datetime(2021, 10, 15, 21, 10, 44, tzinfo=datetime.timezone.utc),
                   updatedBy='dadams@illinois.edu',
                   timestamp=datetime.datetime(2021, 11, 4, 12, 6, 27, tzinfo=datetime.timezone.utc),
                   fetchInclude='[]',
                   fetchType='HycomHindcast'),
            version=57278466,
            dataset=c3.HycomDataset(id='GOMu0.04/expt_90.1m000'),
            description='(2021) Hindcast Data (1-hrly)',
            urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2021',
            timeC

### Update the timeRange for an Archive

In [1]:
from datetime import datetime, timedelta
aid = '9438b440-e200-4764-b6fe-ee6a278aff55'
archv = c3.HindcastArchive.get(aid)
print(f"start: {archv.subsetOptions.timeRange.start}")
print(f"end: {archv.subsetOptions.timeRange.end}")
new = c3.TimeRange(
    #start = datetime(2021,9,1,12),
    start = archv.subsetOptions.timeRange.start,
    #end = archv.subsetOptions.timeRange.end + timedelta(days=1)
    end = datetime(2021,12,11,23)
)
print(f"new start: {new.start}")
print(f"new end: {new.end}")
archv.updateTimeRange(new)
archv = c3.HindcastArchive.get(aid)
print(f"check start: {archv.subsetOptions.timeRange.start}")
print(f"check end: {archv.subsetOptions.timeRange.end}")

start: 2021-09-01 00:00:00
end: 2021-12-04 23:00:00
new start: 2021-09-01 00:00:00
new end: 2021-12-11 23:00:00
check start: 2021-09-01 00:00:00
check end: 2021-12-11 23:00:00


### Remove HindcastFiles

In [8]:
ids=[
    "9438b440-e200-4764-b6fe-ee6a278aff55/GOMu0.04-expt_90.1m000-2021-2021-10-03T00:00:00Z-2021-10-03T00:00:00Z.nc"
    ]
data_dir = 'hycom-test'
files = c3.FileSystem.inst().listFiles(data_dir+'/hindcast')
print(len(files.files))
hfs = c3.HindcastFile.fetch(spec={"filter": c3.Filter.inst().intersects("id",ids)}).objs
#print (c3.ObjList(c3.HindcastFile,**hfs.toJson()))
for hf in hfs:
    #print(hf)
    print (hf.remove())
files = c3.FileSystem.inst().listFiles(data_dir+'/hindcast')
print(len(files.files))

36
True
35


### 2. Create `HindcastFile` entries
- To be automated in `afterCreate` and `afterUpdate`

In [3]:
# This has been automated, no need to do manually!
# archv = c3.HindcastArchive.get("7a187ef2-d88d-4658-b19f-451aef87535f")
# stat = archv.stageFiles()
# stat

In [5]:
# file_id = "0033cdfe-6b50-4c9f-a5da-76381220ec38"
# fr = c3.HindcastFile.get(file_id)
# file = fr.download()
# file

In [7]:
ds = c3.HycomUtil.nc_open(file.url,'/tmp')
print(ds)
print(ds.variables['time'][:])
c3.HycomUtil.nc_close(ds, file.url, '/tmp')

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    classification_level: UNCLASSIFIED
    distribution_statement: Approved for public release. Distribution unlimited.
    downgrade_date: not applicable
    classification_authority: not applicable
    institution: Naval Oceanographic Office
    source: HYCOM archive file
    history: archv2ncdf3z
    field_type: instantaneous
    Conventions: CF-1.6 NAVO_netcdf_v1.1
    History: Translated to CF-1.0 Conventions by Netcdf-Java CDM (CFGridWriter2)
Original Dataset = GOMu0.04/expt_90.1m000/data/hindcasts/2021; Translation Date = 2021-10-14T11:02:12.029Z
    geospatial_lat_min: 22.280000686645508
    geospatial_lat_max: 28.600000381469727
    geospatial_lon_min: -96.47998046875
    geospatial_lon_max: -85.47998046875
    dimensions(sizes): time(1), depth(40), lat(159), lon(276)
    variables(dimensions): int16 [4mwater_u[0m(time,depth,lat,lon), float64 [4mtime[0m(time), float64 [4mdepth[0m(depth)

1

### Download batch of Staged Hindcast Files

In [7]:
job = c3.HindcastDownloadJob(**{'options': c3.HycomDownloadJobOptions().toJson()}).upsert()
job.start()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 12, 12, 12, 10, 20, tzinfo=datetime.timezone.utc),
 startedby='dadams@illinois.edu',
 status='running')

In [8]:
job.status()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 12, 12, 12, 10, 20, tzinfo=datetime.timezone.utc),
 startedby='dadams@illinois.edu',
 status='running',
 newBatchSubmitted=True)

In [2]:
# # Create Custom Batch job
# job = c3.HindcastDownloadJob(
#     **{
#         'options': c3.HycomDownloadJobOptions(
#             **{
#                 'batchSize': 31,
#                 'limit': 300
#             }
#         ).toJson()
#     }
# ).upsert()
# job.start()

In [9]:
import time
from IPython.display import clear_output
status = job.status()
while status.status == 'running':
    archv = c3.HindcastArchive.get("9438b440-e200-4764-b6fe-ee6a278aff55")
    clear_output()
    status = job.status()
    #gom_dataset = c3.HycomDataset.fetch(spec={'include':"this,hindcastArchiveSize"}).objs[0]
    #print (gom_dataset)
    print(f"Archive Size: {round(archv.archiveSize/(1024**3),5)} GiB")
    filecount = c3.HindcastFile.fetchCount(spec={'filter':"hindcastArchive.id=='"+archv.id+"' && status=='downloaded'"})
    allcount = c3.HindcastFile.fetchCount(spec={'filter':"hindcastArchive.id=='"+archv.id+"'"})
    print(f"Download count: {filecount} of {allcount}")
    print (status)
    time.sleep(20)

Archive Size: 0.70052 GiB
Download count: 102 of 102
c3.BatchJobStatus(
 started=datetime.datetime(2021, 12, 12, 12, 10, 20, tzinfo=datetime.timezone.utc),
 startedby='dadams@illinois.edu',
 completed=datetime.datetime(2021, 12, 12, 12, 28, 22, tzinfo=datetime.timezone.utc),
 status='completed',
 newBatchSubmitted=False)


In [11]:
job.cancel()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 11, 4, 11, 46, 35, tzinfo=datetime.timezone.utc),
 startedby='dadams@illinois.edu',
 completed=datetime.datetime(2021, 11, 4, 12, 5, 6, tzinfo=datetime.timezone.utc),
 status='canceled',
 newBatchSubmitted=True)

In [26]:
archv = c3.HindcastArchive.get("7a187ef2-d88d-4658-b19f-451aef87535f")
filecount = c3.HindcastFile.fetch(spec={'include':'this','filter':"hindcast.id=='"+archv.id+"' && status=='downloaded'"}).objs
filecount 

500 - NotClassified - c3.love.exceptions.C3RuntimeException_wrapIt [1472.353]
message: "wrapped RuntimeException: Field 'hindcast' does not exist in expression: 'meta.tenantTagId == 150 && (hindcast.id == '7a187ef2-d88d-4658-b19f-451aef87535f' && status == 'downloaded')' for type 'HindcastFile'."
JSON: {"spec": {"type": "FetchSpec", "filter": "hindcast.id=='7a187ef2-d88d-4658-b19f-451aef87535f' && status=='downloaded'", "include": "this", "offset": 0, "limit": 2000}}


C3RuntimeException: 500 - NotClassified - c3.love.exceptions.C3RuntimeException_wrapIt [1472.353]
message: "wrapped RuntimeException: Field 'hindcast' does not exist in expression: 'meta.tenantTagId == 150 && (hindcast.id == '7a187ef2-d88d-4658-b19f-451aef87535f' && status == 'downloaded')' for type 'HindcastFile'."
JSON: {"spec": {"type": "FetchSpec", "filter": "hindcast.id=='7a187ef2-d88d-4658-b19f-451aef87535f' && status=='downloaded'", "include": "this", "offset": 0, "limit": 2000}}

In [9]:
file = c3.HindcastFile.get("053a31bf-fea6-4295-9f16-6be21233c78e")
def deleteFile(this):
    updated = c3.HindcastFile(**{'id':this.id})
    updated.status = 'not_downloaded'
    this.file.delete()
    this.file.clearMetadata()
    this.file.clearContentMetadata()
    updated.merge()
deleteFile(file)
file = c3.HindcastFile.get("053a31bf-fea6-4295-9f16-6be21233c78e")
file

AttributeError: 'NoneType' object has no attribute 'id'

In [16]:
from datetime import datetime
datetime.fromtimestamp(1638545687)

datetime.datetime(2021, 12, 3, 15, 34, 47)

In [25]:
files = c3.FileSystem.inst().listFiles("hycom-data/hindcast")
files

c3.ListFilesResult()

## Download a batch of FMRC Files
Once a `HycomDataset` entry has been defined from the Hycom catalog, the `updateFMRCData` method takes care of identifying avaiable FMRCs as well as staging and downloading the Data files.
Below is a demonstration using default options except for `limit` which is set to 12 to avoid creating an uncessasary server load.  
Notes:  
* The `HycomDataset` is the Parent type at the top of the hierarchy.  The original concept is for each record in HycomDataset to define a dataset from anywhere on the hycom server (hindcast, etc.)
  - However, much initial emphisis was placed on FMRC data, so some retooling may be in order to more generally support Hindcast downloads.
  - For example the `upsertFMRC` method should really be generic and determine what data is being "upserted basec on the record
* The data archiving step, illustrated here, is preliminary to processing of the data within the netCDF file into a C3 timeseries datastore. 

### Explore Options For Download Jobs

A series of types have been created to specify options:  
`HycomSubsetOptions` - Hycom related options for the Thredds server   
`FMRCDownloadOptions` - Options realted to how to request files from the server  
`FMRCDownloadJobOptions` - C3 Batch Job Options for managing the downloads  
The next cell displays the default settings for each option type.

In [7]:
# Default subset options
print(c3.HycomSubsetOptions())
# Default download options
print(c3.HycomDownloadOptions())
# Use default download options
print(c3.FMRCDownloadJobOptions())
data_dir = 'hycom-test'

c3.HycomSubsetOptions(
 timeStride=1,
 vars='surf_el,salinity,water_temp,water_u,water_v',
 disableLLSubset='on',
 geospatialCoverage=c3.GeospatialCoverage(
                      start=c3.LatLong(
                              latitude=18.1200008392334,
                              longitude=-98.0),
                      end=c3.LatLong(
                            latitude=31.92000007629394,
                            longitude=-76.4000244140625)),
 disableProjSubset='on',
 horizStride=1,
 vertStride=1,
 vertCoord=-1,
 addLatLon='false',
 accept='netcdf4')
c3.HycomDownloadOptions(
 externalDir='hycom-data',
 maxTimesPerFile=1,
 maxForecastDepth=-1)
c3.FMRCDownloadJobOptions(batchSize=4, limit=-1)


### Update FMRC's and submit the Download Job

In [2]:
# Ensure we have a Dataset entry for the desired catalog
#cat_url = "https://tds.hycom.org/thredds/catalog/GOMu0.04/expt_90.1m000/FMRC/runs/catalog.xml"
#gom_dataset = c3.HycomDataset.upsertHycomDatasetFromCatalog(url = cat_url)

# Create an updateFMRCData job.
data_dir = 'hycom-data'
#gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")

subsetOptions = c3.HycomSubsetOptions(
    **{
        "vars": "water_u,water_v",
        "disableLLSubset": "on",
        "vertCoord": 4
    }
)
# The download options
# Forces updateFMRCData() to bundle all forcast times in 1 file
downloadOptions = c3.HycomDownloadOptions(
    **{
        'externalDir': data_dir,
        'maxTimesPerFile': -1
    }
)

job = c3.HycomDataset.updateFMRCData(
    hycomDatasetId = "GOMu0.04/expt_90.1m000",
    hycomSubsetOptions = subsetOptions,
    hycomDownloadOptions = downloadOptions,
    fmrcDownloadJobOptions = c3.FMRCDownloadJobOptions(
        batchSize = 3,
        limit = -1
    )
)
job.status()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 11, 28, 22, 38, 57, tzinfo=datetime.timezone.utc),
 startedby='dti-devops@lists.berkeley.edu',
 status='running')

In [3]:
# Monitor the job status until completed and display total # of files downloaded
import time
from IPython.display import clear_output
status = job.status()
while status.status == 'running':
    time.sleep(5)
    clear_output()
    status = job.status()
    gom_dataset = c3.HycomDataset.fetch(spec={'include':"this,fmrcArchiveSize"}).objs[0]
    #print (gom_dataset)
    filecount = c3.FMRCFile.fetchCount(spec={'filter':"status=='downloaded'"})
    print(f"FMRC Archive Size: {round(gom_dataset.fmrcArchiveSize/(1024**3),2)} GiB")
    print(f"Download count: {filecount}")
    print("")
    print (status)

FMRC Archive Size: 0.78 GiB
Download count: 8

c3.BatchJobStatus(
 started=datetime.datetime(2021, 11, 28, 22, 38, 57, tzinfo=datetime.timezone.utc),
 startedby='dti-devops@lists.berkeley.edu',
 completed=datetime.datetime(2021, 11, 28, 22, 39, 28, tzinfo=datetime.timezone.utc),
 status='completed',
 newBatchSubmitted=False)


In [3]:
job.status()

c3.BatchJobStatus(
 started=datetime.datetime(2021, 11, 27, 15, 12, 47, tzinfo=datetime.timezone.utc),
 startedby='dti-devops@lists.berkeley.edu',
 status='running',
 newBatchSubmitted=True)

### Strange FMRCs - should be fixed now

In [5]:
from datetime import timedelta
def gentimes(start,end,stride):
        t = start
        while t <= end:
            yield t
            t += timedelta(hours=stride)
strangeFMRCs = c3.HycomFMRC.evaluate({'group': "id", 'projection':"id,count(fmrcFiles)",'having':"count(fmrcFiles)>1"}).tuples
for sfmrc in strangeFMRCs:
    id = sfmrc.cells[0].str
    fmrc = c3.HycomFMRC.fetch(spec={'filter':"id=='"+id+"'",'include':"timeCoverage,runDate,fmrcFiles.subsetOptions,fmrcFiles.downloadOptions,fmrcFiles.file"}).objs[0]
    files = fmrc.fmrcFiles
    for f in files:
        print (fmrc.runDate)
        print(fmrc.timeCoverage)
        print(f.subsetOptions.timeRange)
        print(f.downloadOptions.maxTimesPerFile)
        print(len(list(gentimes(f.subsetOptions.timeRange.start,f.subsetOptions.timeRange.end,1))))
        #nc = c3.HycomUtil.nc_open(f.file.url)
        #print(nc)
        #c3.HycomUtil.nc_close(nc,f.file.url)

2021-11-21 12:00:00+00:00
c3.TimeRange(
 start=datetime.datetime(2021, 11, 21, 12, 0, tzinfo=datetime.timezone.utc),
 end=datetime.datetime(2021, 11, 27, 0, 0, tzinfo=datetime.timezone.utc))
c3.TimeRange(
 start=datetime.datetime(2021, 11, 21, 12, 0, tzinfo=datetime.timezone.utc),
 end=datetime.datetime(2021, 11, 26, 12, 0, tzinfo=datetime.timezone.utc))
121
121
2021-11-21 12:00:00+00:00
c3.TimeRange(
 start=datetime.datetime(2021, 11, 21, 12, 0, tzinfo=datetime.timezone.utc),
 end=datetime.datetime(2021, 11, 27, 0, 0, tzinfo=datetime.timezone.utc))
c3.TimeRange(
 start=datetime.datetime(2021, 11, 21, 12, 0, tzinfo=datetime.timezone.utc),
 end=datetime.datetime(2021, 11, 27, 0, 0, tzinfo=datetime.timezone.utc))
133
133
2021-11-21 12:00:00+00:00
c3.TimeRange(
 start=datetime.datetime(2021, 11, 21, 12, 0, tzinfo=datetime.timezone.utc),
 end=datetime.datetime(2021, 11, 27, 0, 0, tzinfo=datetime.timezone.utc))
c3.TimeRange(
 start=datetime.datetime(2021, 11, 26, 13, 0, tzinfo=datetime.time

## Quick check: Open NetCDF files from Archive

In [51]:
# Pick the first avalable file, open it and print the NetCDF metadata
file = c3.FMRCFile.fetch().objs[1]
ds = c3.HycomUtil.nc_open(file.file.url,'/tmp')
print(ds)
c3.HycomUtil.nc_close(ds, file.file.url, '/tmp')

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    classification_level: UNCLASSIFIED
    distribution_statement: Approved for public release. Distribution unlimited.
    downgrade_date: not applicable
    classification_authority: not applicable
    institution: Naval Oceanographic Office
    source: HYCOM archive file
    history: archv2ncdf3z ;
FMRC Run 2021-09-22T12:00:00Z Dataset
    field_type: instantaneous
    Conventions: CF-1.4, NAVO_netcdf_v1.1
    cdm_data_type: GRID
    featureType: GRID
    location: Proto fmrc:GOMu0.04_901m000_FMRC
    History: Translated to CF-1.0 Conventions by Netcdf-Java CDM (CFGridWriter2)
Original Dataset = fmrc:GOMu0.04_901m000_FMRC; Translation Date = 2021-09-29T20:34:03.021Z
    geospatial_lat_min: 18.1200008392334
    geospatial_lat_max: 31.920000076293945
    geospatial_lon_min: -98.0
    geospatial_lon_max: -76.4000244140625
    dimensions(sizes): time(1), lat(346), lon(541), depth(40)
    variables(dime

1

## More Detail: Dig into the C3 Types and data Records


In [38]:
help(c3.HycomFMRC)

In [37]:
# Look at all of the currently recorded FMRC data entries
# Each record in this table corresponds to a catalog entry from the xml
import pandas as pd
myFetchResult = c3.HycomFMRC.fetch(spec={
#                                   'limit': 15,
                                  #'include': 'this, myField',
                                  #'offset': 5,
                                  #'order': "ascending(myField)",
                                  #'filter': "myField == 'myString' && myOtherField == 'myOtherString'"
                                 })

pd.DataFrame(myFetchResult.objs.toJson())

Unnamed: 0,type,id,meta,version,dataset,run,urlPath,timeCoverage,geospatialCoverage,expired
0,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",2,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-22T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",True
1,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-23T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-23T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
2,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-24T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-24T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
3,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-25T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-25T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
4,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-26T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-26T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
5,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-27T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-27T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
6,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-28T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-28T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False
7,HycomFMRC,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomDataset', 'id': 'GOMu0.04_901m0...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'TimeRange', 'start': '2021-09-29T12:...","{'type': 'GeospatialCoverage', 'start': {'type...",False


In [39]:
help(c3.FMRCDataArchive)

In [40]:
myFetchResult = c3.FMRCDataArchive.fetch(spec={
#                                   'limit': 15,
                                  #'include': 'this, myField',
                                  #'offset': 5,
                                  #'order': "ascending(myField)",
                                  #'filter': "myField == 'myString' && myOtherField == 'myOtherString'"
                                 })

pd.DataFrame(myFetchResult.objs.toJson())

Unnamed: 0,type,id,meta,version,fmrc,subsetOptions,downloadOptions,archiveSize
0,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",2555905,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",1615932000.0
1,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3604481,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",2147484000.0
2,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
3,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
4,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
5,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
6,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",
7,FMRCDataArchive,GOMu0.04/expt_90.1m000/FMRC/runs/GOMu0.04_901m...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'HycomFMRC', 'id': 'GOMu0.04/expt_90....","{'type': 'FMRCSubsetOptions', 'timeRange': {'t...","{'type': 'FMRCDownloadOptions', 'externalDir':...",


In [42]:
help(c3.FMRCFile)

In [41]:
myFetchResult = c3.FMRCFile.fetch(spec={
#                                   'limit': 15,
                                  #'include': 'this, myField',
                                  #'offset': 5,
                                  #'order': "ascending(myField)",
                                  #'filter': "myField == 'myString' && myOtherField == 'myOtherString'"
                                 })

pd.DataFrame(myFetchResult.objs.toJson())

Unnamed: 0,type,id,meta,version,dataArchive,fileName,timeCoverage,timeStride,timeStrideUOM,geospatialCoverage,vars,fileType,status,file
0,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T12:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39457863, 'c..."
1,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T13:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39448241, 'c..."
2,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T14:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39450453, 'c..."
3,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T15:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39456732, 'c..."
4,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",3,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-22T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-09-22T16:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,downloaded,"{'type': 'File', 'contentLength': 39462205, 'c..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1059,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T20:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,
1060,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T21:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,
1061,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T22:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,
1062,FMRCFile,GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'Meta', 'tenantTagId': 150, 'tenant':...",1,"{'type': 'FMRCDataArchive', 'id': 'GOMu0.04/ex...",GOMu0.04_901m000_FMRC_RUN_2021-09-29T12:00:00Z...,"{'type': 'TimeRange', 'start': '2021-10-04T23:...",1,"{'type': 'Unit', 'id': 'hour'}","{'type': 'GeospatialCoverage', 'start': {'type...","surf_el,salinity,water_temp,water_u,water_v",netcdf4,not_downloaded,


## Cleanup Records and Files Created for this Demo

In [18]:
# Cleanup
data_dir='hycom-test'
fmrcFiles = c3.FMRCFile.fetch(spec={'limit':-1}).objs
for f in fmrcFiles:
    if (f.file):
        print(f"Deleting {f.id}")
        f.file.delete()
print(f"Removed {c3.FMRCFile.removeAll()} FMRCFile records.")
#print(f"Removed {c3.FMRCDataArchive.removeAll()} FMRCDataArchive records.")
print(f"Removed {c3.HycomFMRC.removeAll()} HycomFMRC records.")
#print(f"Removed {c3.HycomDataset.removeAll()} HycomDataset records")
#print(f"Removed {c3.HindcastFile.removeAll()} HindcastFile records.")
#print(f"Removed {c3.HindcastArchive.removeAll()} HindcastArchive records.")
#print(f"Removed {c3.HycomHindcast.removeAll()} HycomHindCast records.")

# files = c3.FileSystem.inst().listFiles(data_dir+'/hindcast')
# if files.files:
#     print(f"Deleting {len(files.files)} files")
#     c3.FileSystem.inst().deleteFilesBatch(files.files)
# print("Done.")

Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-11T12:00:00Z-2021-11-11T12:00:00Z-2021-11-17T00:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-12T12:00:00Z-2021-11-12T12:00:00Z-2021-11-17T12:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-13T12:00:00Z-2021-11-13T12:00:00Z-2021-11-19T00:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-14T12:00:00Z-2021-11-14T12:00:00Z-2021-11-20T00:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-16T12:00:00Z-2021-11-16T12:00:00Z-2021-11-22T00:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-17T12:00:00Z-2021-11-17T12:00:00Z-2021-11-23T00:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-18T12:00:00Z-2021-11-18T12:00:00Z-2021-11-23T12:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-19T12:00:00Z-2021-11-19T12:00:00Z-2021-11-24T12:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-20T12:00:00Z-2021-11-20T12:00:00Z-2021-11-25T12:00:00Z.nc
Deleting GOMu0.04_901m000_FMRC_RUN_2021-11-21T12:00:00Z-2021-11-21T12:00:00Z-2021-11-26T12:

## Testing

In [None]:
# Check for errors
errors = c3.FMRCFile.fetchCount(spec={'filter':"status=='error'"})
print(errors)

In [None]:
# Grab a dataArchive record
da = c3.FMRCDataArchive.fetch(spec = {'include': 'this,dataFiles'}).objs[0]
da

In [None]:
# Check the total size of the files in this archive
print(f"Archive Size: {round(da.archiveSize/(1024**3),2)} GiB")

In [None]:
# Check the total Size of All FMRC Data
gom_dataset = c3.HycomDataset.fetch().objs[0] # The HycomFMRC is the first on only record here, now.
print(f"FMRC Archive Size: {round(gom_dataset.fmrcArchiveSize/(1024**3),2)} GiB")

In [18]:
import requests
import xmltodict
import os
import datetime
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")
def upsertHindcasts(this):
        
    with requests.get(this.catalogUrl) as r:
            doc = xmltodict.parse(r.text)

    hindcasts = []
    # Traverse the set of hincast collections in the catalog
    # Skip the FMRC and all collections
    for ds in doc['catalog']['dataset']['dataset']['dataset']:
        if 'ALL' in ds['@name'] or 'FMRC' in ds['@name']: # restrict to Hindcast
            continue
        # Parse the year off the end of the path and convert to integer 
        year = int(os.path.basename(ds["@urlPath"]))
        
        hc = c3.HycomHindcast(
            **{
                "id": ds['@ID'],
                "dataset": {"id": "GOMu0.04_901m000_FMRC_1.0.1"},
                "name":ds['@ID'],
                "description": ds['@name'],
                "urlPath": ds['@urlPath'],
                "timeCoverage": c3.TimeRange(
                    **{
                        "start": datetime.datetime(year,1,1,12),
                        "end": datetime.datetime(year+1,1,1,11)
                    }
                ).toJson()

              }
        )
        hindcasts.append(hc)
        
    
    return hindcasts
hindcasts = upsertHindcasts(gom_dataset)
hindcasts

[c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2021',
 name='GOMu0.04-expt_90.1m000-2021',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2021) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2021',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2021, 1, 1, 12, 0),
                end=datetime.datetime(2022, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2020',
 name='GOMu0.04-expt_90.1m000-2020',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2020) Hindcast Data (1-hrly)',
 urlPath='GOMu0.04/expt_90.1m000/data/hindcasts/2020',
 timeCoverage=c3.TimeRange(
                start=datetime.datetime(2020, 1, 1, 12, 0),
                end=datetime.datetime(2021, 1, 1, 11, 0))),
 c3.HycomHindcast(
 id='GOMu0.04-expt_90.1m000-2019',
 name='GOMu0.04-expt_90.1m000-2019',
 dataset=c3.HycomDataset(id='GOMu0.04_901m000_FMRC_1.0.1'),
 description='(2019) Hindcast Data (1-hrly)',
 ur

In [15]:
datetime.datetime(2021,1,1)

datetime.datetime(2021, 1, 1, 0, 0)

## Create Lat-Long Pair  and Series Records

### Latest Create Routines

#### Create `HycomLatLongPair` Records

In [1]:
c3.HycomLatLongPair.removeAll()

186300

In [2]:
from itertools import islice
def chunk(gen, k):
    while True:
        chunk = [*islice(gen, 0, k)]
        if chunk:
            yield chunk
        else:
            break

In [3]:
def getGenPairs(this):
    xgrid_objs = c3.HycomXGrid.fetch(spec={
        'include': "longitude,index",
        'order': "ascending(index)",
        'limit': -1
    }).objs

    ygrid_objs = c3.HycomYGrid.fetch(spec={
        'include': "latitude,index",
        'order': "ascending(index)",
        'limit': -1
    }).objs
    gen = (
            {
                    'id' : this.region + '_' + str(i) + '-' + str(j),
                    'i': i,
                    'j': j,
                    'lat': ygrid_objs[j].latitude,
                    'lon': xgrid_objs[i].longitude,
                    'pair': {
                            'latitude': ygrid_objs[j].latitude,
                            'longitude': xgrid_objs[i].longitude
                        }
                }
            for i in range(len(xgrid_objs))
            for j in range(len(ygrid_objs))
    )
    return gen

In [4]:
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")
genPairs = getGenPairs(gom_dataset)
# createBatch with batches of 10,000.
def createFunc(objs):
    c3.HycomLatLongPair.createBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,10000)])

#### Create SurfaceHindcastDataSeries Records

In [4]:
latLongPairs = c3.HycomLatLongPair.fetch(spec={'limit': -1}).objs

In [5]:
from itertools import islice
def chunk(gen, k):
    while True:
        chunk = [*islice(gen, 0, k)]
        if chunk:
            yield chunk
        else:
            break
seriesGen = (
#        {
#             'id': 'HNDCST_SRFC_' + str(pair.i) + '-' + str(pair.j), 
#             'latLongPair': pair.id
#         }
    c3.SurfaceHindcastDataSeries(
       **{
            'id': 'HNDCST_SRFC_' + str(pair.i) + '-' + str(pair.j), 
            'latLongPair': pair.id
        }
    )
    for pair in latLongPairs
)

In [7]:
c3.SurfaceHindcastDataSeries.removeAll()

186300

In [10]:
_ = c3.SurfaceHindcastDataSeries.createBatch(next(chunk(seriesGen,50000)))

In [None]:
# createBatch with batches of 10,000.
# def createFunc(objs):
#     c3.SurfaceHindcastDataSeries.createBatch(objs)
# _ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(seriesGen,10000)])

### Testing

In [59]:
# createBatch for all 186,300 at once
def createFunc(objs):
    c3.HycomLatLongPair.createBatch(objs)
c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,186300)])

[None]

In [82]:
c3.HycomLatLongPair.removeAll()

186300

In [83]:
genPairs = getGenPairs(gom_dataset)

In [72]:
# createBatch with batches of 1,000.
def createFunc(objs):
    c3.HycomLatLongPair.mergeBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,1863)])

In [75]:
# createBatch for 100 batches of 1,863.
def createFunc(objs):
    c3.HycomLatLongPair.createBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,1863)])

In [69]:
# createBatch with batches of 5,000.
def createFunc(objs):
    c3.HycomLatLongPair.createBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,5000)])

In [78]:
# createBatch with batches of 10,000.
def createFunc(objs):
    c3.HycomLatLongPair.createBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,10000)])

In [81]:
# createBatch with batches of 20,000.
def createFunc(objs):
    c3.HycomLatLongPair.createBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,20000)])

In [84]:
# mergeBatch with batches of 20,000.
def createFunc(objs):
    c3.HycomLatLongPair.mergeBatch(objs)
_ = c3.Client.executeConcurrently(createFunc,[(x,) for x in chunk(genPairs,20000)])

In [4]:
xgrid_objs = c3.HycomXGrid.fetch(spec={
        'include': "longitude,index",
        'order': "ascending(index)",
        'limit': -1
    }).objs

In [11]:
a=[1,2]
len(a)
list(range(2))

[0, 1]

In [12]:
gom_dataset = c3.HycomDataset.get("GOMu0.04/expt_90.1m000")

# This function can become a member function of `HycomDataset`
def createLatLongPairs(this):
    xgrid_objs = c3.HycomXGrid.fetch(spec={
        'include': "longitude,index",
        'order': "ascending(index)",
        'limit': -1
    }).objs

    ygrid_objs = c3.HycomYGrid.fetch(spec={
        'include': "latitude,index",
        'order': "ascending(index)",
        'limit': -1
    }).objs


#     pairs = [
#         c3.HycomLatLongPair(
#             **{
#                 'id' : this.region + '_' + str(i) + '-' + str(j),
#                 'i': i,
#                 'j': j,
#                 'lat': ygrid_objs[j].latitude,
#                 'lon': xgrid_objs[i].longitude,
#                 'pair': c3.LatLong(
#                     **{
#                         'latitude': ygrid_objs[j].latitude,
#                         'longitude': xgrid_objs[i].longitude
#                     }
#                 )
#             }
#         )
#         for i in range(0,len(xgrid_objs)-1)
#         for j in range(0,len(ygrid_objs)-1)
#     ]
    pairs = [
        {
                'id' : this.region + '_' + str(i) + '-' + str(j),
                'i': i,
                'j': j,
                'lat': ygrid_objs[j].latitude,
                'lon': xgrid_objs[i].longitude,
                'pair': {
                        'latitude': ygrid_objs[j].latitude,
                        'longitude': xgrid_objs[i].longitude
                    }
            }
        for i in range(len(xgrid_objs))
        for j in range(len(ygrid_objs))
    ]
#     c3. HycomLatLongPair.upsertBatch(pairs)
#     return pairs
    return pairs
pairs = createLatLongPairs(gom_dataset)
pairs[0]

{'id': 'GOMu0.04_0-0',
 'i': 0,
 'j': 0,
 'lat': 18.1200008392334,
 'lon': -98.0,
 'pair': {'latitude': 18.1200008392334, 'longitude': -98.0}}

In [9]:
len(pairs)

186300

upsertBatch - 4m7s
createBatch - 3m14s

In [20]:
c3. HycomLatLongPair.createBatch(pairs)

c3.ObjList<HycomLatLongPair>(
 stats=c3.Stats(
         createdObjCount=186300,
         updatedObjCount=0,
         removedObjCount=0,
         failedObjCount=0,
         objCount=186300,
         dbWait=0.0,
         time=75.929,
         dbTime=27.000258945,
         objsPerSecond=2453.607975872196,
         dbObjsPerSecond=6899.933825801314),
 objs=c3.Arry<HycomLatLongPair>([c3.HycomLatLongPair(
         id='GOMu0.04_0-0',
         meta=c3.Meta(
                created=datetime.datetime(2021, 10, 27, 20, 53, 14, tzinfo=datetime.timezone.utc),
                updated=datetime.datetime(2021, 10, 27, 20, 53, 14, tzinfo=datetime.timezone.utc),
                timestamp=datetime.datetime(2021, 10, 27, 20, 53, 14, tzinfo=datetime.timezone.utc)),
         version=1),
        c3.HycomLatLongPair(
         id='GOMu0.04_0-1',
         meta=c3.Meta(
                created=datetime.datetime(2021, 10, 27, 20, 53, 14, tzinfo=datetime.timezone.utc),
                updated=datetime.datetime(2021

In [2]:
c3.SurfaceHindcastDataSeries.removeAll()

186300

In [3]:
# Create SurfaceHindcastDataSeries records
latLongPairs = c3.HycomLatLongPair.fetch(spec={'limit': -1}).objs

# series = [
#     c3.SurfaceHindcastDataSeries(
#         **{
#             'id': 'HNDCST_SRFC_' + str(pair.i) + '-' + str(pair.j), 
#             'latLongPair': pair.id
#         }
#     )
#     for pair in latLongPairs
# ]
print("fetched")
series = [
    {
            'id': 'HNDCST_SRFC_' + str(pair.i) + '-' + str(pair.j), 
            'latLongPair': pair.id
        }
    for pair in latLongPairs
]
print("upserting...")
c3.SurfaceHindcastDataSeries.createBatch(series)

fetched
upserting...


c3.ObjList<SurfaceHindcastDataSeries>(
 stats=c3.Stats(
         createdObjCount=186300,
         updatedObjCount=0,
         removedObjCount=0,
         failedObjCount=0,
         objCount=186300,
         dbWait=0.0,
         time=96.962,
         dbTime=25.439587535,
         objsPerSecond=1921.3712588436706,
         dbObjsPerSecond=7323.231940914407),
 objs=c3.Arry<SurfaceHindcastDataSeries>([c3.SurfaceHindcastDataSeries(
         id='HNDCST_SRFC_0-0',
         meta=c3.Meta(
                created=datetime.datetime(2021, 10, 28, 2, 12, 5, tzinfo=datetime.timezone.utc),
                updated=datetime.datetime(2021, 10, 28, 2, 12, 5, tzinfo=datetime.timezone.utc),
                timestamp=datetime.datetime(2021, 10, 28, 2, 12, 5, tzinfo=datetime.timezone.utc)),
         version=1),
        c3.SurfaceHindcastDataSeries(
         id='HNDCST_SRFC_0-1',
         meta=c3.Meta(
                created=datetime.datetime(2021, 10, 28, 2, 12, 5, tzinfo=datetime.timezone.utc),
           

In [24]:
import time

time.sleep(2)

UsageError: Line magic function `%%time` not found.


In [35]:
def sleeper(n):
    import time
    time.sleep(n)
    return n
sleeper_c3 = c3.Lambda.fromPython(sleeper)
# example execution locally
sleeper_c3.apply([1])

1

In [36]:
actionSpec = c3.AsyncActionSpec(
    typeName = "Lambda",
    action = "apply",
    args = {
        "this": sleeper_c3,
        "args": c3.c3Make("[int]",[2])
    }
)
actionSpec

c3.AsyncActionSpec(
 typeName='Lambda',
 actionName='null',
 args=c3.Mapp<string, any>({'args': c3.Arry<int>([2]),
        'this': c3.Lambda<function(n: any): any>(
                 language='python',
                 implementation='def sleeper(n):\n'
                                 '    import time\n'
                                 '    time.sleep(n)\n'
                                 '    return n')}),
 action='apply')

In [22]:
action = c3.AsyncAction.submit(actionSpec)
aid = action.id
aid

'38c36bb2-63da-452d-a474-5c2e7b74bf26'

In [25]:
action = c3.AsyncAction.get({"id": aid})
# Check if the async action has finished running
action.hasCompleted()

False

In [19]:
action.error

c3.C3Error(
 codes=c3.Arry<string>(['NotClassified']),
 template='{}',
 parameters=c3.Arry<string>(['wrapped ClassCastException: '
              'com.fasterxml.jackson.databind.node.ArrayNode cannot be cast to '
              'c3.love.typesys.Arry',
              'null',
              'java.lang.ClassCastException: '
              'com.fasterxml.jackson.databind.node.ArrayNode cannot be cast to '
              'c3.love.typesys.Arry']),
 id='780.2388',
 key='c3.love.exceptions.C3RuntimeException_wrapIt',
 cause=c3.C3Error(
         template='com.fasterxml.jackson.databind.node.ArrayNode cannot be '
                   'cast to c3.love.typesys.Arry'))

In [26]:
help(c3.c3Make)

Help on TypeSystem in module Z66e905400b22b25861b4c6184de6e7d5M_.TypeSystem object:

class TypeSystem(Z66e905400b22b25861b4c6184de6e7d5M_.TypeSystemBase.TypeSystemBase)
 |  The 'c3' variable is an instance of this class (note that while the 'c3' variable is technically an instance of
 |  class TypeSystem, the class TypeSystem implements almost nothing; all functional behavior is inherited from this
 |  class). This class implements all logic needed to create c3 classes and c3 class instances from information recieved
 |  from c3server.
 |  
 |  Method resolution order:
 |      TypeSystem
 |      Z66e905400b22b25861b4c6184de6e7d5M_.TypeSystemBase.TypeSystemBase
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, *args, **kwargs)
 |      This method creates an instance of TypeSystemBase.
 |      :param connection: A connection to c3server; used to communicate with the server
 |      :param define_types: boolean variable indicating whether we should load all typ