Skip to content

Commit

Permalink
#59: started something but it seems difficult to generalize...
Browse files Browse the repository at this point in the history
  • Loading branch information
mortenwh committed Aug 29, 2019
1 parent 2fb3760 commit fad760e
Show file tree
Hide file tree
Showing 9 changed files with 246 additions and 0 deletions.
Empty file.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import pythesint as pti

from thredds_crawler.crawl import Crawl

from django.core.management.base import BaseCommand, CommandError

from geospaas.utils.utils import validate_uri
from geospaas.insitu_stationary.models import InsituStationary

def crawl(url, **options):
    """Crawl a THREDDS catalog and ingest every OPeNDAP dataset found in it.

    Parameters
    ----------
    url : str
        URL of the THREDDS catalog; checked with ``validate_uri`` before use.
    **options
        Optional ``platform`` and ``instrument`` GCMD keywords. A missing or
        empty value falls back to 'meteorological stations' and
        'in situ/laboratory instruments' respectively.

    Returns
    -------
    int
        Number of datasets that were newly created in the catalog.
    """
    validate_uri(url)

    # Platform and instrument are the same for every dataset, so they are
    # resolved once up front instead of once per dataset. ``.get`` keeps
    # the function callable without the optional keywords.
    platform = pti.get_gcmd_platform(
        options.get('platform') or 'meteorological stations')
    instrument = pti.get_gcmd_instrument(
        options.get('instrument') or 'in situ/laboratory instruments')

    skips = Crawl.SKIPS + ['.*ncml']
    catalog = Crawl(url, skip=skips, debug=True)
    total = len(catalog.datasets)

    added = 0
    for ds in catalog.datasets:
        # Use a separate name so the catalog ``url`` argument is not clobbered,
        # and tolerate datasets that expose no OPeNDAP service at all.
        opendap_url = next(
            (service.get('url') for service in ds.services
             if (service.get('service') or '').lower() == 'opendap'),
            None)
        if not opendap_url:
            print('Skipping %s: no OPeNDAP service'
                  % (getattr(ds, 'id', None) or ds,))
            continue

        if platform and instrument:
            _, created = InsituStationary.objects.get_or_create(
                opendap_url, platform=platform, instrument=instrument)
        else:
            _, created = InsituStationary.objects.get_or_create(opendap_url)

        if created:
            added += 1
            print('Added %s, no. %d/%d' % (opendap_url, added, total))
    return added

class Command(BaseCommand):
    # Management command: takes exactly one THREDDS url plus optional
    # --platform / --instrument keywords and hands them to ``crawl``.
    args = '<url> <select>'
    help = """
Add observation station metadata to the archive. Note that the default GCMD platform is
'meteorological station', and the default GCMD instrument is 'in situ/laboratory
instruments'. These can be changed by specifying the --platform and --instrument optional
arguments.
Args:
<url>: the url to the thredds server
--platform <platform>: GCMD platform
--instrument <instrument>: GCMD instrument
"""

    def add_arguments(self, parser):
        """Declare the positional url and the two optional GCMD keywords."""
        parser.add_argument('url', nargs='*', type=str)
        # Both optional keywords share the same shape; only name and help differ.
        for flag, description in (('--platform', 'GCMD platform'),
                                  ('--instrument', 'GCMD instrument')):
            parser.add_argument(flag, action='store', default='',
                                help=description)

    def handle(self, *args, **options):
        """Crawl the single given url and report how many datasets were added.

        Raises IOError unless exactly one url was supplied.
        """
        if len(options['url']) != 1:
            raise IOError('Please provide a url to the data')
        # Remove 'url' from the options so only the keywords reach ``crawl``.
        target = options.pop('url')[0]
        n_added = crawl(target, **options)
        message = ('Successfully added metadata of %s observation station datasets'
                   % n_added)
        self.stdout.write(message)

112 changes: 112 additions & 0 deletions geospaas/insitu_stationary/managers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import netCDF4
import warnings
from dateutil.parser import parse

from django.db import models
from django.contrib.gis.geos import GEOSGeometry

from geospaas.vocabularies.models import Platform
from geospaas.vocabularies.models import Instrument
from geospaas.vocabularies.models import DataCenter
from geospaas.vocabularies.models import Parameter
from geospaas.vocabularies.models import ISOTopicCategory
from geospaas.catalog.models import GeographicLocation
from geospaas.catalog.models import DatasetURI, Source, Dataset, DatasetParameter

# Example OPeNDAP URI that can be used for manual testing:
#   https://thredds.met.no/thredds/dodsC/met.no/observations/stations/SN99938.nc
class InsituStationaryManager(models.Manager):
    ''' Manager that ingests stationary in-situ netCDF datasets into the catalog
    '''

    # Variables that are not registered as dataset parameters
    _NON_PARAMETER_VARIABLES = ('time', 'latitude', 'longitude', 'station_id')

    def get_or_create(self, uri, *args, **kwargs):
        ''' Create dataset and corresponding metadata

        Parameters:
        ----------
            uri : str
                URI to file or stream openable by netCDF4.Dataset
            platform : dict, optional (keyword)
                GCMD platform entry; must provide the keys 'Category',
                'Series_Entity', 'Short_Name' and 'Long_Name'
            instrument : dict, optional (keyword)
                GCMD instrument entry; must provide the keys 'Category',
                'Class', 'Type', 'Subtype', 'Short_Name' and 'Long_Name'

            A source is only attached to the dataset when both platform and
            instrument are given.

        Returns:
        -------
            dataset and flag
                The flag is False if a dataset was already registered under
                this uri, True if a new one was created
        '''
        # check if dataset already exists
        existing = DatasetURI.objects.filter(uri=uri).first()
        if existing is not None:
            return existing.dataset, False

        platform = kwargs.pop('platform', '')
        instrument = kwargs.pop('instrument', '')
        source = self._get_source(platform, instrument)

        nc_dataset = self._open_dataset(uri)
        try:
            ds = self._create_dataset(nc_dataset, source)
            DatasetURI.objects.get_or_create(uri=uri, dataset=ds)
            self._add_parameters(ds, nc_dataset)
        finally:
            # Always release the file handle / remote connection
            nc_dataset.close()

        return ds, True

    @staticmethod
    def _open_dataset(uri):
        ''' Open uri with netCDF4, retrying with '#fillmismatch' on OSError '''
        try:
            return netCDF4.Dataset(uri)
        except OSError:
            # NOTE(review): the '#fillmismatch' suffix presumably tells the
            # netCDF DAP client to tolerate _FillValue type mismatches -
            # confirm against the netCDF-C documentation
            return netCDF4.Dataset(uri + '#fillmismatch')

    @staticmethod
    def _get_source(platform, instrument):
        ''' Return the Source for platform and instrument, or None if either is missing '''
        if not (platform and instrument):
            return None
        pp = Platform.objects.get(
            category=platform['Category'],
            series_entity=platform['Series_Entity'],
            short_name=platform['Short_Name'],
            long_name=platform['Long_Name'],
        )
        ii = Instrument.objects.get(
            category=instrument['Category'],
            instrument_class=instrument['Class'],
            type=instrument['Type'],
            subtype=instrument['Subtype'],
            short_name=instrument['Short_Name'],
            long_name=instrument['Long_Name'],
        )
        return Source.objects.get_or_create(platform=pp, instrument=ii)[0]

    @staticmethod
    def _create_dataset(nc_dataset, source):
        ''' Build and save the catalog Dataset from the netCDF global metadata '''
        # The station position is taken from the first longitude/latitude value
        longitude = nc_dataset.variables['longitude'][0]
        latitude = nc_dataset.variables['latitude'][0]
        location = GEOSGeometry('POINT(%s %s)' % (longitude, latitude))
        geolocation = GeographicLocation.objects.get_or_create(
            geometry=location)[0]

        dc = DataCenter.objects.get(short_name='NO/MET')
        iso_category = ISOTopicCategory.objects.get(
            name='Climatology/Meteorology/Atmosphere')

        ds = Dataset(
            entry_id=nc_dataset.id,
            entry_title=nc_dataset.title,
            ISO_topic_category=iso_category,
            data_center=dc,
            summary=nc_dataset.summary,
            time_coverage_start=parse(nc_dataset.time_coverage_start),
            time_coverage_end=parse(nc_dataset.time_coverage_end),
            geographic_location=geolocation)
        if source:
            ds.source = source
        ds.save()
        return ds

    def _add_parameters(self, ds, nc_dataset):
        ''' Link ds to a Parameter for each data variable with a known standard_name '''
        # Iterate without popping, so the variables mapping of the open
        # dataset is left untouched
        for name, variable in nc_dataset.variables.items():
            if name in self._NON_PARAMETER_VARIABLES:
                continue
            standard_name = getattr(variable, 'standard_name', None)
            if standard_name is None:
                warnings.warn('{}: no standard_name attribute'.format(name))
                continue
            try:
                par = Parameter.objects.get(standard_name=standard_name)
            except Parameter.DoesNotExist as e:
                warnings.warn('{}: {}'.format(standard_name, e.args[0]))
                continue
            DatasetParameter(dataset=ds, parameter=par).save()


26 changes: 26 additions & 0 deletions geospaas/insitu_stationary/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 2.2.4 on 2019-08-29 11:42

from django.db import migrations


class Migration(migrations.Migration):
    """Initial migration: create the ``MetObsStation`` proxy of ``catalog.dataset``."""

    initial = True

    dependencies = [('catalog', '0007_auto_20190626_1313')]

    operations = [
        migrations.CreateModel(
            name='MetObsStation',
            fields=[],
            options={'proxy': True, 'indexes': [], 'constraints': []},
            bases=('catalog.dataset',),
        ),
    ]
28 changes: 28 additions & 0 deletions geospaas/insitu_stationary/migrations/0002_auto_20190829_1152.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Generated by Django 2.2.4 on 2019-08-29 11:52

from django.db import migrations


class Migration(migrations.Migration):
    """Delete the ``MetObsStation`` proxy and create the ``InsituStationary`` proxy."""

    dependencies = [
        ('catalog', '0007_auto_20190626_1313'),
        ('insitu_stationary', '0001_initial'),
    ]

    operations = [
        migrations.DeleteModel(name='MetObsStation'),
        migrations.CreateModel(
            name='InsituStationary',
            fields=[],
            options={'proxy': True, 'indexes': [], 'constraints': []},
            bases=('catalog.dataset',),
        ),
    ]
Empty file.
11 changes: 11 additions & 0 deletions geospaas/insitu_stationary/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from django.db import models

from geospaas.catalog.models import Dataset as CatalogDataset

from geospaas.insitu_stationary.managers import InsituStationaryManager

class InsituStationary(CatalogDataset):
    """Proxy of the catalog ``Dataset`` model managed by ``InsituStationaryManager``."""

    objects = InsituStationaryManager()

    class Meta:
        # Proxy model: declares no fields of its own.
        proxy = True

0 comments on commit fad760e

Please sign in to comment.