/
__init__.py
1323 lines (1148 loc) · 68.9 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
'''
Base module for STOQS loaders
@author: __author__
@status: __status__
@license: __license__
'''
import os
import sys
app_dir = os.path.join(os.path.dirname(__file__), "../")
sys.path.insert(0, app_dir)
os.environ['DJANGO_SETTINGS_MODULE']='config.settings.local'
import django
django.setup()
from django.conf import settings
from django.contrib.gis.geos import Polygon
from django.db.utils import IntegrityError
from django.db import transaction, DatabaseError
from django.db.models import Max, Min
from stoqs import models as m
from datetime import datetime
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
import time
import re
import math
import numpy as np
from coards import to_udunits
import seawater.eos80 as sw
import csv
import urllib2
import logging
from utils.utils import percentile, median, mode, simplify_points, spiciness
from tempfile import NamedTemporaryFile
import pprint
from netCDF4 import Dataset
import httplib
# When settings.DEBUG is True Django will fill up a hash with stats on every insert done to the database.
# "Monkey patch" the CursorWrapper to prevent this. Otherwise we can't load large amounts of data.
# See http://stackoverflow.com/questions/7768027/turn-off-sql-logging-while-keeping-settings-debug
from django.db.backends.base.base import BaseDatabaseWrapper
from django.db.backends.utils import CursorWrapper
# Constant for ParameterGroup name - for utils/STOQSQmanager.py to use
MEASUREDINSITU = 'Measured in situ'
# Constants for a Resource and ResourceType names - to be also used in utils/Viz
X3D_MODEL = 'X3D_MODEL'
X3D_MODEL_NOMINALDEPTH = 'X3D_MODEL_nominaldepth'
X3D_MODEL_SCALEFACTOR = 'X3D_MODEL_scalefactor'
X3DPLATFORMMODEL = 'x3dplatformmodel'
if settings.DEBUG:
BaseDatabaseWrapper.make_debug_cursor = lambda self, cursor: CursorWrapper(cursor, self)
missing_value = 1e-34
class SkipRecord(Exception):
pass
class HasMeasurement(Exception):
pass
class ParameterNotFound(Exception):
pass
class FileNotFound(Exception):
pass
class LoadScript(object):
'''
Base class for load script to inherit from for reusing common utility methods such
as process_command_line()
'''
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def __init__(self, base_dbAlias, base_campaignName, description=None, stride=1, x3dTerrains=None, grdTerrain=None):
self.base_dbAlias = base_dbAlias
self.base_campaignName = base_campaignName
self.campaignDescription = description
self.stride = stride
self.x3dTerrains = x3dTerrains
self.grdTerrain = grdTerrain
def process_command_line(self):
'''
The argparse library is included in Python 2.7 and is an added package for STOQS.
Process command line arguments to support these kind of database loads:
- Optimal stride
- Test version
- Uniform stride
Load scripts should have execution code that looks like:
# Execute the load
cl.process_command_line()
if cl.args.test:
##cl.loadDorado(stride=100)
cl.loadM1ts(stride=10)
cl.loadM1met(stride=10)
elif cl.args.optimal_stride:
cl.loadDorado(stride=2)
cl.loadM1ts(stride=1)
cl.loadM1met(stride=1)
else:
cl.stride = cl.args.stride
cl.loadDorado()
cl.loadM1ts()
cl.loadM1met()
Optional arguments for associating X3D Terrains and Viewpoints with this Campaign:
- x3dTerrains: Dict of absolute URL hashes to X3D GeoElevationGrids with hashes of
viewpoint position, orientation, and centerOfRotation
'''
import argparse
from argparse import RawTextHelpFormatter
exampleString = ''
for dbType in ('', '_t', '_o', '_s10'):
if dbType == '':
exampleString = exampleString + ' %s \t# Load full resolution data into %s\n' % (
sys.argv[0], self.base_dbAlias)
elif dbType == '_s10':
exampleString = exampleString + ' %s -%s 10\t# Load data into %s\n' % (
sys.argv[0], dbType[1], self.base_dbAlias + dbType)
else:
exampleString = exampleString + ' %s -%s \t# Load data into %s\n' % (
sys.argv[0], dbType[1], self.base_dbAlias + dbType)
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,
description='STOQS load script for "%s"' % self.base_campaignName,
epilog='Examples:' + '\n\n' + exampleString + '\n' +
'(Databases must be created, synced and defined in privateSettings - see INSTALL instructions)')
parser.add_argument('--dbAlias', action='store',
help='Database alias (default = %s)' % self.base_dbAlias)
parser.add_argument('--campaignName', action='store',
help='Campaign Name (default = "%s")' % self.base_campaignName)
parser.add_argument('-o', '--optimal_stride', action='store_true',
help='Run load for optimal stride configuration as defined in \n"if cl.args.optimal_stride:" section of load script')
parser.add_argument('-t', '--test', action='store_true',
help='Run load for test configuration as defined in \n"if cl.args.test:" section of load script')
parser.add_argument('-s', '--stride', action='store', type=int, default=1,
help='Stride value (default=1)')
parser.add_argument('-a', '--append', action='store_true',
help='Append data to existing activity - for use in repetative runs')
parser.add_argument('-v', '--verbose', action='store_true',
help='Turn on DEBUG level logging output')
self.args = parser.parse_args()
# Modify base dbAlias with conventional suffix if dbAlias not specified on command line
if not self.args.dbAlias:
if self.args.optimal_stride:
self.dbAlias = self.base_dbAlias + '_o'
elif self.args.test:
self.dbAlias = self.base_dbAlias + '_t'
elif self.args.stride:
if self.args.stride == 1:
self.dbAlias = self.base_dbAlias
else:
self.dbAlias = self.base_dbAlias + '_s%d' % self.args.stride
else:
self.dbAlias = self.args.dbAlias
# Modify base campaignName with conventional suffix if campaignName not specified on command line
if not self.args.campaignName:
if self.args.optimal_stride:
self.campaignName = self.base_campaignName + ' with optimal strides'
elif self.args.test:
self.campaignName = self.base_campaignName + ' for testing'
elif self.args.stride:
if self.args.stride == 1:
self.campaignName = self.base_campaignName
else:
self.campaignName = self.base_campaignName + ' with uniform stride of %d' % self.args.stride
else:
self.campaignName = self.args.campaignName
if self.args.verbose:
self.logger.setLevel(logging.DEBUG)
self.commandline = ' '.join(sys.argv)
self.logger.info('Executing command: %s', self.commandline)
def addTerrainResources(self):
'''
If X3D Terrain information is specified then add as Resources to Campaign. To be called after process_command_line().
'''
if not self.x3dTerrains:
return
# Enable use of this method without calling process_command_line() - as is done in ROVCTDloader.py
if not hasattr(self, 'dbAlias'):
self.dbAlias = self.base_dbAlias
if not hasattr(self, 'campaignName'):
self.campaignName = self.base_campaignName
resourceType, _ = m.ResourceType.objects.using(self.dbAlias).get_or_create(
name='x3dterrain', description='X3D Terrain information for Spatial 3D visualization')
self.logger.info('Adding to ResourceType: %s', resourceType)
self.logger.debug('Looking in database %s for Campaign name = %s', self.dbAlias, self.campaignName)
campaign = m.Campaign.objects.using(self.dbAlias).get(name=self.campaignName)
for url, viewpoint in self.x3dTerrains.iteritems():
self.logger.debug('url = %s, viewpoint = %s', url, viewpoint)
for name, value in viewpoint.iteritems():
resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
uristring=url, name=name, value=value, resourcetype=resourceType)
m.CampaignResource.objects.using(self.dbAlias).get_or_create(
campaign=campaign, resource=resource)
self.logger.info('Resource uristring=%s, name=%s, value=%s', url, name, value)
def addPlaybackResources(self, x3dmodelurl, aName):
'''
If X3D scene graph and DOM control information is specified then add as Resources to Activity.
To be called with each file (Activity) load.
'''
resourceType, _ = m.ResourceType.objects.using(self.dbAlias).get_or_create(
name='x3dplayback', description='X3D scene graph for Activity playback')
##resourceType, created = m.ResourceType.objects.using(self.dbAlias).get_or_create(
## name='x3dplaybackhtml', description='X3D DOM control HTML stubs for Activity playback')
self.logger.info('Adding to ResourceType: %s', resourceType)
self.logger.debug('Looking in database %s for Activity name = %s', self.dbAlias, aName)
activity = m.Activity.objects.using(self.dbAlias).get(name=aName)
resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
uristring=x3dmodelurl, name=X3D_MODEL, value=
'Output from bed2x3d.py - uristring to be included in GeoLocation node',
resourcetype=resourceType)
m.ActivityResource.objects.using(self.dbAlias).get_or_create(
activity=activity, resource=resource)
self.logger.info('Resource uristring=%s', x3dmodelurl)
def addPlatformResources(self, x3dmodelurl, pName, value='X3D model', nominaldepth=0.0, scalefactor=1.0):
'''Add Resources to Platform. Used initially for adding X3D model of a platform.
Can put additional descriptive information in value option, e.g.: "X3D model
derived from SolidWorks model of ESP and processed through aopt"
'''
resourceType, _ = m.ResourceType.objects.using(self.dbAlias).get_or_create(
name=X3DPLATFORMMODEL, description='X3D scene for model of a platform')
self.logger.info('Adding to ResourceType: %s', resourceType)
self.logger.debug('Looking in database %s for Platform name = %s', self.dbAlias, pName)
platform = m.Platform.objects.using(self.dbAlias).get(name=pName)
r, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
uristring=x3dmodelurl, name=X3D_MODEL, value=value, resourcetype=resourceType)
m.PlatformResource.objects.using(self.dbAlias).get_or_create(
platform=platform, resource=r)
r, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
uristring=x3dmodelurl,
name=X3D_MODEL_NOMINALDEPTH, value=nominaldepth, resourcetype=resourceType)
m.PlatformResource.objects.using(self.dbAlias).get_or_create(
platform=platform, resource=r)
r, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
uristring=x3dmodelurl,
name=X3D_MODEL_SCALEFACTOR, value=scalefactor, resourcetype=resourceType)
m.PlatformResource.objects.using(self.dbAlias).get_or_create(
platform=platform, resource=r)
self.logger.info('Resource uristring=%s', x3dmodelurl)
class STOQS_Loader(object):
'''
The STOQSloaders class contains methods common across all loaders for creating and updating
general STOQS model objects. This is a base class that should be inherited by other
classes customized to read and load various kinds of data from various sources.
Mike McCann
MBARI 26 May 2012
'''
parameter_dict={} # used to cache parameter objects
standard_names = {} # should be defined for each child class
include_names=[] # names to include, if set it is used in conjunction with ignored_names
# Note: if a name is both in include_names and ignored_names it is ignored.
ignored_names=[] # Should be defined for each child class
global_ignored_names = ['longitude','latitude', 'time', 'Time',
'LONGITUDE','LATITUDE','TIME', 'NominalDepth', 'esecs', 'Longitude', 'Latitude',
'DEPTH','depth'] # A list of parameters that should not be imported as parameters
global_dbAlias = ''
logger = logging.getLogger('__main__')
logger.setLevel(logging.INFO)
def __init__(self, activityName, platformName, dbAlias='default', campaignName=None,
campaignDescription=None, activitytypeName=None, platformColor=None,
platformTypeName=None, stride=1):
'''
Intialize with settings that are common for any load of data into STOQS.
@param activityName: A string describing this activity
@param platformName: A string that is the name of the platform. If that name
for a Platform exists in the DB, it will be used.
@param platformColor: An RGB hex string represnting the color of the platform.
@param dbAlias: The name of the database alias as defined in settings.py
@param campaignName: A string describing the Campaign in which this activity belongs,
If that name for a Campaign exists in the DB, it will be used.
@param campaignDescription: A string expanding on the campaignName. It should be a
short phrase expressing the where and why of a campaign.
@param activitytypeName: A string such as 'mooring deployment' or 'AUV mission'
describing type of activity, If that name for a ActivityType
exists in the DB, it will be used.
@param platformTypeName: A string describing the type of platform, e.g.: 'mooring',
'auv'. If that name for a PlatformType exists in the DB,
it will be used.
'''
self.campaignName = campaignName
self.campaignDescription = campaignDescription
self.activitytypeName = activitytypeName
self.platformName = platformName
self.platformColor = platformColor
self.dbAlias = dbAlias
self.platformTypeName = platformTypeName
self.activityName = activityName
self.stride = stride
self.build_standard_names()
def getPlatform(self, name, type_name):
'''
Given just a platform name get a platform object from the STOQS database. If no such object is in the
database then create a new one. Makes use of the MBARI tracking database to keep the names and types
consistent. The intention of the logic here is to make platform settings dynamic, yet consistent in
reusing what is already in the database. There might need to be some independent tests to ensure that
we make no duplicates. The aim is to be case insensitive on the lookup, but to preserve the case of
what is in MBARItracking.
Platform names mut be composed of [a-zA-Z0-9_], containing no special characters or spaces.
'''
##paURL = 'http://odss-staging.shore.mbari.org/trackingdb/platformAssociations.csv'
paURL = 'http://odss.mbari.org/trackingdb/platformAssociations.csv'
##paURL = 'http://192.168.111.177/trackingdb/platformAssociations.csv' # Private URL for host malibu
# Returns lines like:
# PlatformType,PlatformName
# ship,Martin
# ship,PT_LOBOS
# ship,W_FLYER
# ship,W_FLYER
# ship,ZEPHYR
# mooring,Bruce
if re.search('[^a-zA-Z0-9_-]', name):
raise Exception('Platform name = "%s" is not allowed. Name can contain only letters, numbers, "_", and "-"' % name)
self.logger.debug("Opening %s to read platform names for matching to the MBARI tracking database", paURL)
try:
tpHandle = csv.DictReader(urllib2.urlopen(paURL))
except urllib2.URLError as e:
self.logger.warn('Could not open %s', paURL)
self.logger.warn(e)
platformName = ''
try:
for rec in tpHandle:
##print "rec = %s" % rec
if rec['PlatformName'].lower() == name.lower():
platformName = rec['PlatformName']
tdb_platformTypeName = rec['PlatformType']
break
except httplib.IncompleteRead as e:
self.logger.warn(e)
if not platformName:
platformName = name
self.logger.debug("Platform name %s not found in tracking database. Creating new platform anyway.", platformName)
# Preference: 1. __init__(), 2. this function arg, 3. tracking DB
if not self.platformTypeName:
if type_name:
self.platformTypeName = type_name
elif tdb_platformTypeName:
self.platformTypeName = tdb_platformTypeName
else:
self.logger.warn('self.platformTypeName has not been assigned')
# Create PlatformType
self.logger.debug("calling using('%s').get_or-create() on PlatformType for platformTypeName = %s", self.dbAlias, self.platformTypeName)
platformType, created = m.PlatformType.objects.using(self.dbAlias).get_or_create(name = self.platformTypeName)
if created:
self.logger.debug("Created platformType.name %s in database %s", platformType.name, self.dbAlias)
else:
self.logger.debug("Retrieved platformType.name %s from database %s", platformType.name, self.dbAlias)
# Create Platform
platform, created = m.Platform.objects.using(self.dbAlias).get_or_create( name=platformName,
color=self.platformColor,
platformtype=platformType)
if created:
self.logger.info("Created platform %s in database %s", platformName, self.dbAlias)
else:
self.logger.info("Retrieved platform %s from database %s", platformName, self.dbAlias)
return platform
def addParameters(self, parmDict):
'''
Wrapper so as to apply self.dbAlias in the decorator
'''
def innerAddParameters(self, parmDict):
'''
This method is a get_or_create() equivalent, but on steroids. It first tries to find the
parameter in a local cache (a python hash), first by standard_name, then by name. Then it
checks to see if it's in the database. If it's not in the database it will then add it
populating the fields from the attributes of the parameter dictionary that is passed. The
dictionary is patterned after the pydap.model.BaseType variable from the NetCDF file (OPeNDAP URL).
'''
# Go through the keys of the OPeNDAP URL for the dataset and add the parameters as needed to the database
for key in parmDict.keys():
self.logger.debug("key = %s", key)
if (key in self.ignored_names) or (key not in self.include_names): # skip adding parameters that are ignored
continue
v = parmDict[key].attributes
self.logger.debug("v = %s", v)
try:
self.getParameterByName(key)
except ParameterNotFound as e:
self.logger.debug("Parameter not found. Assigning parms from ds variable.")
# Bug in pydap returns a gobbledegook list of things if the attribute value has not been
# set. Check for this on units and override what pydap returns.
if isinstance(v.get('units'), list):
unitStr = ''
else:
unitStr = v.get('units')
parms = {'units': unitStr,
'standard_name': v.get('standard_name'),
'long_name': v.get('long_name'),
'type': v.get('type'),
'description': v.get('description'),
'origin': self.activityName,
'name': key}
self.parameter_dict[key] = m.Parameter(**parms)
try:
sid = transaction.savepoint(using=self.dbAlias)
self.parameter_dict[key].save(using=self.dbAlias)
self.ignored_names.remove(key) # unignore, since a failed lookup will add it to the ignore list.
except IntegrityError as e:
self.logger.warn('%s', e)
transaction.savepoint_rollback(sid)
if str(e).startswith('duplicate key value violates unique constraint "stoqs_parameter_pkey"'):
self.resetParameterAutoSequenceId()
try:
sid2 = transaction.savepoint(using=self.dbAlias)
self.parameter_dict[key].save(using=self.dbAlias)
self.ignored_names.remove(key) # unignore, since a failed lookup will add it to the ignore list.
except Exception as e:
self.logger.error('%s', e)
transaction.savepoint_rollback(sid2,using=self.dbAlias)
raise Exception('''Failed reset auto sequence id on the stoqs_parameter table''')
else:
self.logger.error('Exception %s', e)
raise Exception('''Failed to add parameter for %s
%s\nEither add parameter manually, or add to ignored_names''' % (key,
'\n'.join(['%s=%s' % (k1,v1) for k1,v1 in parms.iteritems()])))
except Exception as e:
self.logger.error('%s', e)
transaction.savepoint_rollback(sid,using=self.dbAlias)
raise Exception('''Failed to add parameter for %s
%s\nEither add parameter manually, or add to ignored_names''' % (key,
'\n'.join(['%s=%s' % (k1,v1) for k1,v1 in parms.iteritems()])))
self.logger.debug("Added parameter %s from data set to database %s", key, self.dbAlias)
return innerAddParameters(self, parmDict)
def createCampaign(self):
'''Create Campaign in the database ensuring that there is only one Campaign
in the database. If supplied Campaign name exists in database then that
campaign is used and supplied Campaign description is ignored.
'''
try:
self.campaign = m.Campaign.objects.using(self.dbAlias).get(id=1)
self.logger.info('Retrieved Campaign = %s', self.campaign)
if self.campaign.name != self.campaignName:
self.logger.warn('Supplied Campaign name of %s does not match name = %s '
'in database %s', self.campaignName, self.campaign.name, self.dbAlias)
self.logger.warn('Using Campaign already existing in the database')
except ObjectDoesNotExist:
self.campaign = m.Campaign(name = self.campaignName,
description = self.campaignDescription)
self.campaign.save(using=self.dbAlias)
self.logger.info('Created campaign = %s', self.campaign)
def createActivity(self):
'''
Use provided activity information to add the activity to the database.
'''
self.logger.info("Creating Activity with startDate = %s and endDate = %s",
self.startDatetime, self.endDatetime)
comment = 'Loaded on %s with these include_names: %s' % (datetime.now(),
' '.join(self.include_names))
self.logger.info("comment = " + comment)
# Create Platform
self.activity, created = m.Activity.objects.using(self.dbAlias).get_or_create(
name = self.activityName,
platform = self.platform,
startdate = self.startDatetime,
enddate = self.endDatetime)
if created:
self.logger.info("Created activity %s in database %s with startDate=%s, endDate = %s",
self.activityName, self.dbAlias, self.startDatetime, self.endDatetime)
else:
self.logger.info("Retrieved activity %s from database %s", self.activityName, self.dbAlias)
# Get or create activityType
if self.activitytypeName is not None:
activityType,created = m.ActivityType.objects.using(self.dbAlias
).get_or_create(name = self.activitytypeName)
self.activityType = activityType
if self.activityType is not None:
self.activity.activitytype = self.activityType
self.createCampaign()
self.activity.campaign = self.campaign
self.activity.save(using=self.dbAlias)
def addResources(self):
'''
Add Resources for this activity, namely the NC_GLOBAL attribute names and values,
and all the attributes for each variable in include_names.
'''
# The source of the data - this OPeNDAP URL
resourceType, _ = m.ResourceType.objects.using(self.dbAlias).get_or_create(name = 'opendap_url')
self.logger.debug("Getting or Creating Resource with name = %s, value = %s", 'opendap_url', self.url )
resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
name='opendap_url', value=self.url, uristring=self.url, resourcetype=resourceType)
ar, _ = m.ActivityResource.objects.using(self.dbAlias).get_or_create(
activity=self.activity, resource=resource)
# The NC_GLOBAL attributes from the OPeNDAP URL. Save them all.
self.logger.debug("Getting or Creating ResourceType nc_global...")
self.logger.debug("ds.attributes.keys() = %s", self.ds.attributes.keys() )
if 'NC_GLOBAL' in self.ds.attributes:
resourceType, _ = m.ResourceType.objects.using(self.dbAlias).get_or_create(name = 'nc_global')
for rn, value in self.ds.attributes['NC_GLOBAL'].iteritems():
self.logger.debug("Getting or Creating Resource with name = %s, value = %s", rn, value )
resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
name=rn, value=value, resourcetype=resourceType)
ar, _ = m.ActivityResource.objects.using(self.dbAlias).get_or_create(
activity=self.activity, resource=resource)
# Use potentially monkey-patched self.getFeatureType() method to write a correct value - as the UI depends on it
mp_resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
name='featureType', value=self.getFeatureType(), resourcetype=resourceType)
ars = m.ActivityResource.objects.using(self.dbAlias).filter(activity=self.activity,
resource__resourcetype=resourceType, resource__name='featureType').select_related('resource')
if not ars:
# There was no featureType NC_GLOBAL in the dataset - associate to the one from self.getFeatureType()
ar, _ = m.ActivityResource.objects.using(self.dbAlias).get_or_create(
activity=self.activity, resource=mp_resource)
for ar in ars:
if ar.resource.value != mp_resource.value:
# Update (override NC_GLOBAL's) with monkey-patched self.getFeatureType()'s value
ars = m.ActivityResource.objects.using(self.dbAlias).filter(activity=self.activity,
resource__resourcetype=resourceType, resource__name='featureType').update(resource=mp_resource)
self.logger.warn('Over-riding featureType from NC_GLOBAL (%s) with monkey-patched value = %s',
ar.resource.value, mp_resource.value)
else:
self.logger.warn("No NC_GLOBAL attribute in %s", self.url)
self.logger.info('Adding attributes of all the variables from the original NetCDF file')
for v in self.include_names + ['altitude']:
self.logger.info('v = %s', v)
try:
for rn, value in self.ds[v].attributes.iteritems():
self.logger.debug("Getting or Creating Resource with name = %s, value = %s", rn, value )
resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
name=rn, value=value, resourcetype=resourceType)
ar, _ = m.ParameterResource.objects.using(self.dbAlias).get_or_create(
parameter=self.getParameterByName(v), resource=resource)
except KeyError as e:
# Just skip derived parameters that may have been added for a sub-classed Loader
self.logger.warn('include_name %s is not in %s; assuming it is derived and skipping', v, self.url)
except AttributeError as e:
# Just skip over loaders that don't have the plotTimeSeriesDepth attribute
self.logger.warn('%s for include_name %s in %s. Skipping', e, v, self.url)
self.logger.info('Adding plotTimeSeriesDepth Resource for Parameters we want plotted in Parameter tab')
for v in self.include_names + ['altitude']:
try:
if self.plotTimeSeriesDepth.get(v, False):
self.logger.info('v = %s', v)
try:
uiResType, _ = m.ResourceType.objects.using(self.dbAlias).get_or_create(name='ui_instruction')
resource, _ = m.Resource.objects.using(self.dbAlias).get_or_create(
name='plotTimeSeriesDepth', value=self.plotTimeSeriesDepth[v], resourcetype=uiResType)
ar, _ = m.ParameterResource.objects.using(self.dbAlias).get_or_create(
parameter=self.getParameterByName(v), resource=resource)
except ParameterNotFound as e:
self.logger.warn('Could not add plotTimeSeriesDepth Resource for v = %s: %s', v, e)
except AttributeError as e:
self.logger.warn('Could not add plotTimeSeriesDepth Resource for v = %s: %s', v, e)
def getParameterByName(self, name):
'''
Locate a parameter's object from the database. Cache objects after lookup.
If a standard name is provided we'll look up using it instead, as it's more standard.
@param name: Name of parameter object to lookup/locate
'''
# First try to locate the parameter using the standard name (if we have one)
if name not in self.parameter_dict:
self.logger.debug("'%s' is not in self.parameter_dict", name)
if self.standard_names.get(name) is not None:
self.logger.debug("self.standard_names.get('%s') is not None", name)
try:
self.logger.debug("For name = %s ", name)
self.logger.debug("standard_names = %s", self.standard_names[name])
self.logger.debug("retrieving from database %s", self.dbAlias)
self.parameter_dict[name] = m.Parameter.objects.using(self.dbAlias).get(standard_name = self.standard_names[name][0])
except ObjectDoesNotExist:
pass
except IndexError:
pass
# If we still haven't found the parameter using the standard_name, start looking using the name
if name not in self.parameter_dict:
self.logger.debug("Again '%s' is not in self.parameter_dict", name)
try:
self.logger.debug("trying to get '%s' from database %s...", name, self.dbAlias)
##(parameter, created) = m.Parameter.objects.get(name = name)
self.parameter_dict[name] = m.Parameter.objects.using(self.dbAlias).get(name = name)
self.logger.debug("self.parameter_dict[name].name = %s", self.parameter_dict[name].name)
except ObjectDoesNotExist:
##print >> sys.stderr, "Unable to locate parameter with name %s. Adding to ignored_names list." % (name,)
self.ignored_names.append(name)
raise ParameterNotFound('Parameter %s not found in the cache nor the database' % (name,))
# Finally, since we haven't had an error, we MUST have a parameter for this name. Return it.
self.logger.debug("Returning self.parameter_dict[name].units = %s", self.parameter_dict[name].units)
try:
self.parameter_dict[name].save(using=self.dbAlias)
except Exception as e:
print e
print name
pprint.pprint( self.parameter_dict[name])
return self.parameter_dict[name]
def createMeasurement(self, mtime, depth, lat, lon, nomDepth=None, nomLat=None, nomLong=None):
'''
Create and return a measurement object in the database. The measurement object
is created by first creating an instance of stoqs.models.Instantpoint using the activity,
then by creating an instance of Measurement using the Instantpoint. A reference to
an instance of a stoqs.models.Measurement object is then returned.
@param mtime: A valid datetime instance of a datetime object used to create the Instantpoint
@param depth: The depth for the measurement
@param lat: The latitude (degrees, assumed WGS84) for the measurement
@param lon: The longitude (degrees, assumed WGS84) for the measurement
@param nomDepth: The nominal depth (e.g. for a timeSeriesProfile featureType) measurement
@param nomLat: The nominal latitude (e.g. for a timeSeriesProfile featureType) measurement
@param nomLong: The nominal longitude (e.g. for a timeSeriesProfile featureType) measurement
@return: An instance of stoqs.models.Measurement
'''
# Brute force QC check on depth to remove egregous outliers
minDepth = -1000
maxDepth = 5000
if depth < minDepth or depth > maxDepth:
raise SkipRecord('Bad depth: %s (depth must be between %s and %s)' % (depth, minDepth, maxDepth))
# Brute force QC check on latitude and longitude to remove egregous outliers
if lat < -90 or lat > 90:
raise SkipRecord('Bad lat: %s (latitude must be between %s and %s)' % (lat, -90, 90))
if lon < -720 or lon > 720:
raise SkipRecord('Bad lon: %s (longitude must be between %s and %s)' % (long, -720, 720))
ip, _ = m.InstantPoint.objects.using(self.dbAlias).get_or_create(activity=self.activity, timevalue=mtime)
nl = None
point = 'POINT(%s %s)' % (repr(lon), repr(lat))
if not (nomDepth is None and nomLat is None and nomLong is None):
self.logger.debug('nomDepth = %s nomLat = %s nomLong = %s', nomDepth, nomLat, nomLong)
nom_point = 'POINT(%s %s)' % (repr(nomLong), repr(nomLat))
nl, _ = m.NominalLocation.objects.using(self.dbAlias).get_or_create(depth=repr(nomDepth),
geom=nom_point, activity=self.activity)
try:
measurement, _ = m.Measurement.objects.using(self.dbAlias).get_or_create(instantpoint=ip,
nominallocation=nl, depth=repr(depth), geom=point)
except DatabaseError:
self.logger.exception('''DatabaseError:
It is likely that you need https://code.djangoproject.com/attachment/ticket/16778/postgis-adapter.patch.
Check the STOQS INSTALL file for instructions on Django patch #16778.
It's also likely that creating a nominallocation was attempted on a database that does not have that relation.
Several schema changes were checked into the STOQS repository in June 2013. It's suggested that you
drop the database, recreate it, resync, and reload.
''')
sys.exit(-1)
except Exception as e:
self.logger.error('Exception %s', e)
self.logger.error("Cannot save measurement mtime = %s, long = %s, lat = %s,"
" depth = %s", mtime, repr(long), repr(lat), repr(depth))
raise SkipRecord
return measurement
def preProcessParams(self, row):
'''
This method is designed to perform any final pre-processing, such as adding new
parameters based on existing ones. It's also possible to use this function to remove
additional parameters (if necessary). In particular, this is useful for derived
columns such as chlorophyl count, etc.
@param row: A dictionary representing a single "row" of parameter data to be added to the database.
'''
self.logger.debug(row)
if 'measurement' in row:
# Special for things like LOPC data
raise HasMeasurement()
try:
if (row['longitude'] == missing_value or row['latitude'] == missing_value or
#float(row['longitude']) == 0.0 or float(row['latitude']) == 0.0 or
math.isnan(row['longitude'] ) or math.isnan(row['latitude'])):
raise SkipRecord('Invalid latitude or longitude coordinate')
except KeyError as e:
raise SkipRecord('KeyError: ' + str(e))
# Additional sanity check on latitude and longitude
if row['latitude'] > 90 or row['latitude'] < -90:
raise SkipRecord('Invalid latitude = %s' % row['latitude'])
if row['longitude'] > 720 or row['longitude'] < -720:
raise SkipRecord('Invalid longitude = %s' % row['longitude'])
return row
def checkForValidData(self):
'''
Do a pre- check on the OPeNDAP url for the include_names variables. If there are non-NaN data in
any of the variables return True, otherwise return False.
'''
allNaNFlag = {}
anyValidData = False
self.logger.info("Checking for valid data from %s", self.url)
self.logger.debug("include_names = %s", self.include_names)
for v in self.include_names:
self.logger.debug("v = %s", v)
try:
vVals = self.ds[v][:] # Case sensitive
self.logger.debug(len(vVals))
allNaNFlag[v] = np.isnan(vVals).all()
if not allNaNFlag[v]:
anyValidData = True
except KeyError:
self.logger.debug('Parameter %s not in %s. Skipping.', v, self.ds.keys())
if v.find('.') != -1:
raise Exception('Parameter names must not contain periods - cannot load data. Paramater %s violates CF conventions.' % v)
except ValueError:
pass
self.logger.debug("allNaNFlag = %s", allNaNFlag)
for v in allNaNFlag.keys():
if not allNaNFlag[v]:
self.varsLoaded.append(v)
self.logger.info("Variables that have data: self.varsLoaded = %s", self.varsLoaded)
return anyValidData
def build_standard_names(self):
'''
Create a dictionary that contains keys that are fields, and values that
are the standard names of those fields. Classes that inherit from this
class should set any default standard names at the class level.
'''
if not hasattr(self, 'ds'):
# Loaders (such as those in SampleLoaders.py) may inherit this method and not use OPeNDAP
return
try:
for var in self.ds.keys():
if var in self.standard_names:
continue # don't override pre-specified names
if 'standard_name' in self.ds[var].attributes:
self.standard_names[var]=self.ds[var].attributes['standard_name']
else:
self.standard_names[var]=None # Indicate those without a standard name
except AttributeError as e:
self.logger.warn(e)
@staticmethod
def update_ap_stats(dbAlias, activity, parameters, sampledFlag=False):
'''Update the database with descriptive statistics for parameters
belonging to the activity.
'''
for p in parameters:
ap, _ = m.ActivityParameter.objects.using(dbAlias).get_or_create(
parameter=p, activity=activity)
if sampledFlag:
data = m.SampledParameter.objects.using(dbAlias).filter(
parameter=p, sample__instantpoint__activity=activity
).values_list('datavalue', flat=True)
else:
data = m.MeasuredParameter.objects.using(dbAlias).filter(
parameter=p, measurement__instantpoint__activity=activity
).values_list('datavalue', flat=True)
np_data = np.array([float(d) for d in data])
np_data.sort()
ap.number = len(np_data)
ap.min = np_data.min()
ap.max = np_data.max()
ap.mean = np_data.mean()
ap.median = median(list(np_data))
ap.mode = mode(np_data)
ap.p025 = percentile(list(np_data), 0.025)
ap.p975 = percentile(list(np_data), 0.975)
ap.p010 = percentile(list(np_data), 0.010)
ap.p990 = percentile(list(np_data), 0.990)
ap.save(using=dbAlias)
# Compute and save histogram, use smaller number of bins
# for Sampled Parameters
if sampledFlag:
(counts, bins) = np.histogram(np_data,10)
else:
(counts, bins) = np.histogram(np_data,100)
for i,count in enumerate(counts):
m.ActivityParameterHistogram.objects.using(dbAlias).get_or_create(
activityparameter=ap, bincount=count,
binlo=bins[i], binhi=bins[i+1])
@classmethod
def update_activityparameter_stats(cls, dbAlias, activity, parameters, sampledFlag=False):
'''Class method for update_ap_stats() so that subclasses can call it via
updateActivityParameterStats()
'''
cls.update_ap_stats(dbAlias, activity, parameters, sampledFlag)
def updateActivityParameterStats(self, parameterCounts, sampledFlag=False):
'''
Examine the data for the Activity, compute and update some statistics on the measuredparameters
for this activity. Store the histogram in the associated table.
'''
if self.activity:
a = self.activity
else:
raise Exception('Must have an activity defined in self.activity')
try:
self.update_activityparameter_stats(self.dbAlias, a, parameterCounts, sampledFlag)
except ValueError as e:
self.logger.warn('%s. Likely a dataarray as from LOPC data', e)
except IntegrityError as e:
self.logger.warn('IntegrityError(%s): Cannot create ActivityParameter and '
'updated statistics for Activity %s.', e, a)
self.logger.info('Updated statistics for activity.name = %s', a.name)
def insertSimpleDepthTimeSeries(self, critSimpleDepthTime=10):
'''
Read the time series of depth values for this activity, simplify it and insert the values in the
SimpleDepthTime table that is related to the Activity. This procedure is suitable for only
trajectory data; timeSeriesProfile type data uses another method to produce a collection of
simple depth time series for display in flot.
@param critSimpleDepthTime: An integer for the simplification factor, 10 is course, .0001 is fine
'''
vlqs = m.Measurement.objects.using(self.dbAlias).filter( instantpoint__activity=self.activity,
).values_list('instantpoint__timevalue', 'depth', 'instantpoint__pk')
line = []
pklookup = []
for dt,dd,pk in vlqs:
ems = 1000 * to_udunits(dt, 'seconds since 1970-01-01')
d = float(dd)
line.append((ems,d,))
pklookup.append(pk)
self.logger.debug('line = %s', line)
self.logger.info('Number of points in original depth time series = %d', len(line))
try:
# Original simplify_points code modified: the index from the original line is added as 3rd item in the return
simple_line = simplify_points(line, critSimpleDepthTime)
except IndexError:
simple_line = [] # Likely "list index out of range" from a stride that's too big
self.logger.info('Number of points in simplified depth time series = %d', len(simple_line))
self.logger.debug('simple_line = %s', simple_line)
for t,d,k in simple_line:
try:
ip = m.InstantPoint.objects.using(self.dbAlias).get(id = pklookup[k])
m.SimpleDepthTime.objects.using(self.dbAlias).create(activity = self.activity, instantpoint = ip, depth = d, epochmilliseconds = t)
except ObjectDoesNotExist:
self.logger.warn('InstantPoint with id = %d does not exist; from point at index k = %d', pklookup[k], k)
self.logger.info('Inserted %d values into SimpleDepthTime', len(simple_line))
def saveBottomDepth(self):
@transaction.atomic(using=self.dbAlias)
def _innerSaveBottomDepth(self):
'''
Read the time series of Parameter altitude and add to depth values to compute BottomDepth
and add it to the Measurement so that our Matplotlib plots can also ieasily include the depth profile.
This procedure is suitable for only trajectory data.
'''
mpQS = m.MeasuredParameter.objects.using(self.dbAlias).select_related('measurement'
).filter( datavalue__isnull=False,
measurement__instantpoint__activity=self.activity,
parameter__standard_name='height_above_sea_floor')
count = mpQS.count()
self.logger.info('mpQS.count() = %s', count)
counter = 0
for mp in mpQS:
counter += 1
try:
mp.measurement.bottomdepth = mp.measurement.depth + mp.datavalue
mp.measurement.save(using=self.dbAlias)
except DatabaseError as e:
self.logger.warn(e)
if counter % 10000 == 0:
self.logger.info('%d of %d mp.measurement.bottomdepth records saved', counter, count)
return _innerSaveBottomDepth(self)
def insertSimpleBottomDepthTimeSeries(self, critSimpleBottomDepthTime=10):
@transaction.atomic(using=self.dbAlias)
def _innerInsertSimpleBottomDepthTimeSeries(self, critSimpleBottomDepthTime=10):
'''
Read the bottomdepth from Measurement for the Activity, simplify it
and insert the values in the SimpleBottomDepthTime table that is related to the Activity.
This procedure is suitable for only trajectory data.
@param critSimpleBottomDepthTime: An integer for the simplification factor, 10 is course, .0001 is fine
'''
tbdQS = m.MeasuredParameter.objects.using(self.dbAlias).filter(measurement__instantpoint__activity=self.activity
).values('measurement__instantpoint__timevalue', 'measurement__bottomdepth',
'measurement__instantpoint__id')
count = tbdQS.count()
# simplify_points() has a limit of how many points it can handle
maxRecords = .1e6
stride = 1
if count > maxRecords:
stride = int((count + maxRecords / 2)/ maxRecords)
self.logger.info('Striding tbdQS by %d to be kind to simplify_points()', stride)
# Now, get time and bottomdepth that we just saved for building the SimpleBottomDepth time series
line = []
pklookup = []
i = 0
counter = 0
for tbd in tbdQS:
i += 1
if i % stride == 0:
counter += 1
ems = 1000 * to_udunits(tbd['measurement__instantpoint__timevalue'], 'seconds since 1970-01-01')
if tbd['measurement__bottomdepth']:
line.append( (ems, tbd['measurement__bottomdepth']) )
pklookup.append(tbd['measurement__instantpoint__id'])
if counter % 10000 == 0:
self.logger.info('%d of %d points read', counter, int(count / stride))
if line:
try:
# Original simplify_points code modified: the index from the original line is added as 3rd item in the return
self.logger.info('Calling simplify_points with len(line) = %d', len(line))
simple_line = simplify_points(line, critSimpleBottomDepthTime)
except IndexError:
simple_line = [] # Likely "list index out of range" from a stride that's too big
else:
simple_line = []