-
Notifications
You must be signed in to change notification settings - Fork 11
/
repository.py
1283 lines (1053 loc) · 57.1 KB
/
repository.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os, hashlib, uuid, urllib
from core import Statement, DepositResponse, MediaResourceResponse, DeleteResponse, Auth, AuthException, SwordError, ServiceDocument, SDCollection, EntryDocument, Authenticator, SwordServer, WebUI
from spec import Namespaces, Errors
from lxml import etree
from datetime import datetime
from zipfile import ZipFile
from negotiator import AcceptParameters, ContentType
from info import __version__
from sss_logging import logging
ssslog = logging.getLogger(__name__)
class WebInterface(WebUI):
    """Web user interface entry point.

    Routes an incoming GET to one of three page handlers based on the
    shape of the request path.
    """
    def get(self, path=None):
        """Render the UI page for *path*.

        - no path: the home page
        - path without a "/": a collection page
        - path containing a "/": an item page
        """
        if path is None:
            return HomePage(self.config).get_home_page()
        if path.find("/") >= 0:
            return ItemPage(self.config).get_item_page(path)
        return CollectionPage(self.config).get_collection_page(path)
class SSSAuthenticator(Authenticator):
    """Authenticator backed by the single user/password pair in the config."""

    def __init__(self, config):
        Authenticator.__init__(self, config)

    def basic_authenticate(self, username, password, obo):
        """Check HTTP Basic credentials (plus optional On-Behalf-Of).

        Returns an Auth object on success; raises AuthException with
        authentication_failed (401) on bad credentials, or with
        target_owner_unknown when the obo value doesn't match the config.
        Note: when authentication is disabled, any obo value is ignored.
        """
        # development mode: authentication may be switched off entirely
        if not self.config.authenticate:
            ssslog.info("Authentication is turned OFF")
            return Auth(self.config.user)

        ssslog.info("Authentication required")
        # credentials that don't match the configured pair -> 401
        if username != self.config.user or password != self.config.password:
            ssslog.info("Authentication Failed; returning 401")
            raise AuthException(authentication_failed=True)
        # an On-Behalf-Of header that doesn't match the configured value is
        # reported as a TargetOwnerUnknown SWORD error
        if obo is not None and obo != self.config.obo:
            ssslog.info("Authentication Failed with Target Owner Unknown")
            raise AuthException(target_owner_unknown=True)

        if obo is not None:
            return Auth(self.config.user, obo)
        return Auth(self.config.user)
class URIManager(object):
    """
    Class for providing a single point of access to all identifiers used by SSS.

    All URLs are built from configuration.base_url; the path/oid helpers do
    the reverse mapping from URL fragments back to (collection, id[, file]).
    """
    def __init__(self, config):
        self.configuration = config

    def interpret_statement_path(self, path):
        """Determine the requested statement format from the path suffix.

        Returns (accept_parameters, path): accept_parameters is None when no
        recognised suffix is present, and path has any suffix stripped.
        """
        accept_parameters = None
        # FIX: match the full ".rdf"/".atom" extension rather than the bare
        # substring — an id that merely *ends* in "rdf"/"atom" previously had
        # 4/5 characters chopped off (the dotted form is what state_uri emits)
        if path.endswith(".rdf"):
            accept_parameters = AcceptParameters(ContentType("application/rdf+xml"))
            path = path[:-4]
        elif path.endswith(".atom"):
            accept_parameters = AcceptParameters(ContentType("application/atom+xml;type=feed"))
            path = path[:-5]
        return accept_parameters, path

    def is_atom_path(self, path):
        """Return (is_atom, path) — True when path ends ".atom" (suffix stripped)."""
        atom = False
        if path.endswith(".atom"):
            path = path[:-5]
            atom = True
        return atom, path

    def html_url(self, collection, id=None):
        """ The url for the HTML splash page of an object in the store """
        if id is not None:
            return self.configuration.base_url + "html/" + collection + "/" + id
        return self.configuration.base_url + "html/" + collection

    def sd_uri(self, sub=True):
        """The Service Document URI; with sub=True a random sub-service path is appended."""
        uri = self.configuration.base_url + "sd-uri"
        if sub:
            uri += "/" + str(uuid.uuid4())
        return uri

    def col_uri(self, id):
        """ The url for a collection on the server """
        return self.configuration.base_url + "col-uri/" + id

    def edit_uri(self, collection, id):
        """ The Edit-URI """
        return self.configuration.base_url + "edit-uri/" + collection + "/" + id

    def em_uri(self, collection, id):
        """ The EM-URI """
        return self.configuration.base_url + "em-uri/" + collection + "/" + id

    def cont_uri(self, collection, id):
        """ The Cont-URI """
        return self.configuration.base_url + "cont-uri/" + collection + "/" + id

    def state_uri(self, collection, id, type):
        """The Statement URI; type is "atom" or "ore" (returns None otherwise)."""
        root = self.configuration.base_url + "state-uri/" + collection + "/" + id
        if type == "atom":
            return root + ".atom"
        elif type == "ore":
            return root + ".rdf"

    def part_uri(self, collection, id, filename):
        """ The URL for accessing the parts of an object in the store """
        return self.configuration.base_url + "part-uri/" + collection + "/" + id + "/" + urllib.quote(filename)

    def agg_uri(self, collection, id):
        """The ORE aggregation URI for the container."""
        return self.configuration.base_url + "agg-uri/" + collection + "/" + id

    def atom_id(self, collection, id):
        """ An ID to use for Atom Entries """
        return "tag:container@sss/" + collection + "/" + id

    def interpret_oid(self, oid):
        """
        Take an object id from a URL and interpret the collection and id terms.
        Returns a tuple of (collection, id)
        """
        collection, id = oid.split("/", 1)
        return collection, id

    def interpret_path(self, path):
        """
        Take a file path from a URL and interpret the collection, id and filename terms.
        Returns a tuple of (collection, id, filename)
        """
        collection, id, fn = path.split("/", 2)
        return collection, id, fn
class SSS(SwordServer):
"""
The main SWORD Server class. This class deals with all the CRUD requests as provided by the web.py HTTP
handlers
"""
def __init__(self, config, auth):
    """Set up storage, namespace, and URI helpers plus the state vocabulary."""
    SwordServer.__init__(self, config, auth)
    # helpers: storage access, XML namespaces, identifier construction
    self.dao = DAO(self.configuration)
    self.ns = Namespaces()
    self.um = URIManager(self.configuration)
    # the two workflow states SSS supports, and their human-readable blurbs
    self.in_progress_uri = "http://purl.org/net/sword/state/in-progress"
    self.archived_uri = "http://purl.org/net/sword/state/archived"
    self.states = {
        self.in_progress_uri : "The work is currently in progress, and has not passed to a reviewer",
        self.archived_uri : "The work has passed through review and is now in the archive"
    }
    # namespace maps used during serialisation of feeds/statements/entries
    self.cmap = {None: self.ns.ATOM_NS}
    self.smap = {"rdf" : self.ns.RDF_NS, "ore" : self.ns.ORE_NS, "sword" : self.ns.SWORD_NS}
    self.emap = {"sword" : self.ns.SWORD_NS, "atom" : self.ns.ATOM_NS}
def container_exists(self, oid):
    """True when the container addressed by oid exists; any .rdf/.atom
    statement-format suffix is stripped before checking."""
    _, path = self.um.interpret_statement_path(oid)
    return self.exists(path)
def media_resource_exists(self, oid):
    """True when the media resource addressed by oid exists; a trailing
    ".atom" (feed form of the content) is stripped before checking."""
    _, path = self.um.is_atom_path(oid)
    return self.exists(path)
def exists(self, oid):
    """
    Does the specified object id (collection/id) exist in the store?
    """
    collection, id = oid.split("/", 1)
    if not self.dao.collection_exists(collection):
        return False
    return self.dao.container_exists(collection, id)
def service_document(self, path=None):
    """
    Construct the Service Document. This takes the set of collections that
    are in the store and places them in an Atom Service document as the
    individual entries.

    - path: when not None this is a sub-service document request, and no
      further sub-service links are advertised.
    Returns the serialised service document.
    """
    # sub-service links only appear on the top-level service document
    use_sub = self.configuration.use_sub if path is None else False
    service = ServiceDocument(version=self.configuration.sword_version,
                              max_upload_size=self.configuration.max_upload_size)
    collections = []
    for col_name in self.dao.get_collection_names():
        # content types accepted (possibly none at all)
        accept = []
        multipart_accept = []
        if not self.configuration.accept_nothing:
            # copy the configured lists so later mutation can't leak back
            if self.configuration.app_accept is not None:
                accept = list(self.configuration.app_accept)
            if self.configuration.multipart_accept is not None:
                multipart_accept = list(self.configuration.multipart_accept)
        # SWORD packaging formats accepted
        accept_package = list(self.configuration.sword_accept_package)
        # provide a sub service element if appropriate
        subservice = [self.um.sd_uri(True)] if use_sub else []
        collections.append(SDCollection(
            href=self.um.col_uri(col_name),
            title="Collection " + col_name,
            accept=accept,
            multipart_accept=multipart_accept,
            description="Collection Description",
            accept_package=accept_package,
            collection_policy="Collection Policy",
            mediation=self.configuration.mediation,
            treatment="Treatment description",
            sub_service=subservice))
    service.add_workspace("Main Site", collections)
    # serialise and return
    return service.serialise()
def list_collection(self, id):
    """
    List the contents of a collection identified by the supplied id,
    serialised as an Atom feed (one entry per container).
    Returns the pretty-printed feed; an empty feed if the collection
    directory does not exist on disk.
    """
    # FIXME: would be good to have this in the generic implementation (section
    # 6.2), but that's a future task; for the time being this remains a
    # repository specific piece of code, and a generic implementation will
    # be done later
    # FIX: timestamps carry a "Z" (UTC) suffix, so generate them from UTC
    # rather than local time; computed once and reused for every entry
    now = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    # create an empty feed element for the collection
    feed = etree.Element(self.ns.ATOM + "feed", nsmap=self.cmap)
    title = etree.SubElement(feed, self.ns.ATOM + "title")
    title.text = "Title: " + id
    myid = etree.SubElement(feed, self.ns.ATOM + "id")
    myid.text = self.um.col_uri(id)
    atomlink = etree.SubElement(feed, self.ns.ATOM + "link")
    atomlink.set('rel', 'self')
    atomlink.set('href', self.um.col_uri(id))
    updated = etree.SubElement(feed, self.ns.ATOM + "updated")
    updated.text = now
    author = etree.SubElement(feed, self.ns.ATOM + "author")
    authorname = etree.SubElement(author, self.ns.ATOM + "name")
    authorname.text = "Simple Sword Server"
    # if the collection path does not exist, then return the empty feed
    cpath = os.path.join(self.configuration.store_dir, str(id))
    if not os.path.exists(cpath):
        return etree.tostring(feed, pretty_print=True)
    # one entry per container directory in the collection
    for part in os.listdir(cpath):
        entry = etree.SubElement(feed, self.ns.ATOM + "entry")
        entrytitle = etree.SubElement(entry, self.ns.ATOM + "title")
        entrytitle.text = "Title: " + part
        entryid = etree.SubElement(entry, self.ns.ATOM + "id")
        entryid.text = self.um.edit_uri(id, part)
        entryupdated = etree.SubElement(entry, self.ns.ATOM + "updated")
        entryupdated.text = now
        summary = etree.SubElement(entry, self.ns.ATOM + "summary")
        summary.text = "Summary for " + part
        link = etree.SubElement(entry, self.ns.ATOM + "link")
        link.set("rel", "edit")
        link.set("href", self.um.edit_uri(id, part))
        link2 = etree.SubElement(entry, self.ns.ATOM + "link")
        link2.set("rel", "alternate")
        link2.set("type", "text/html")
        link2.set("href", self.um.edit_uri(id, part))
    # pretty print and return
    return etree.tostring(feed, pretty_print=True)
def deposit_new(self, collection, deposit):
    """
    Take the supplied deposit and treat it as a new container with content
    to be created in the specified collection.

    Args:
        - collection: the ID of the collection to be deposited into
        - deposit: the DepositRequest object to be processed
    Returns a DepositResponse object which will contain the Deposit Receipt
    or a SWORD Error.
    Raises SwordError(404) when the target collection does not exist.

    NOTE: the sequence below is order-critical — atom ingest, content store,
    package ingest, statement store, then receipt generation/storage.
    """
    # check for standard possible errors, raises an exception if appropriate
    self.check_deposit_errors(deposit)

    # does the collection directory exist? If not, we can't do a deposit
    if not self.dao.collection_exists(collection):
        raise SwordError(status=404, empty=True)

    # create us a new container, passing in the Slug value (which may be None) as the proposed id
    id = self.dao.create_container(collection, deposit.slug)

    # store the incoming atom document if necessary
    if deposit.atom is not None:
        entry_ingester = self.configuration.get_entry_ingester()(self.dao)
        entry_ingester.ingest(collection, id, deposit.atom)

    # store the content file if one exists, and do some processing on it
    deposit_uri = None
    derived_resource_uris = []
    if deposit.content is not None:
        # fall back to a placeholder filename when the client supplied none
        if deposit.filename is None:
            deposit.filename = "unnamed.file"
        fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)

        # now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
        # all the metadata and any files we want
        # FIXME: because the deposit interpreter doesn't deal with multipart properly
        # we don't get the correct packaging format here if the package is anything
        # other than Binary
        ssslog.info("attempting to load ingest packager for format " + str(deposit.packaging))
        packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
        derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)

        # An identifier which will resolve to the package just deposited
        deposit_uri = self.um.part_uri(collection, id, fn)

        # a list of identifiers which will resolve to the derived resources
        derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

    # the aggregation uri
    agg_uri = self.um.agg_uri(collection, id)

    # the Edit-URI
    edit_uri = self.um.edit_uri(collection, id)

    # State information: in-progress deposits stay in the working state
    state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
    state_description = self.states[state_uri]

    # create the initial statement
    s = Statement()
    s.aggregation_uri = agg_uri
    s.rem_uri = edit_uri
    by = deposit.auth.username if deposit.auth is not None else None
    obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
    if deposit_uri is not None:
        s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
    s.aggregates = derived_resource_uris
    s.add_state(state_uri, state_description)

    # store the statement by itself
    self.dao.store_statement(collection, id, s)

    # create the basic deposit receipt (which involves getting hold of the item's metadata first if it exists)
    metadata = self.dao.get_metadata(collection, id)
    receipt = self.deposit_receipt(collection, id, deposit, s, metadata)

    # store the deposit receipt
    self.dao.store_deposit_receipt(collection, id, receipt)

    # now augment the receipt with the details of this particular deposit
    # this handles None arguments, and converts the xml receipt into a string
    receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)

    # finally, assemble the deposit response and return
    dr = DepositResponse()
    dr.receipt = receipt.serialise()
    dr.location = edit_uri
    dr.created = True
    return dr
def get_part(self, path):
    """
    Get a file handle to the part identified by the supplied path.
    - path: The URI part which is the path to the file
    Returns an open file handle (caller must close it), or None when the
    file does not exist in the store.
    """
    collection, id, fn = self.um.interpret_path(path)
    if not self.dao.file_exists(collection, id, fn):
        return None
    route = self.dao.get_store_path(collection, id, fn)
    return open(route, "r")
def get_media_resource(self, oid, accept_parameters):
    """
    Get a representation of the media resource for the given id as
    represented by the specified content type.
    - oid: The ID of the object in the store (may carry a ".atom" suffix)
    - accept_parameters: negotiated AcceptParameters describing the
      requested representation (may be None when negotiation failed)
    Returns a MediaResourceResponse; for text/html requests this is a
    redirect to the item's web UI splash page.
    Raises SwordError(406) when no content type could be negotiated.
    """
    # by the time this is called, we should already know that we can return
    # this type, so there is no need for any checking, we just get on with it

    # requesting from the atom URI will get you the atom format, irrespective
    # of the content negotiation
    atom, path = self.um.is_atom_path(oid)
    if atom:
        ssslog.info("Received request for atom feed form of media resource")
        accept_parameters = AcceptParameters(ContentType("application/atom+xml;type=feed"))
    else:
        ssslog.info("Received request for package form of media resource")
        # did we successfully negotiate a content type?
        if accept_parameters is None:
            # FIX: corrected typo in client-facing message ("Requsted")
            raise SwordError(error_uri=Errors.content, status=406, msg="Requested Accept/Accept-Packaging is not supported by this server")

    ssslog.info("Request media type with media format: " + accept_parameters.media_format())

    # ok, so break the id down into collection and object
    collection, id = self.um.interpret_oid(path)

    # make a MediaResourceResponse object for us to use
    mr = MediaResourceResponse()

    # if the type/subtype is text/html, then we need to do a redirect. This
    # is equivalent to redirecting the client to the splash page of the item
    if accept_parameters.content_type.mimetype() == "text/html":
        ssslog.info("Requested format is text/html ... redirecting client to web ui")
        mr.redirect = True
        mr.url = self.um.html_url(collection, id)
        return mr

    # call the appropriate packager, and get back the filepath for the response
    packager = self.configuration.get_package_disseminator(accept_parameters.media_format())(self.dao, self.um)
    mr.filepath = packager.package(collection, id)
    mr.packaging = packager.get_uri()
    mr.content_type = accept_parameters.content_type.mimetype()
    return mr
def replace(self, oid, deposit):
    """
    Replace all the content represented by the supplied id with the supplied deposit.

    Args:
        - oid: the object ID in the store
        - deposit: a DepositRequest object
    Returns a DepositResponse containing the Deposit Receipt.
    Raises SwordError(404) when the target container does not exist.
    """
    # check for standard possible errors, raises an exception if appropriate
    self.check_deposit_errors(deposit)

    collection, id = self.um.interpret_oid(oid)

    # does the object directory exist? If not, we can't do a deposit
    if not self.exists(oid):
        # FIX: was "return SwordError(...)" — every sibling handler raises,
        # and callers expect a DepositResponse, so raise instead of return
        raise SwordError(status=404, empty=True)

    # first figure out what to do about the metadata
    keep_atom = False
    if deposit.atom is not None:
        ssslog.info("Replace request has ATOM part - updating")
        entry_ingester = self.configuration.get_entry_ingester()(self.dao)
        entry_ingester.ingest(collection, id, deposit.atom)
        keep_atom = True

    deposit_uri = None
    derived_resource_uris = []
    if deposit.content is not None:
        ssslog.info("Replace request has file content - updating")

        # remove all the old files before adding the new. We always leave
        # behind the metadata; this will be overwritten later if necessary
        self.dao.remove_content(collection, id, True, keep_atom)

        # store the content file
        if deposit.filename is None:
            deposit.filename = "unnamed.file"
        fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)
        ssslog.debug("New incoming file stored with filename " + fn)

        # invoke a package ingester over the stored content to extract all
        # the metadata and files we want. The metadata_relevant flag stops
        # the packager overwriting existing metadata when it shouldn't
        packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
        derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
        ssslog.debug("Resources derived from deposit: " + str(derived_resources))

        # a list of identifiers which will resolve to the derived resources
        derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

        # An identifier which will resolve to the package just deposited
        deposit_uri = self.um.part_uri(collection, id, fn)

    # the aggregation uri
    agg_uri = self.um.agg_uri(collection, id)

    # the Edit-URI
    edit_uri = self.um.edit_uri(collection, id)

    # State information
    state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
    state_description = self.states[state_uri]

    # create the new statement
    s = Statement()
    s.aggregation_uri = agg_uri
    s.rem_uri = edit_uri
    if deposit_uri is not None:
        by = deposit.auth.username if deposit.auth is not None else None
        obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
        s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
    s.add_state(state_uri, state_description)
    s.aggregates = derived_resource_uris

    # store the statement by itself
    self.dao.store_statement(collection, id, s)

    # create the deposit receipt (which involves getting hold of the item's metadata first if it exists)
    metadata = self.dao.get_metadata(collection, id)
    receipt = self.deposit_receipt(collection, id, deposit, s, metadata)

    # store the deposit receipt also
    self.dao.store_deposit_receipt(collection, id, receipt)

    # now augment the receipt with the details of this particular deposit
    # this handles None arguments, and converts the xml receipt into a string
    receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)

    # finally, assemble the deposit response and return
    dr = DepositResponse()
    dr.receipt = receipt.serialise()
    dr.location = edit_uri
    dr.created = True
    return dr
def delete_content(self, oid, delete):
    """
    Delete all of the content from the object identified by the supplied
    id; the metadata is kept, as it is bound to the container rather than
    the media resource.
    - oid: The ID of the object to delete the contents of
    - delete: The DeleteRequest object
    Returns a DeleteResponse containing the Deposit Receipt.
    Raises SwordError(404) when the target does not exist.
    """
    ssslog.info("Deleting content of resource " + oid)

    # standard request validation; throws an error if appropriate
    self.check_delete_errors(delete)

    collection, id = self.um.interpret_oid(oid)
    if not self.exists(oid):
        raise SwordError(status=404, empty=True)

    # drop every content file; metadata is retained (it belongs to the
    # container, not the media resource)
    self.dao.remove_content(collection, id, True)

    # rebuild a fresh statement reflecting the now-empty container
    state_uri = self.in_progress_uri if delete.in_progress else self.archived_uri
    statement = Statement()
    statement.aggregation_uri = self.um.agg_uri(collection, id)
    statement.rem_uri = self.um.edit_uri(collection, id)
    statement.add_state(state_uri, self.states[state_uri])
    self.dao.store_statement(collection, id, statement)

    # regenerate and store the deposit receipt from the current metadata
    metadata = self.dao.get_metadata(collection, id)
    receipt = self.deposit_receipt(collection, id, delete, statement, metadata)
    self.dao.store_deposit_receipt(collection, id, receipt)

    # assemble the delete response and return
    response = DeleteResponse()
    response.receipt = receipt.serialise()
    return response
def add_content(self, oid, deposit):
    """
    Add the supplied deposit's content to the media resource of an EXISTING
    container (the original docstring was mis-copied from deposit_new).

    Args:
        - oid: the object ID (collection/id) of the container to add to
        - deposit: the DepositRequest object to be processed
    Returns a DepositResponse object which will contain the Deposit Receipt
    or a SWORD Error.
    Raises SwordError(404) when the target container does not exist.
    """
    ssslog.info("Adding content to media resource of container " + oid)

    # check for standard possible errors, raises an exception if appropriate
    self.check_deposit_errors(deposit)

    collection, id = self.um.interpret_oid(oid)

    # does the collection directory exist? If not, we can't do a deposit
    if not self.exists(oid):
        raise SwordError(status=404, empty=True)

    # State information
    state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
    state_description = self.states[state_uri]

    # load the existing statement and move it to the requested state
    s = self.dao.load_statement(collection, id)
    s.set_state(state_uri, state_description)

    # store the content file if one exists, and do some processing on it
    location_uri = None
    deposit_uri = None
    derived_resource_uris = []
    if deposit.content is not None:
        ssslog.debug("Add request contains content part")
        if deposit.filename is None:
            deposit.filename = "unnamed.file"
        fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)
        ssslog.debug("New incoming file stored with filename " + fn)

        # NOTE(review): unlike deposit_existing, the ingester class is not
        # checked for None here — confirm packaging is always resolvable
        packager = self.configuration.get_package_ingester(deposit.packaging)(self.dao)
        derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
        ssslog.debug("Resources derived from deposit: " + str(derived_resources))

        # An identifier which will resolve to the package just deposited
        deposit_uri = self.um.part_uri(collection, id, fn)
        by = deposit.auth.username if deposit.auth is not None else None
        obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
        s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)

        # a list of identifiers which will resolve to the derived resources
        derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)

        # decide on the location URI (it differs depending on whether this
        # was an unpackable resource or not)
        if deposit.packaging == "http://purl.org/net/sword/package/Binary":
            location_uri = deposit_uri
        else:
            location_uri = self.um.em_uri(collection, id)

    # store the statement by itself
    self.dao.store_statement(collection, id, s)

    # create the deposit receipt (which involves getting hold of the item's metadata first if it exists)
    metadata = self.dao.get_metadata(collection, id)
    receipt = self.deposit_receipt(collection, id, deposit, s, metadata)

    # store the deposit receipt also
    self.dao.store_deposit_receipt(collection, id, receipt)

    # now augment the receipt with the details of this particular deposit
    # this handles None arguments, and converts the xml receipt into a string
    receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)

    # finally, assemble the deposit response and return
    dr = DepositResponse()
    dr.receipt = receipt.serialise()
    dr.location = location_uri
    dr.created = True
    return dr
def get_edit_uri(self, path):
    """Resolve *path* (collection/id) to the container's Edit-URI."""
    collection, id = self.um.interpret_oid(path)
    return self.um.edit_uri(collection, id)
def get_container(self, oid, accept_parameters):
    """
    Get a representation of the container in the requested content type.

    Args:
        - oid: The ID of the object in the store
        - accept_parameters: AcceptParameters describing the required format
    Returns the deposit receipt (atom entry), the statement (rdf or atom
    feed), or None when the mimetype is not supported.
    """
    # by the time this is called, we should already know that we can return
    # this type, so there is no need for any checking, we just get on with it
    mime = accept_parameters.content_type.mimetype()
    ssslog.info("Container requested in mime format: " + mime)

    # ok, so break the id down into collection and object
    collection, id = self.um.interpret_oid(oid)

    # pick either the deposit receipt or the pure statement to return
    if mime == "application/atom+xml;type=entry":
        return self.dao.get_deposit_receipt_content(collection, id)
    if mime == "application/rdf+xml":
        return self.dao.get_statement_content(collection, id)
    if mime == "application/atom+xml;type=feed":
        return self.dao.get_statement_feed(collection, id)

    ssslog.info("Requested mimetype not recognised/supported: " + mime)
    return None
def deposit_existing(self, oid, deposit):
"""
Deposit the incoming content into an existing object as identified by the supplied identifier
Args:
-oid: The ID of the object we are depositing into
-deposit: The DepositRequest object
Returns a DepositResponse containing the Deposit Receipt or a SWORD Error
"""
ssslog.debug("Deposit onto an existing container " + oid)
# check for standard possible errors, raises an exception if appropriate
self.check_deposit_errors(deposit)
collection, id = self.um.interpret_oid(oid)
# does the collection directory exist? If not, we can't do a deposit
if not self.exists(oid):
raise SwordError(status=404, empty=True)
# State information
state_uri = self.in_progress_uri if deposit.in_progress else self.archived_uri
state_description = self.states[state_uri]
# load the statement
s = self.dao.load_statement(collection, id)
# do the in-progress first, as some deposits will be empty, and will
# just be telling us that the client has finished working on this item
s.set_state(state_uri, state_description)
# just do some useful logging
if deposit.atom is None and deposit.content is None:
ssslog.info("Empty deposit request; therefore this is just completing a previously incomplete deposit")
# now just store the atom file and the content (this may overwrite an existing atom document - this is
# intentional, although real servers would augment the existing metadata rather than overwrite)
if deposit.atom is not None:
ssslog.info("Append request has ATOM part - adding")
# when we ingest the atom file, the existing atom doc may get overwritten,
# but the spec requires that we only add metadata, not overwrite anything
# (if possible). For a purist implementation, then, we mark additive=True
# in the call to the ingest method, so all metadata is added to whatever
# is already there
entry_ingester = self.configuration.get_entry_ingester()(self.dao)
entry_ingester.ingest(collection, id, deposit.atom, True)
# store the content file
deposit_uri = None
derived_resource_uris = []
if deposit.content is not None:
ssslog.info("Append request has file content - adding to media resource")
if deposit.filename is None:
deposit.filename = "unnamed.file"
fn = self.dao.store_content(collection, id, deposit.content, deposit.filename)
ssslog.debug("New incoming file stored with filename " + fn)
# now that we have stored the atom and the content, we can invoke a package ingester over the top to extract
# all the metadata and any files we want. Notice that we pass in the metadata_relevant flag, so the packager
# won't overwrite the metadata if it isn't supposed to
pclass = self.configuration.get_package_ingester(deposit.packaging)
if pclass is not None:
packager = pclass(self.dao)
derived_resources = packager.ingest(collection, id, fn, deposit.metadata_relevant)
ssslog.debug("Resources derived from deposit: " + str(derived_resources))
# a list of identifiers which will resolve to the derived resources
derived_resource_uris = self.get_derived_resource_uris(collection, id, derived_resources)
# An identifier which will resolve to the package just deposited
deposit_uri = self.um.part_uri(collection, id, fn)
# add the new deposit
by = deposit.auth.username if deposit.auth is not None else None
obo = deposit.auth.on_behalf_of if deposit.auth is not None else None
s.original_deposit(deposit_uri, datetime.now(), deposit.packaging, by, obo)
# add the new list of aggregations to the existing list, allowing the
# statement to ensure that the list is normalised (only consisting of
# unique uris)
s.add_normalised_aggregations(derived_resource_uris)
# store the statement by itself
self.dao.store_statement(collection, id, s)
# create the deposit receipt (which involves getting hold of the item's metadata first if it exists
metadata = self.dao.get_metadata(collection, id)
receipt = self.deposit_receipt(collection, id, deposit, s, metadata)
# store the deposit receipt also
self.dao.store_deposit_receipt(collection, id, receipt)
# now augment the receipt with the details of this particular deposit
# this handles None arguments, and converts the xml receipt into a string
receipt = self.augmented_receipt(receipt, deposit_uri, derived_resource_uris)
# finally, assemble the deposit response and return
dr = DepositResponse()
dr.receipt = receipt.serialise()
# NOTE: in the spec, this is different for 6.7.2 and 6.7.3 (edit-iri and eiri respectively)
# in this case, we have always gone for the approach of 6.7.2, and contend that the
# spec is INCORRECT for 6.7.3 (also, section 9.3, which comes into play here
# also says use the edit-uri)
dr.location = self.um.edit_uri(collection, id)
dr.created = True
return dr
def delete_container(self, oid, delete):
    """
    Delete the entire object in the store
    Args:
    -oid: The ID of the object in the store
    -delete: The DeleteRequest object
    Returns a DeleteResponse object on success
    Raises SwordError with status 404 if the object does not exist, or
    any SwordError raised by the standard delete checks
    """
    # check for standard possible errors, and throw if appropriate
    self.check_delete_errors(delete)

    collection, id = self.um.interpret_oid(oid)

    # does the container exist? If not, we can't do a delete.
    # FIX: this previously *returned* the SwordError instead of raising it,
    # so callers received an exception instance in place of a DeleteResponse;
    # raise it, consistent with the other handlers in this class
    if not self.exists(oid):
        raise SwordError(status=404, empty=True)

    # request the deletion of the container
    self.dao.remove_container(collection, id)

    return DeleteResponse()
def get_derived_resource_uris(self, collection, id, derived_resource_names):
    """
    Resolve each derived resource name to its part URI within the container.
    Args:
    -collection: the collection the container lives in
    -id: the container id
    -derived_resource_names: iterable of file names derived from a deposit
    Returns a list of part URIs, one per name, in input order
    """
    return [self.um.part_uri(collection, id, name)
            for name in derived_resource_names]
def augmented_receipt(self, receipt, original_deposit_uri, derived_resource_uris=None):
    """
    Augment the deposit receipt with the details of a specific deposit
    Args:
    -receipt: the receipt object to augment (modified in place)
    -original_deposit_uri: URI resolving to the package just deposited (may be None)
    -derived_resource_uris: list of URIs of resources derived from the deposit;
     defaults to an empty list
    Returns the same receipt object, augmented
    """
    # FIX: the default used to be a shared mutable [] (classic mutable
    # default argument); use None and substitute a fresh list per call so
    # receipts never share state
    if derived_resource_uris is None:
        derived_resource_uris = []
    receipt.original_deposit_uri = original_deposit_uri
    receipt.derived_resource_uris = derived_resource_uris
    return receipt
def deposit_receipt(self, collection, id, deposit, statement, metadata):
    """
    Construct a deposit receipt document for the given container
    Args:
    -collection: the collection the container lives in
    -id: the container id within the collection
    -deposit: the DepositRequest which triggered this receipt
    -statement: a Statement object for the container (not embedded; the
     receipt links out to the statement URIs instead)
    -metadata: dict of DC metadata for the item, or None
    Returns an EntryDocument representing the deposit receipt
    """
    # assemble the URIs we are going to need
    # the atom entry id
    drid = self.um.atom_id(collection, id)

    # the Cont-URI
    cont_uri = self.um.cont_uri(collection, id)

    # the EM-URI, in both its plain and atom-feed forms
    em_uri = self.um.em_uri(collection, id)
    em_uris = [(em_uri, None), (em_uri + ".atom", "application/atom+xml;type=feed")]

    # the Edit-URI and SE-IRI (the same URI in this implementation)
    edit_uri = self.um.edit_uri(collection, id)
    se_uri = edit_uri

    # the splash page URI
    splash_uri = self.um.html_url(collection, id)

    # the two statement uris
    atom_statement_uri = self.um.state_uri(collection, id, "atom")
    ore_statement_uri = self.um.state_uri(collection, id, "ore")
    state_uris = [(atom_statement_uri, "application/atom+xml;type=feed"), (ore_statement_uri, "application/rdf+xml")]

    # ensure that there is a metadata object, and that it is populated with
    # enough information to build the deposit receipt.
    # FIX: dict.has_key() is deprecated and was removed in Python 3; the
    # `in` operator is the equivalent and works on Python 2 and 3 alike
    if metadata is None:
        metadata = {}
    if "title" not in metadata:
        metadata["title"] = ["SWORD Deposit"]
    if "creator" not in metadata:
        metadata["creator"] = ["SWORD Client"]
    if "abstract" not in metadata:
        metadata["abstract"] = ["Content deposited with SWORD client"]

    # take a copy of the configured disseminators (copy, so the receipt
    # can't mutate the configuration's list) instead of a manual append loop
    packaging = list(self.configuration.sword_disseminate_package)

    verbose_description = "SSS has done this, that and the other to process the deposit"
    treatment = "Treatment description"

    # Now assemble the deposit receipt
    dr = EntryDocument(atom_id=drid, alternate_uri=splash_uri, content_uri=cont_uri,
                        edit_uri=edit_uri, se_uri=se_uri, em_uris=em_uris,
                        packaging=packaging, state_uris=state_uris, dc_metadata=metadata,
                        verbose_description=verbose_description, treatment=treatment)
    return dr
def get_statement(self, oid):
    """
    Retrieve the serialised statement for the given statement path.
    The path encodes the desired representation: RDF/XML and the Atom
    feed form are supported; any other mimetype yields None.
    """
    accept_parameters, path = self.um.interpret_statement_path(oid)
    collection, id = self.um.interpret_oid(path)

    # dispatch on the requested mimetype
    mimetype = accept_parameters.content_type.mimetype()
    if mimetype == "application/rdf+xml":
        return self.dao.get_statement_content(collection, id)
    if mimetype == "application/atom+xml;type=feed":
        return self.dao.get_statement_feed(collection, id)
    return None
def check_delete_errors(self, delete):
    """
    Check the delete request for standard error conditions.
    Args:
    -delete: the DeleteRequest object
    Raises SwordError(mediation_not_allowed) if this is a mediated delete
    and the server configuration does not permit mediation; otherwise
    returns None
    """
    # have we been asked to do a mediated delete, when this is not allowed?
    if delete.auth is not None:
        if delete.auth.on_behalf_of is not None and not self.configuration.mediation:
            # FIX: pass the error URI by keyword for consistency with
            # check_deposit_errors (SwordError also takes status/msg args,
            # so being explicit avoids positional mistakes)
            raise SwordError(error_uri=Errors.mediation_not_allowed)
def check_deposit_errors(self, deposit):
    """
    Validate a deposit request against the standard error conditions:
    unsupported packaging format, checksum mismatch, and disallowed
    mediated deposit. Raises SwordError on failure, returns None otherwise.
    """
    # unsupported packaging format requested?
    if deposit.packaging == self.configuration.error_content_package:
        raise SwordError(error_uri=Errors.content, status=415, msg="Unsupported Packaging format specified")

    # if the client supplied an MD5, verify it against the actual content
    if deposit.content_md5 is not None:
        digest = hashlib.md5(deposit.content).hexdigest()
        if digest != deposit.content_md5:
            raise SwordError(error_uri=Errors.checksum_mismatch, msg="Content-MD5 header does not match file checksum")

    # mediated deposit requested while mediation is disabled?
    if deposit.auth is not None and deposit.auth.on_behalf_of is not None and not self.configuration.mediation:
        raise SwordError(error_uri=Errors.mediation_not_allowed)

    return None
class DAO(object):
"""
Data Access Object for interacting with the store
"""
def __init__(self, config):
    """
    Initialise the DAO. This creates the store directory in the Configuration() object if it does not already
    exist and will construct the relevant number of fake collections. In general if you make changes to the
    number of fake collections you want to have, it's best just to burn the store and start from scratch, although
    this method will check to see that it has enough fake collections and make up the deficit, but it WILL NOT
    remove excess collections
    """
    self.configuration = config

    # first thing to do is create the store if it does not already exist
    # FIX: replaced a stray debug `print` statement (which wrote the store
    # path to stdout on every construction) with the module logger
    ssslog.debug("store directory: " + self.configuration.store_dir)
    if not os.path.exists(self.configuration.store_dir):
        os.makedirs(self.configuration.store_dir)

    # now construct the fake collections, topping up to the configured
    # number (existing collections are never removed)
    current_cols = os.listdir(self.configuration.store_dir)
    create = self.configuration.num_collections - len(current_cols)
    for i in range(create):
        name = str(uuid.uuid4())
        cdir = os.path.join(self.configuration.store_dir, name)
        os.makedirs(cdir)

    # namespace helpers used when reading/writing metadata files
    self.ns = Namespaces()
    self.mdmap = {None : self.ns.DC_NS}
def get_collection_names(self):
    """Return the names of all the collections currently in the store."""
    # each top-level directory in the store is one collection
    store = self.configuration.store_dir
    return os.listdir(store)
def collection_exists(self, collection):
    """
    Does the specified collection exist?
    Args:
    -collection: the Collection name
    Returns true or false
    """
    # a collection exists iff its directory is present in the store
    return os.path.exists(os.path.join(self.configuration.store_dir, collection))
def container_exists(self, collection, id):
"""
Does the specified container exist? If the collection does not exist this will still return and will return
false
Args:
-collection: the Collection name
-id: the container id
Returns true or false
"""
odir = os.path.join(self.configuration.store_dir, collection, id)