From eb93bce5a7f853e59568e5ab5af25f2a8476b440 Mon Sep 17 00:00:00 2001 From: mrmiguez Date: Mon, 19 Mar 2018 08:54:11 -0700 Subject: [PATCH] will RTD like this more? --- docs/source/conf.py | 2 +- docs/source/index.rst | 193 +++++++++++++++++- docs/source/{reader.rst => pymods.reader.rst} | 8 - docs/source/pymods.record.rst | 30 +++ docs/source/pymods.rst | 10 - docs/source/{writer.rst => pymods.writer.rst} | 0 docs/source/record.rst | 42 ---- docs/source/usage/quickstart.rst | 191 ----------------- 8 files changed, 222 insertions(+), 254 deletions(-) rename docs/source/{reader.rst => pymods.reader.rst} (65%) create mode 100644 docs/source/pymods.record.rst delete mode 100755 docs/source/pymods.rst rename docs/source/{writer.rst => pymods.writer.rst} (100%) delete mode 100644 docs/source/record.rst delete mode 100755 docs/source/usage/quickstart.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 188aadd..04a91bd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -93,7 +93,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. diff --git a/docs/source/index.rst b/docs/source/index.rst index 8721abe..08e5eb5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,12 +6,201 @@ Welcome to pymods's documentation! ================================== +pymods is a utility module for working with the Library of Congress's MODS +XML standard: Metadata Object Description Schema (MODS). It is a utility +wrapper for the lxml module specific to deserializing data out of +MODSXML into Python data types. + +If you need a module to serialize data into MODSXML, see the other +`pymods by Matt Cordial `_. 
+ +Installing +---------- + +Recommended: + +``pip install pymods`` + +Using +----- + +Basics +^^^^^^ + +XML is parsed using the MODSReader class: + +``mods_records = pymods.MODSReader('some_file.xml')`` + +Individual records are exposed as an iterator of MODSRecord objects: + +.. code:: python + + In [5]: for record in mods_records: + ....: print(record) + ....: + + + + +MODSReader will work with ``mods:modsCollection`` documents, outputs +from OAI-PMH feeds, or individual MODSXML documents with ``mods:mods`` +as the root element. + +**pymods.MODSRecord** + +The MODSReader class parses each ``mods:mods`` element into a +pymods.MODSRecord object. pymods.MODSRecord is a custom wrapper class +for the lxml.ElementBase class. All children of pymods.Record inherit +the lxml.\_Element and lxml.ElementBase methods. + +.. code:: python + + In [6]: record = next(pymods.MODSReader('example.xml')) + In [7]: print(record.nsmap) + {'dcterms': 'http://purl.org/dc/terms/', 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', None: 'http://www.loc.gov/mods/v3', 'flvc': 'info:flvc/manifest/v1', 'xlink': 'http://www.w3.org/1999/xlink', 'mods': 'http://www.loc.gov/mods/v3'} + +.. code:: python + + In [8]: for child in record.iterdescendants(): + ....: print(child.tag) + + {http://www.loc.gov/mods/v3}identifier + {http://www.loc.gov/mods/v3}extension + {info:flvc/manifest/v1}flvc + {info:flvc/manifest/v1}owningInstitution + {info:flvc/manifest/v1}submittingInstitution + {http://www.loc.gov/mods/v3}titleInfo + {http://www.loc.gov/mods/v3}title + {http://www.loc.gov/mods/v3}name + {http://www.loc.gov/mods/v3}namePart + {http://www.loc.gov/mods/v3}role + {http://www.loc.gov/mods/v3}roleTerm + {http://www.loc.gov/mods/v3}roleTerm + {http://www.loc.gov/mods/v3}typeOfResource + {http://www.loc.gov/mods/v3}genre + ... + +Methods +^^^^^^^ + +All functions return data as a string, a list, or a list of named +tuples. See the appropriate docstring for details. + +.. 
code:: python + + >>> record.genre? + Type: property + String form: + Docstring: + Accesses mods:genre element. + :return: A list containing Genre elements with term, authority, + authorityURI, and valueURI attributes. + +Examples +-------- + +Importing + +.. code:: python + + from pymods import MODSReader, MODSRecord + +Parsing a file + +.. code:: python + + In [10]: mods = MODSReader('example.xml') + In [11]: for record in mods: + ....: print(record.dates) + ....: + [Date(text='1966-12-08', type='{http://www.loc.gov/mods/v3}dateCreated')] + None + [Date(text='1987-02', type='{http://www.loc.gov/mods/v3}dateIssued')] + +Simple tasks +^^^^^^^^^^^^ + +Generating a title list + +.. code:: python + + In [14]: for record in mods: + ....: print(record.titles) + ....: + ['Fire Line System'] + ['$93,668.90. One Mill Tax Apportioned by Various Ways Proposed'] + ['Broward NOW News: National Organization for Women, February 1987'] + +Creating a subject list + +.. code:: python + + In [17]: for record in mods: + ....: for subject in record.subjects: + ....: print(subject.text) + ....: + Concert halls + Architecture + Architectural drawings + Structural systems + Structural systems drawings + Structural drawings + Safety equipment + Construction + Mechanics + Structural optimization + Architectural design + Fire prevention--Safety measures + Taxes + Tax payers + Tax collection + Organizations + Feminism + Sex discrimination against women + Women's rights + Equal rights amendments + Women--Societies and clubs + National Organization for Women + +More complex tasks +^^^^^^^^^^^^^^^^^^ + +Creating a list of subject URI's only for LCSH subjects + +.. 
code:: python + + In [18]: for record in mods: + ....: for subject in record.subjects: + ....: if 'lcsh' == subject.authority: + ....: print(subject.uri) + ....: + http://id.loc.gov/authorities/subjects/sh85082767 + http://id.loc.gov/authorities/subjects/sh88004614 + http://id.loc.gov/authorities/subjects/sh85132810 + http://id.loc.gov/authorities/subjects/sh85147343 + +Get URLs for objects using a No Copyright US rightsstatements.org URI + +.. code:: python + + In [23]: for record in mods: + ....: for rights_elem in record.rights: + ....: if rights_elem.uri == 'http://rightsstatements.org/vocab/NoC-US/1.0/': + ....: print(record.purl) + ....: + http://purl.flvc.org/fsu/fd/FSU_MSS0204_B01_F10_09 + http://purl.flvc.org/fsu/fd/FSU_MSS2008003_B18_F01_004 + +.. |Build Status| image:: https://travis-ci.org/mrmiguez/pymods.svg?branch=master + :target: https://travis-ci.org/mrmiguez/pymods + .. toctree:: :maxdepth: 4 :caption: Contents: - usage/quickstart - pymods + pymods.reader + pymods.record + pymods.writer Indices and tables ================== diff --git a/docs/source/reader.rst b/docs/source/pymods.reader.rst similarity index 65% rename from docs/source/reader.rst rename to docs/source/pymods.reader.rst index 68ede85..1723a3d 100644 --- a/docs/source/reader.rst +++ b/docs/source/pymods.reader.rst @@ -3,14 +3,6 @@ pymods.reader Module Custom pymods lxml parsers. -**pymods.MODSReader** - -Instantiates the pymods.MODSRecord class. Iterates over mods.mods elements. - -**pymods.OAIReader** - -Instantiates the pymods.OAIRecord class. Iterates record elements in any namespace. - .. toctree:: :maxdepth: 4 :caption: pymods.reader: diff --git a/docs/source/pymods.record.rst b/docs/source/pymods.record.rst new file mode 100644 index 0000000..3dbcb0a --- /dev/null +++ b/docs/source/pymods.record.rst @@ -0,0 +1,30 @@ +pymods.record Module +==================== + +.. py:module::pymods.record + + Custom pymods record classes. + +.. 
toctree:: + :maxdepth: 4 + :caption: pymods.record: + +.. autoclass:: pymods.Record + :members: + :show-inheritance: + :undoc-members: + +.. autoclass:: pymods.MODSRecord + :members: + :show-inheritance: + :undoc-members: + +.. autoclass:: pymods.OAIRecord + :members: + :show-inheritance: + :undoc-members: + +.. autoclass:: pymods.DCRecord + :members: + :show-inheritance: + :undoc-members: diff --git a/docs/source/pymods.rst b/docs/source/pymods.rst deleted file mode 100755 index ae6faec..0000000 --- a/docs/source/pymods.rst +++ /dev/null @@ -1,10 +0,0 @@ -pymods API -========== - -.. toctree:: - :maxdepth: 4 - :caption: pymods API: - - reader - record - writer \ No newline at end of file diff --git a/docs/source/writer.rst b/docs/source/pymods.writer.rst similarity index 100% rename from docs/source/writer.rst rename to docs/source/pymods.writer.rst diff --git a/docs/source/record.rst b/docs/source/record.rst deleted file mode 100644 index b56cfb8..0000000 --- a/docs/source/record.rst +++ /dev/null @@ -1,42 +0,0 @@ -pymods.record Module -==================== - -Custom pymods record classes. - -**pymods.MODSRecord** - -Defines functions and properties for deserializing MODSXML data into python data types. -Many properties of the MODSRecord class return namedtuples. Named aliases correspond to attributes and features -of the parent XML element. - -**pymods.OAIRecord** - -Provides convenience functions for accessing OAI-PMH record metadata in a variety of namespaces. - -**pymods.DCRecord** - -Basic functions for accessing elements in the Dublin Core and Qualified Dublin Core namespaces. - -.. toctree:: - :maxdepth: 4 - :caption: pymods.record: - -.. autoclass:: pymods.Record - :members: - :show-inheritance: - :undoc-members: - -.. autoclass:: pymods.MODSRecord - :members: - :show-inheritance: - :undoc-members: - -.. autoclass:: pymods.OAIRecord - :members: - :show-inheritance: - :undoc-members: - -.. 
autoclass:: pymods.DCRecord - :members: - :show-inheritance: - :undoc-members: diff --git a/docs/source/usage/quickstart.rst b/docs/source/usage/quickstart.rst deleted file mode 100755 index 01dd333..0000000 --- a/docs/source/usage/quickstart.rst +++ /dev/null @@ -1,191 +0,0 @@ -pymods -====== - -pymods is utility module for working with the Library of Congress's MODS -XML standard: Metadata Description Schema (MODS). It is a utility -wrapper for the lxml module specific to deserializing data out of -MODSXML into python data types. - -If you need a module to serialize data into MODSXML, see the other -`pymods by Matt Cordial `_. - -Installing -========== - -Recommended: - -``pip install pymods`` - -Using -===== - -Basics ------- - -XML is parsed using the MODSReader class: - -``mods_records = pymods.MODSReader('some_file.xml')`` - -Individual records are stored as an iterator of the MODSRecord object: - -.. code:: python - - In [5]: for record in mods_records: - ....: print(record) - ....: - - - - -MODSReader will work with ``mods:modsCollection`` documents, outputs -from OAI-PMH feeds, or individual MODSXML documents with ``mods:mods`` -as the root element. - -pymods.MODSRecord -^^^^^^^^^^^^^^^^^ - -The MODSReader class parses each ``mods:mods`` element into a -pymods.MODSRecord object. pymods.MODSRecord is a custom wrapper class -for the lxml.ElementBase class. All children of pymods.Record inherit -the lxml.\_Element and lxml.ElementBase methods. - -.. code:: python - - In [6]: record = next(pymods.MODSReader('example.xml')) - In [7]: print(record.nsmap) - {'dcterms': 'http://purl.org/dc/terms/', 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', None: 'http://www.loc.gov/mods/v3', 'flvc': 'info:flvc/manifest/v1', 'xlink': 'http://www.w3.org/1999/xlink', 'mods': 'http://www.loc.gov/mods/v3'} - -.. 
code:: python - - In [8]: for child in record.iterdescendants(): - ....: print(child.tag) - - {http://www.loc.gov/mods/v3}identifier - {http://www.loc.gov/mods/v3}extension - {info:flvc/manifest/v1}flvc - {info:flvc/manifest/v1}owningInstitution - {info:flvc/manifest/v1}submittingInstitution - {http://www.loc.gov/mods/v3}titleInfo - {http://www.loc.gov/mods/v3}title - {http://www.loc.gov/mods/v3}name - {http://www.loc.gov/mods/v3}namePart - {http://www.loc.gov/mods/v3}role - {http://www.loc.gov/mods/v3}roleTerm - {http://www.loc.gov/mods/v3}roleTerm - {http://www.loc.gov/mods/v3}typeOfResource - {http://www.loc.gov/mods/v3}genre - ... - -Methods -------- - -All functions return data either as a string, list, list of named -tuples. See the appropriate docstring for details. - -.. code:: python - - >>> record.genre? - Type: property - String form: - Docstring: - Accesses mods:genre element. - :return: A list containing Genre elements with term, authority, - authorityURI, and valueURI attributes. - -Examples -======== - -Importing - -.. code:: python - - from pymods import MODSReader, MODSRecord - -Parsing a file - -.. code:: python - - In [10]: mods = MODSReader('example.xml') - In [11]: for record in mods: - ....: print(record.dates) - ....: - [Date(text='1966-12-08', type='{http://www.loc.gov/mods/v3}dateCreated')] - None - [Date(text='1987-02', type='{http://www.loc.gov/mods/v3}dateIssued')] - -Simple tasks ------------- - -Generating a title list - -.. code:: python - - In [14]: for record in mods: - ....: print(record.titles) - ....: - ['Fire Line System'] - ['$93,668.90. One Mill Tax Apportioned by Various Ways Proposed'] - ['Broward NOW News: National Organization for Women, February 1987'] - -Creating a subject list - -.. 
code:: python - - In [17]: for record in mods: - ....: for subject in record.subjects: - ....: print(subject.text) - ....: - Concert halls - Architecture - Architectural drawings - Structural systems - Structural systems drawings - Structural drawings - Safety equipment - Construction - Mechanics - Structural optimization - Architectural design - Fire prevention--Safety measures - Taxes - Tax payers - Tax collection - Organizations - Feminism - Sex discrimination against women - Women's rights - Equal rights amendments - Women--Societies and clubs - National Organization for Women - -More complex tasks ------------------- - -Creating a list of subject URI's only for LCSH subjects - -.. code:: python - - In [18]: for record in mods: - ....: for subject in record.subjects: - ....: if 'lcsh' == subject.authority: - ....: print(subject.uri) - ....: - http://id.loc.gov/authorities/subjects/sh85082767 - http://id.loc.gov/authorities/subjects/sh88004614 - http://id.loc.gov/authorities/subjects/sh85132810 - http://id.loc.gov/authorities/subjects/sh85147343 - -Get URLs for objects using a No Copyright US rightsstatement.org URI - -.. code:: python - - In [23]: for record in mods: - ....: for rights_elem in record.rights - ....: if rights_elem.uri == 'http://rightsstatements.org/vocab/NoC-US/1.0/': - ....: print(record.purl) - ....: - http://purl.flvc.org/fsu/fd/FSU_MSS0204_B01_F10_09 - http://purl.flvc.org/fsu/fd/FSU_MSS2008003_B18_F01_004 - -.. |Build Status| image:: https://travis-ci.org/mrmiguez/pymods.svg?branch=master - :target: https://travis-ci.org/mrmiguez/pymods