diff --git a/.gitignore b/.gitignore index e24445137..a5b30da90 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,16 @@ .coverage +.vscode +.idea +.pytest_cache/ /dist/ /docs/.build/ /*.egg-info *.pyc -.pytest_cache/ _scratch/ Session.vim /.tox/ +/build/ +build-install.bat +app.xml +core.xml +editor-appproperties.py \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 000000000..13566b81b --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/aws.xml b/.idea/aws.xml new file mode 100644 index 000000000..03f1bb6ee --- /dev/null +++ b/.idea/aws.xml @@ -0,0 +1,17 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 000000000..32f31c08c --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,21 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..105ce2da2 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 000000000..84d22c582 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..30aae523d --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/python-docx.iml b/.idea/python-docx.iml new file mode 100644 index 000000000..0958f8f57 --- /dev/null +++ b/.idea/python-docx.iml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + \ No 
newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 000000000..94a25f7f4 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/docx/__init__.py b/docx/__init__.py index 59756c021..d843da579 100644 --- a/docx/__init__.py +++ b/docx/__init__.py @@ -10,6 +10,7 @@ from docx.opc.constants import CONTENT_TYPE as CT, RELATIONSHIP_TYPE as RT from docx.opc.part import PartFactory from docx.opc.parts.coreprops import CorePropertiesPart +from docx.opc.parts.extendedprops import ExtendedPropertiesPart from docx.parts.document import DocumentPart from docx.parts.hdrftr import FooterPart, HeaderPart @@ -27,6 +28,7 @@ def part_class_selector(content_type, reltype): PartFactory.part_class_selector = part_class_selector PartFactory.part_type_for[CT.OPC_CORE_PROPERTIES] = CorePropertiesPart +PartFactory.part_type_for[CT.OPC_EXTENDED_PROPERTIES] = ExtendedPropertiesPart PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart PartFactory.part_type_for[CT.WML_FOOTER] = FooterPart PartFactory.part_type_for[CT.WML_HEADER] = HeaderPart @@ -37,6 +39,7 @@ def part_class_selector(content_type, reltype): del ( CT, CorePropertiesPart, + ExtendedPropertiesPart, DocumentPart, FooterPart, HeaderPart, diff --git a/docx/document.py b/docx/document.py index 6493c458b..d54468263 100644 --- a/docx/document.py +++ b/docx/document.py @@ -100,6 +100,14 @@ def core_properties(self): properties of this document. """ return self._part.core_properties + + @property + def extended_properties(self): + """ + A |AppProperties| object providing read/write access to the app + properties of this document. 
"""
The :mod:`docx.opc.extendedprops` module defines the ExtendedProperties class,
which coheres around the concerns of reading and writing application document
properties to and from the app.xml part of a .docx file.
"""

import re

# Zero-width CamelCase word boundaries: before an upper-case letter that
# follows a lower-case letter or digit ("Total|Time"), or before the last
# upper-case letter of an acronym run that starts a new word ("MM|Clips").
_WORD_BOUNDARY = re.compile(
    r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])'
)


def _get_property_name_from_tag(tag):
    """
    Return the snake_case property name for the (possibly Clark-notation)
    element tag *tag*, e.g. ``'{ns}TotalTime'`` -> ``'total_time'`` and
    ``'{ns}MMClips'`` -> ``'mm_clips'``.
    """
    local_name = tag.split('}')[-1]
    return _WORD_BOUNDARY.sub('_', local_name).lower()


class ExtendedProperties(object):
    """
    Corresponds to part named ``/docProps/app.xml``, containing the extended
    document properties for this document package.

    Each known property is exposed as an instance attribute holding the text
    of the matching child element, or |None| when the element is absent.
    Assigning to an attribute only changes this object; use
    :meth:`set_property` to write a value through to the XML.
    """

    # Attribute names accepted from the XML. Child elements that do not map
    # to one of these are ignored, so document content can never shadow a
    # method or private attribute of this class.
    _PROPERTY_NAMES = (
        'template',
        'manager',
        'company',
        'pages',
        'words',
        'characters',
        'presentation_format',
        'lines',
        'paragraphs',
        'slides',
        'notes',
        'total_time',
        'hidden_slides',
        'mm_clips',
        'scale_crop',
        'heading_pairs',
        'titles_of_parts',
        'links_up_to_date',
        'characters_with_spaces',
        'shared_doc',
        'hyperlink_base',
        'h_links',
        'hyperlinks_changed',
        'dig_sig',
        'application',
        'app_version',
        'doc_security',
    )

    # Earlier spelling kept for callers; the XML element is named
    # ``CharactersWithSpaces``.
    _ALIASES = {'characters_with_space': 'characters_with_spaces'}

    def __init__(self, element):
        self._element = element
        self._property_elements = {}
        for name in self._PROPERTY_NAMES:
            setattr(self, name, None)

        for child in element:
            # comments and processing instructions carry a factory function
            # rather than a string in `.tag`
            if callable(child.tag):
                continue
            name = _get_property_name_from_tag(child.tag)
            if name not in self._PROPERTY_NAMES:
                continue
            setattr(self, name, child.text)
            self._property_elements[name] = child

    @property
    def characters_with_space(self):
        """Alias of `characters_with_spaces`, kept for backward compatibility."""
        return self.characters_with_spaces

    @characters_with_space.setter
    def characters_with_space(self, value):
        self.characters_with_spaces = value

    def set_property(self, property_name, value):
        """
        Set the property named *property_name* to *value*, updating both this
        object and the text of the backing XML element.

        Raises |AttributeError| when *property_name* is not a known extended
        property or when the part contains no element for it.
        """
        name = self._ALIASES.get(property_name, property_name)
        if name not in self._PROPERTY_NAMES:
            raise AttributeError(
                "Property '%s' not found in ExtendedProperties."
                % property_name
            )
        xml_element = self._property_elements.get(name)
        if xml_element is None:
            raise AttributeError(
                "XML element not found for property '%s'." % property_name
            )
        xml_element.text = value
        setattr(self, name, value)
100644 --- a/docx/opc/package.py +++ b/docx/opc/package.py @@ -8,6 +8,7 @@ from docx.opc.packuri import PACKAGE_URI, PackURI from docx.opc.part import PartFactory from docx.opc.parts.coreprops import CorePropertiesPart +from docx.opc.parts.extendedprops import ExtendedPropertiesPart from docx.opc.pkgreader import PackageReader from docx.opc.pkgwriter import PackageWriter from docx.opc.rel import Relationships @@ -40,6 +41,14 @@ def core_properties(self): Core properties for this document. """ return self._core_properties_part.core_properties + + @property + def extended_properties(self): + """ + |AppProperties| object providing read/write access to the Dublin + App properties for this document. + """ + return self._extended_properties_part.extended_properties def iter_rels(self): """ @@ -183,6 +192,19 @@ def _core_properties_part(self): core_properties_part = CorePropertiesPart.default(self) self.relate_to(core_properties_part, RT.CORE_PROPERTIES) return core_properties_part + + @property + def _extended_properties_part(self): + """ + |ExtendedPropertiesPart| object related to this package. Creates + a default app properties part if one is not present (not common). + """ + try: + return self.part_related_by(RT.EXTENDED_PROPERTIES) + except KeyError: + extended_properties_part = ExtendedPropertiesPart.default(self) + self.relate_to(extended_properties_part, RT.EXTENDED_PROPERTIES) + return extended_properties_part class Unmarshaller(object): diff --git a/docx/opc/part.py b/docx/opc/part.py index 928d3c183..5bb92bb8b 100644 --- a/docx/opc/part.py +++ b/docx/opc/part.py @@ -16,6 +16,7 @@ from .shared import lazyproperty + class Part(object): """ Base class for package parts. 
# encoding: utf-8

"""
Extended (app) properties part, corresponds to ``/docProps/app.xml`` part in
package.
"""

from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

from ..constants import CONTENT_TYPE as CT
from ..extendedprops import ExtendedProperties
from ..packuri import PackURI
from ..part import XmlPart
from ...oxml.extendedprops import CT_ExtendedProperties


class ExtendedPropertiesPart(XmlPart):
    """
    Corresponds to part named ``/docProps/app.xml``, containing the extended
    (application) document properties for this document package.
    """

    @classmethod
    def default(cls, package):
        """
        Return a new |ExtendedPropertiesPart| object whose XML contains a
        ``TotalTime`` element set to ``'1'``.
        """
        extended_properties_part = cls._new(package)
        root = extended_properties_part.element
        # Write the default straight into the XML. Assigning an attribute on
        # the object returned by `.extended_properties` would only change a
        # temporary proxy, because that property builds a new proxy on every
        # access.
        root_tag = root.tag
        # Clark-notation '{uri}' prefix of the root element, or '' when the
        # root is not namespaced, so the child lands in the same namespace.
        namespace = root_tag[:root_tag.find('}') + 1]
        total_time = root.makeelement(namespace + 'TotalTime')
        total_time.text = '1'
        root.append(total_time)
        return extended_properties_part

    @property
    def extended_properties(self):
        """
        A |ExtendedProperties| object providing read/write access to the
        extended properties contained in this part. A new proxy object is
        constructed from the part's root element on each access.
        """
        return ExtendedProperties(self.element)

    @classmethod
    def _new(cls, package):
        """
        Return a new instance of *cls* named ``/docProps/app.xml`` wrapping
        an empty extended-properties root element.
        """
        partname = PackURI('/docProps/app.xml')
        # same content-type constant this part class is registered under in
        # `docx/__init__.py`
        content_type = CT.OPC_EXTENDED_PROPERTIES
        extended_properties = CT_ExtendedProperties.new()
        return cls(partname, content_type, extended_properties, package)
"""
Custom element classes for the extended-properties part
(``/docProps/app.xml``).
"""

from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

from docx.compat import is_string
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne


class CT_ExtendedProperties(BaseOxmlElement):
    """
    ``<ep:Properties>`` element, the root element of the Extended Properties
    part stored as ``/docProps/app.xml``. Implements the extended document
    metadata elements. String elements resolve to an empty string ('') if the
    element is not present in the XML.
    """
    # Child elements live in the extended-properties ('ep') namespace, the
    # same prefix this class is registered under ('ep:Properties').
    totalTime = ZeroOrOne('ep:TotalTime', successors=())
    pages = ZeroOrOne('ep:Pages', successors=())
    template = ZeroOrOne('ep:Template', successors=())

    # NOTE(review): the original template literal lost its markup; this one
    # is rebuilt from the 'ep:Properties' registration -- confirm against the
    # intended root element.
    _extendedProperties_tmpl = (
        '<ep:Properties %s/>\n' % nsdecls('ep', 'vt')
    )

    @classmethod
    def new(cls):
        """
        Return a new, empty ``<ep:Properties>`` element parsed from the class
        template.
        """
        return parse_xml(cls._extendedProperties_tmpl)

    @property
    def total_time(self):
        """Text of the ``TotalTime`` child, or '' if absent."""
        return self._text_of_element('totalTime')

    @total_time.setter
    def total_time(self, value):
        self._set_element_text('totalTime', value)

    @property
    def template_text(self):
        """Text of the ``Template`` child, or '' if absent."""
        return self._text_of_element('template')

    @template_text.setter
    def template_text(self, value):
        self._set_element_text('template', value)

    @property
    def pages_text(self):
        """Text of the ``Pages`` child, or '' if absent."""
        return self._text_of_element('pages')

    @pages_text.setter
    def pages_text(self, value):
        self._set_element_text('pages', value)

    def _get_or_add(self, prop_name):
        """
        Return element returned by 'get_or_add_' method for *prop_name*,
        where *prop_name* is the name of a child-element class attribute
        such as ``'totalTime'``.
        """
        get_or_add_method = getattr(self, 'get_or_add_%s' % prop_name)
        return get_or_add_method()

    def _set_element_text(self, prop_name, value):
        """
        Set the text of the child element declared as *prop_name* to *value*,
        adding the element if it is not present. Non-string values are
        converted with `str()`. Raises |ValueError| if the resulting text is
        longer than 255 characters.
        """
        if not is_string(value):
            value = str(value)

        if len(value) > 255:
            tmpl = (
                "exceeded 255 char limit for property, got:\n\n'%s'"
            )
            raise ValueError(tmpl % value)
        element = self._get_or_add(prop_name)
        element.text = value

    def _text_of_element(self, property_name):
        """
        Return the text in the child element declared as *property_name*, or
        an empty string if the element is not present or contains no text.
        """
        element = getattr(self, property_name)
        if element is None or element.text is None:
            return ''
        return element.text
# encoding: utf-8

"""
Unit test suite for the docx.opc.parts.extendedprops module
"""

from __future__ import (
    absolute_import, division, print_function, unicode_literals
)

import pytest

from docx.opc.extendedprops import ExtendedProperties
from docx.opc.parts.extendedprops import ExtendedPropertiesPart
from docx.oxml.extendedprops import CT_ExtendedProperties

from ...unitutil.mock import class_mock, instance_mock


class DescribeExtendedPropertiesPart(object):
    """
    Exercises |ExtendedPropertiesPart|. The earlier content of this module
    repeated the core-properties part tests and covered none of the
    extended-properties code.
    """

    def it_provides_access_to_its_extended_props_object(
            self, extprops_fixture):
        extended_properties_part, ExtendedProperties_ = extprops_fixture
        extended_properties = extended_properties_part.extended_properties
        # the proxy is built from the part's root element
        ExtendedProperties_.assert_called_once_with(
            extended_properties_part.element
        )
        assert isinstance(extended_properties, ExtendedProperties)

    def it_can_create_a_default_extended_properties_part(self):
        extended_properties_part = ExtendedPropertiesPart.default(None)
        assert isinstance(extended_properties_part, ExtendedPropertiesPart)
        assert isinstance(
            extended_properties_part.extended_properties, ExtendedProperties
        )

    # fixtures ---------------------------------------------

    @pytest.fixture
    def extprops_fixture(self, element_, ExtendedProperties_):
        extended_properties_part = ExtendedPropertiesPart(
            None, None, element_, None
        )
        return extended_properties_part, ExtendedProperties_

    # fixture components -----------------------------------

    @pytest.fixture
    def ExtendedProperties_(self, request):
        return class_mock(
            request, 'docx.opc.parts.extendedprops.ExtendedProperties'
        )

    @pytest.fixture
    def element_(self, request):
        return instance_mock(request, CT_ExtendedProperties)