+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 000000000..32f31c08c
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 000000000..84d22c582
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..30aae523d
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/python-docx.iml b/.idea/python-docx.iml
new file mode 100644
index 000000000..0958f8f57
--- /dev/null
+++ b/.idea/python-docx.iml
@@ -0,0 +1,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 000000000..94a25f7f4
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docx/__init__.py b/docx/__init__.py
index 59756c021..d843da579 100644
--- a/docx/__init__.py
+++ b/docx/__init__.py
@@ -10,6 +10,7 @@
from docx.opc.constants import CONTENT_TYPE as CT, RELATIONSHIP_TYPE as RT
from docx.opc.part import PartFactory
from docx.opc.parts.coreprops import CorePropertiesPart
+from docx.opc.parts.extendedprops import ExtendedPropertiesPart
from docx.parts.document import DocumentPart
from docx.parts.hdrftr import FooterPart, HeaderPart
@@ -27,6 +28,7 @@ def part_class_selector(content_type, reltype):
PartFactory.part_class_selector = part_class_selector
PartFactory.part_type_for[CT.OPC_CORE_PROPERTIES] = CorePropertiesPart
+PartFactory.part_type_for[CT.OPC_EXTENDED_PROPERTIES] = ExtendedPropertiesPart
PartFactory.part_type_for[CT.WML_DOCUMENT_MAIN] = DocumentPart
PartFactory.part_type_for[CT.WML_FOOTER] = FooterPart
PartFactory.part_type_for[CT.WML_HEADER] = HeaderPart
@@ -37,6 +39,7 @@ def part_class_selector(content_type, reltype):
del (
CT,
CorePropertiesPart,
+ ExtendedPropertiesPart,
DocumentPart,
FooterPart,
HeaderPart,
diff --git a/docx/document.py b/docx/document.py
index 6493c458b..d54468263 100644
--- a/docx/document.py
+++ b/docx/document.py
@@ -100,6 +100,14 @@ def core_properties(self):
properties of this document.
"""
return self._part.core_properties
+
+ @property
+ def extended_properties(self):
+ """
+ An |ExtendedProperties| object providing read/write access to the
+ extended (app) properties of this document.
+ """
+ return self._part.extended_properties
@property
def inline_shapes(self):
diff --git a/docx/opc/constants.py b/docx/opc/constants.py
index b90aa394a..9d6118605 100644
--- a/docx/opc/constants.py
+++ b/docx/opc/constants.py
@@ -77,6 +77,9 @@ class CONTENT_TYPE(object):
OPC_CORE_PROPERTIES = (
'application/vnd.openxmlformats-package.core-properties+xml'
)
+ OPC_EXTENDED_PROPERTIES = (
+ 'application/vnd.openxmlformats-officedocument.extended-properties+xml'
+ )
OPC_DIGITAL_SIGNATURE_CERTIFICATE = (
'application/vnd.openxmlformats-package.digital-signature-certificat'
'e'
@@ -409,8 +412,8 @@ class RELATIONSHIP_TYPE(object):
'/control'
)
CORE_PROPERTIES = (
- 'http://schemas.openxmlformats.org/package/2006/relationships/metada'
- 'ta/core-properties'
+ 'http://schemas.openxmlformats.org/package/2006/relationships/metadata'
+ '/core-properties'
)
CUSTOM_PROPERTIES = (
'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
diff --git a/docx/opc/extendedprops.py b/docx/opc/extendedprops.py
new file mode 100644
index 000000000..061a287b1
--- /dev/null
+++ b/docx/opc/extendedprops.py
@@ -0,0 +1,97 @@
+# encoding: utf-8
+
+"""
+The :mod:`docx.opc.extendedprops` module defines the ExtendedProperties class,
+which coheres around the concerns of reading and writing extended (application)
+document properties to and from the app.xml part of a .docx file.
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
+
+
+def _get_property_name_from_tag(tag):
+    """
+    Return the snake_case property name for the XML element name *tag*.
+
+    *tag* may be a bare local name (``'TotalTime'``) or a namespace-qualified
+    name in Clark notation (``'{uri}TotalTime'``); everything up to and
+    including the last ``'}'`` is discarded. A run of capitals is kept
+    together as one word, so ``'MMClips'`` gives ``'mm_clips'`` and
+    ``'HLinks'`` gives ``'h_links'``.
+    """
+    local_name = tag.split('}')[-1]
+    # An underscore goes before a capital only where a new word starts: after
+    # a lowercase letter or digit, or where the last capital of a run is
+    # followed by a lowercase letter. Splitting before every capital would
+    # break acronyms apart ('MMClips' -> 'm_m_clips').
+    words = re.sub(
+        r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])', '_', local_name
+    )
+    return words.lower()
+
+
+class ExtendedProperties(object):
+    """
+    Corresponds to part named ``/docProps/app.xml``, containing the extended
+    document properties for this document package.
+
+    Every name in ``_PROPERTY_NAMES`` is available as an instance attribute.
+    Its value is the text of the matching child of *element*, or |None| when
+    *element* has no such child. The attributes are a snapshot taken when the
+    object is constructed: assigning to one directly changes only this object,
+    not the XML. Use :meth:`set_property` to change a value in both places.
+    """
+
+    # Attribute names exposed by this class, in their original declaration
+    # order. Each one starts out as None.
+    _PROPERTY_NAMES = (
+        'template', 'manager', 'company', 'pages', 'words', 'characters',
+        'presentation_format', 'lines', 'paragraphs', 'slides', 'notes',
+        'total_time', 'hidden_slides', 'mm_clips', 'scale_crop',
+        'heading_pairs', 'titles_of_parts', 'links_up_to_date',
+        'characters_with_space', 'shared_doc', 'hyperlink_base', 'h_links',
+        'hyperlinks_changed', 'dig_sig', 'application', 'app_version',
+        'doc_security',
+    )
+
+    # Converted tag names that do not equal the attribute name this class has
+    # always exposed. Without the mapping these elements are never loaded.
+    # NOTE(review): element names taken from the extended-properties schema
+    # (<MMClips>, <CharactersWithSpaces>) - confirm against a real app.xml.
+    _ALIASES = {
+        'm_m_clips': 'mm_clips',
+        'characters_with_spaces': 'characters_with_space',
+    }
+
+    def __init__(self, element):
+        """
+        Load property values from the children of *element*, the root XML
+        element of the extended properties part.
+        """
+        self._element = element
+        # property name -> child element that currently holds its value
+        self._property_elements = {}
+        for name in self._PROPERTY_NAMES:
+            setattr(self, name, None)
+
+        for child in element:
+            # Skip children that are not elements; in lxml a comment or
+            # processing instruction has a callable, not a string, as its tag.
+            if not isinstance(child.tag, str):
+                continue
+            name = _get_property_name_from_tag(child.tag)
+            name = self._ALIASES.get(name, name)
+            # Membership in the declared names (rather than hasattr) keeps an
+            # unexpected element from overwriting a method or private field.
+            if name in self._PROPERTY_NAMES:
+                setattr(self, name, child.text)
+                self._property_elements[name] = child
+
+    def set_property(self, property_name, value):
+        """
+        Set the property named *property_name* to *value*, updating both the
+        text of its XML element and the attribute on this object.
+
+        Raises |AttributeError| when *property_name* is not a known property
+        or when the XML had no element for it at construction time; this
+        method does not create missing elements.
+        """
+        if property_name not in self._PROPERTY_NAMES:
+            raise AttributeError(f"Property '{property_name}' not found in ExtendedProperties.")
+        xml_element = self._property_elements.get(property_name)
+        if xml_element is None:
+            raise AttributeError(f"XML element not found for property '{property_name}'.")
+        # Write the XML first so the attribute is left untouched if the
+        # element rejects *value*.
+        xml_element.text = value
+        setattr(self, property_name, value)
+
diff --git a/docx/opc/package.py b/docx/opc/package.py
index 7ba87bab5..19019f5ad 100644
--- a/docx/opc/package.py
+++ b/docx/opc/package.py
@@ -8,6 +8,7 @@
from docx.opc.packuri import PACKAGE_URI, PackURI
from docx.opc.part import PartFactory
from docx.opc.parts.coreprops import CorePropertiesPart
+from docx.opc.parts.extendedprops import ExtendedPropertiesPart
from docx.opc.pkgreader import PackageReader
from docx.opc.pkgwriter import PackageWriter
from docx.opc.rel import Relationships
@@ -40,6 +41,14 @@ def core_properties(self):
Core properties for this document.
"""
return self._core_properties_part.core_properties
+
+ @property
+ def extended_properties(self):
+ """
+ |ExtendedProperties| object providing read/write access to the
+ extended (app) properties for this document.
+ """
+ return self._extended_properties_part.extended_properties
def iter_rels(self):
"""
@@ -183,6 +192,19 @@ def _core_properties_part(self):
core_properties_part = CorePropertiesPart.default(self)
self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
return core_properties_part
+
+    @property
+    def _extended_properties_part(self):
+        """
+        The |ExtendedPropertiesPart| this package is related to by the
+        extended-properties relationship. When the package has no such part
+        (not common), a default one is created, related to this package, and
+        returned.
+        """
+        try:
+            part = self.part_related_by(RT.EXTENDED_PROPERTIES)
+        except KeyError:
+            # no app-properties part yet: build the default and link it in
+            part = ExtendedPropertiesPart.default(self)
+            self.relate_to(part, RT.EXTENDED_PROPERTIES)
+        return part
class Unmarshaller(object):
diff --git a/docx/opc/part.py b/docx/opc/part.py
index 928d3c183..5bb92bb8b 100644
--- a/docx/opc/part.py
+++ b/docx/opc/part.py
@@ -16,6 +16,7 @@
from .shared import lazyproperty
+
class Part(object):
"""
Base class for package parts. Provides common properties and methods, but
diff --git a/docx/opc/parts/extendedprops.py b/docx/opc/parts/extendedprops.py
new file mode 100644
index 000000000..dd732972a
--- /dev/null
+++ b/docx/opc/parts/extendedprops.py
@@ -0,0 +1,60 @@
+# encoding: utf-8
+# docx\opc\parts\extendedprops.py
+"""
+App properties part, corresponds to ``/docProps/app.xml`` part in package.
+"""
+
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals
+)
+
+
+from ..constants import CONTENT_TYPE as CT
+from ..extendedprops import ExtendedProperties
+from ..part import XmlPart
+from ...oxml.extendedprops import CT_ExtendedProperties
+from ..packuri import PackURI
+
+
+class ExtendedPropertiesPart(XmlPart):
+    """
+    Corresponds to part named ``/docProps/app.xml``, containing the extended
+    (app) document properties for this document package.
+    """
+
+    @classmethod
+    def default(cls, package):
+        """
+        Return a new |ExtendedPropertiesPart| for *package* whose root element
+        contains a single ``TotalTime`` child with the text ``'1'``.
+        """
+        extended_properties_part = cls._new(package)
+        root = extended_properties_part.element
+        # The value has to be written into the XML itself. `extended_properties`
+        # builds a fresh ExtendedProperties wrapper on every access and that
+        # wrapper only copies values from children already present, so
+        # assigning `total_time` on it would be discarded.
+        # The child is given the namespace of the root, if the root has one.
+        namespace = root.tag.rpartition('}')[0]
+        tag = '%s}TotalTime' % namespace if namespace else 'TotalTime'
+        total_time = root.makeelement(tag)
+        total_time.text = '1'
+        root.append(total_time)
+        return extended_properties_part
+
+    @property
+    def extended_properties(self):
+        """
+        A new |ExtendedProperties| object wrapping the root element of this
+        part; one is created on each access.
+        """
+        return ExtendedProperties(self.element)
+
+    @classmethod
+    def _new(cls, package):
+        """
+        Return a new part named ``/docProps/app.xml`` holding an empty root
+        element produced by ``CT_ExtendedProperties.new()``.
+        """
+        partname = PackURI('/docProps/app.xml')
+        # Use the constant the part factory is keyed on
+        # (CT.OPC_EXTENDED_PROPERTIES in docx/__init__.py) so a saved part is
+        # loaded back as this class.
+        content_type = CT.OPC_EXTENDED_PROPERTIES
+        root = CT_ExtendedProperties.new()
+        # `cls` rather than the hard-coded class keeps subclasses working.
+        return cls(partname, content_type, root, package)
diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py
index 093c1b45b..8f7218f26 100644
--- a/docx/oxml/__init__.py
+++ b/docx/oxml/__init__.py
@@ -72,6 +72,9 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
from .coreprops import CT_CoreProperties # noqa
register_element_cls('cp:coreProperties', CT_CoreProperties)
+from .extendedprops import CT_ExtendedProperties # noqa
+register_element_cls('ep:Properties', CT_ExtendedProperties)
+
from .document import CT_Body, CT_Document # noqa
register_element_cls('w:body', CT_Body)
register_element_cls('w:document', CT_Document)
diff --git a/docx/oxml/extendedprops.py b/docx/oxml/extendedprops.py
new file mode 100644
index 000000000..0dd17828d
--- /dev/null
+++ b/docx/oxml/extendedprops.py
@@ -0,0 +1,175 @@
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals
+)
+
+from datetime import datetime, timedelta
+import re
+
+from docx.compat import is_string
+from docx.oxml import parse_xml
+from docx.oxml.ns import nsdecls, qn
+from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne
+
+
+class CT_ExtendedProperties(BaseOxmlElement):
+    """
+    ``<Properties>`` element, the root element of the Extended Properties
+    part stored as ``/docProps/app.xml``. Implements the extended document
+    metadata elements. String elements resolve to an empty string ('') if the
+    element is not present in the XML.
+    """
+    # Children share the ``ep`` namespace of the root, which is registered as
+    # ``ep:Properties``, so their tags carry the same prefix.
+    # NOTE(review): assumes xmlchemy resolves child tags through the namespace
+    # map, as the prefixed names used at registration suggest - confirm.
+    totalTime = ZeroOrOne('ep:TotalTime', successors=())
+    pages = ZeroOrOne('ep:Pages', successors=())
+    template = ZeroOrOne('ep:Template', successors=())
+
+    # Markup for an empty root element declaring the ``ep`` and ``vt``
+    # namespaces; parsed by `new()`.
+    _extendedProperties_tmpl = (
+        '<ep:Properties %s/>\n' % nsdecls('ep', 'vt')
+    )
+
+    @classmethod
+    def new(cls):
+        """Return a new, empty root element parsed from the class template."""
+        xml = cls._extendedProperties_tmpl
+        extendedProperties = parse_xml(xml)
+        return extendedProperties
+
+    # The text accessors below pass the name of the child *attribute* declared
+    # above ('totalTime', 'template', 'pages'), because the helpers look it up
+    # with getattr() and build the 'get_or_add_<name>' method name from it.
+
+    @property
+    def total_time(self):
+        """Text of the ``TotalTime`` child, '' when absent or empty."""
+        return self._text_of_element('totalTime')
+
+    @total_time.setter
+    def total_time(self, value):
+        self._set_element_text('totalTime', value)
+
+    @property
+    def template_text(self):
+        """Text of the ``Template`` child, '' when absent or empty."""
+        return self._text_of_element('template')
+
+    @template_text.setter
+    def template_text(self, value):
+        self._set_element_text('template', value)
+
+    @property
+    def pages_text(self):
+        """Text of the ``Pages`` child, '' when absent or empty."""
+        return self._text_of_element('pages')
+
+    @pages_text.setter
+    def pages_text(self, value):
+        self._set_element_text('pages', value)
+
+    # NOTE(review): none of the properties in this class call the four
+    # date/time helpers below (_datetime_of_element, _offset_dt,
+    # _parse_W3CDTF_to_datetime, _set_element_datetime).
+
+    def _datetime_of_element(self, property_name):
+        """
+        Return the text of the child attribute *property_name* parsed as a
+        |datetime|, or |None| when the child is absent or its text cannot be
+        parsed.
+        """
+        element = getattr(self, property_name)
+        if element is None:
+            return None
+        datetime_str = element.text
+        try:
+            return self._parse_W3CDTF_to_datetime(datetime_str)
+        except ValueError:
+            # invalid datetime strings are ignored
+            return None
+
+    def _get_or_add(self, prop_name):
+        """
+        Return element returned by 'get_or_add_' method for *prop_name*,
+        which must be the name of a child attribute declared on this class.
+        """
+        get_or_add_method_name = 'get_or_add_%s' % prop_name
+        get_or_add_method = getattr(self, get_or_add_method_name)
+        element = get_or_add_method()
+        return element
+
+    @classmethod
+    def _offset_dt(cls, dt, offset_str):
+        """
+        Return a |datetime| instance that is offset from datetime *dt* by
+        the timezone offset specified in *offset_str*, a string like
+        ``'-07:00'``. A ``'+'`` offset is subtracted and a ``'-'`` offset is
+        added. Raises |ValueError| when *offset_str* does not match.
+        """
+        match = cls._offset_pattern.match(offset_str)
+        if match is None:
+            raise ValueError(
+                "'%s' is not a valid offset string" % offset_str
+            )
+        sign, hours_str, minutes_str = match.groups()
+        sign_factor = -1 if sign == '+' else 1
+        hours = int(hours_str) * sign_factor
+        minutes = int(minutes_str) * sign_factor
+        td = timedelta(hours=hours, minutes=minutes)
+        return dt + td
+
+    _offset_pattern = re.compile(r'([+-])(\d\d):(\d\d)')
+
+    @classmethod
+    def _parse_W3CDTF_to_datetime(cls, w3cdtf_str):
+        """
+        Return the |datetime| parsed from *w3cdtf_str*, applying a trailing
+        numeric timezone offset when one is present. Raises |ValueError| when
+        no supported form matches.
+        """
+        # valid W3CDTF date cases:
+        # yyyy e.g. '2003'
+        # yyyy-mm e.g. '2003-12'
+        # yyyy-mm-dd e.g. '2003-12-31'
+        # UTC timezone e.g. '2003-12-31T10:14:55Z'
+        # numeric timezone e.g. '2003-12-31T10:14:55-08:00'
+        templates = (
+            '%Y-%m-%dT%H:%M:%S',
+            '%Y-%m-%d',
+            '%Y-%m',
+            '%Y',
+        )
+        # strptime isn't smart enough to parse literal timezone offsets like
+        # '-07:30', so we have to do it ourselves
+        parseable_part = w3cdtf_str[:19]
+        offset_str = w3cdtf_str[19:]
+        dt = None
+        for tmpl in templates:
+            try:
+                dt = datetime.strptime(parseable_part, tmpl)
+            except ValueError:
+                continue
+            # first template that parses wins; no need to try shorter ones
+            break
+        if dt is None:
+            tmpl = "could not parse W3CDTF datetime string '%s'"
+            raise ValueError(tmpl % w3cdtf_str)
+        # a six-character remainder is a numeric offset such as '-08:00'
+        if len(offset_str) == 6:
+            return cls._offset_dt(dt, offset_str)
+        return dt
+
+    def _set_element_datetime(self, prop_name, value):
+        """
+        Set date/time value of child element having *prop_name* to *value*.
+        Raises |ValueError| when *value* is not a |datetime|.
+        """
+        if not isinstance(value, datetime):
+            tmpl = (
+                "property requires datetime object, got %s"
+            )
+            raise ValueError(tmpl % type(value))
+        element = self._get_or_add(prop_name)
+        dt_str = value.strftime('%Y-%m-%dT%H:%M:%SZ')
+        element.text = dt_str
+        if prop_name in ('created', 'modified'):
+            # These two require an explicit 'xsi:type="dcterms:W3CDTF"'
+            # attribute. The first and last line are a hack required to add
+            # the xsi namespace to the root element rather than each child
+            # element in which it is referenced
+            self.set(qn('xsi:foo'), 'bar')
+            element.set(qn('xsi:type'), 'dcterms:W3CDTF')
+            del self.attrib[qn('xsi:foo')]
+
+    def _set_element_text(self, prop_name, value):
+        """
+        Set string value of child attribute *prop_name* to *value*, adding
+        the child when it is not present. A non-string *value* is converted
+        with ``str()``. Raises |ValueError| when the text exceeds 255
+        characters.
+        """
+        if not is_string(value):
+            value = str(value)
+
+        if len(value) > 255:
+            tmpl = (
+                "exceeded 255 char limit for property, got:\n\n'%s'"
+            )
+            raise ValueError(tmpl % value)
+        element = self._get_or_add(prop_name)
+        element.text = value
+
+    def _text_of_element(self, property_name):
+        """
+        Return the text in the element matching *property_name*, or an empty
+        string if the element is not present or contains no text.
+        """
+        element = getattr(self, property_name)
+        if element is None:
+            return ''
+        if element.text is None:
+            return ''
+        return element.text
diff --git a/docx/oxml/ns.py b/docx/oxml/ns.py
index 6b0861284..f399ebbb6 100644
--- a/docx/oxml/ns.py
+++ b/docx/oxml/ns.py
@@ -11,6 +11,8 @@
"a": "http://schemas.openxmlformats.org/drawingml/2006/main",
"c": "http://schemas.openxmlformats.org/drawingml/2006/chart",
"cp": "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
+ "ep": "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties",
+ "vt": "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes",
"dc": "http://purl.org/dc/elements/1.1/",
"dcmitype": "http://purl.org/dc/dcmitype/",
"dcterms": "http://purl.org/dc/terms/",
@@ -85,13 +87,13 @@ def nsuri(self):
"""
return self._ns_uri
-
def nsdecls(*prefixes):
"""
Return a string containing a namespace declaration for each of the
namespace prefix strings, e.g. 'p', 'ct', passed as *prefixes*.
"""
- return ' '.join(['xmlns:%s="%s"' % (pfx, nsmap[pfx]) for pfx in prefixes])
+ # Unknown prefixes are skipped, not a KeyError as before. NOTE(review): this can hide typos.
+ return ' '.join(['xmlns:%s="%s"' % (pfx, nsmap[pfx]) for pfx in prefixes if pfx in nsmap])
def nspfxmap(*nspfxs):
diff --git a/docx/parts/document.py b/docx/parts/document.py
index 59d0b7a71..3d32e9825 100644
--- a/docx/parts/document.py
+++ b/docx/parts/document.py
@@ -43,6 +43,14 @@ def core_properties(self):
properties of this document.
"""
return self.package.core_properties
+
+ @property
+ def extended_properties(self):
+ """
+ An |ExtendedProperties| object providing read/write access to the
+ extended (app) properties of this document.
+ """
+ return self.package.extended_properties
@property
def document(self):
diff --git a/tests/opc/parts/test_appprops.py b/tests/opc/parts/test_appprops.py
new file mode 100644
index 000000000..921a885d3
--- /dev/null
+++ b/tests/opc/parts/test_appprops.py
@@ -0,0 +1,56 @@
+# encoding: utf-8
+
+"""
+Unit test suite for the docx.opc.parts.coreprops module
+"""
+
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals
+)
+
+from datetime import datetime, timedelta
+
+import pytest
+
+from docx.opc.coreprops import CoreProperties
+from docx.opc.parts.coreprops import CorePropertiesPart
+from docx.oxml.coreprops import CT_CoreProperties
+
+from ...unitutil.mock import class_mock, instance_mock
+
+
+class DescribeCorePropertiesPart(object):
+ # Exercises CorePropertiesPart only.
+ # NOTE(review): this suite is added as tests/opc/parts/test_appprops.py
+ # together with the extended-properties code, yet nothing in it imports or
+ # calls ExtendedPropertiesPart - presumably a placeholder copy of the
+ # core-properties tests; confirm and retarget.
+
+ def it_provides_access_to_its_core_props_object(self, coreprops_fixture):
+ core_properties_part, CoreProperties_ = coreprops_fixture
+ core_properties = core_properties_part.core_properties
+ # CoreProperties is patched by the fixture, so this checks that the part
+ # passes its own element to the constructor
+ CoreProperties_.assert_called_once_with(core_properties_part.element)
+ assert isinstance(core_properties, CoreProperties)
+
+ def it_can_create_a_default_core_properties_part(self):
+ core_properties_part = CorePropertiesPart.default(None)
+ assert isinstance(core_properties_part, CorePropertiesPart)
+ core_properties = core_properties_part.core_properties
+ assert core_properties.title == 'Word Document'
+ assert core_properties.last_modified_by == 'python-docx'
+ assert core_properties.revision == 1
+ # the modified timestamp must be less than two seconds behind the
+ # current UTC time
+ delta = datetime.utcnow() - core_properties.modified
+ max_expected_delta = timedelta(seconds=2)
+ assert delta < max_expected_delta
+
+ # fixtures ---------------------------------------------
+
+ @pytest.fixture
+ def coreprops_fixture(self, element_, CoreProperties_):
+ core_properties_part = CorePropertiesPart(None, None, element_, None)
+ return core_properties_part, CoreProperties_
+
+ # fixture components -----------------------------------
+
+ @pytest.fixture
+ def CoreProperties_(self, request):
+ return class_mock(request, 'docx.opc.parts.coreprops.CoreProperties')
+
+ @pytest.fixture
+ def element_(self, request):
+ return instance_mock(request, CT_CoreProperties)