Skip to content
Browse files

- Added example-extracttext.py - a simple 2 line example of how to extract text from a docx document
- example.py is now example-makedocument.py
  • Loading branch information...
1 parent 6c33b98 commit 99ad6a80d546eb546f80d54bed74a253097bf51b @mikemaccana committed Jan 1, 2010
Showing with 27 additions and 8 deletions.
  1. +1 −1 docx.py
  2. +20 −0 example-extracttext.py
  3. +6 −7 example.py → example-makedocument.py
View
2 docx.py
@@ -238,7 +238,7 @@ def getdocumenttext(document):
for element in document.iter():
if element.tag == getns(docns,'w')+'t':
if element.text:
- contents = contents+element.text+'\n'
+ contents = contents+element.text
return contents
def docproperties(title,subject,creator,keywords,lastmodifiedby=None):
View
20 example-extracttext.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python2.6
+'''
+This file opens a docx (Office 2007) file and dumps the text.
+
+If you need to extract text from documents, use this file as a basis for your work.
+
+Part of Python's docx module - http://github.com/mikemaccana/python-docx
+See LICENSE for licensing information.
+'''
+from docx import *
+import sys
+if __name__ == '__main__':
+ try:
+ document = opendocx(sys.argv[1])
+ except:
+ print('Please supply a filename. For example:')
+ print(''' example-extracttext.py 'My Office 2007 document.docx' ''')
+ exit()
+ ## Fetch all the text out of the document we just opened
+ print getdocumenttext(document)
View
13 example.py → example-makedocument.py
@@ -1,20 +1,22 @@
#!/usr/bin/env python2.6
'''
-Open and modify Microsoft Word 2007 docx files (called 'OpenXML' and 'Office OpenXML' by Microsoft)
+This file makes a docx (Office 2007) file from scratch, showing off most of python-docx's features.
+
+If you need to make documents from scratch, use this file as a basis for your work.
Part of Python's docx module - http://github.com/mikemaccana/python-docx
See LICENSE for licensing information.
'''
from docx import *
if __name__ == '__main__':
- #document = opendocx('Hello world.docx')
+ # Make a new document tree - this is the main part of a Word document
document = newdocument()
- # This location is where most document content lives
+ # This xpath location is where most interesting content lives
docbody = document.xpath('/w:document/w:body', namespaces=docns)[0]
- # Append two headings
+ # Append two headings and a paragraph
docbody.append(heading('''Welcome to Python's docx module''',1) )
docbody.append(heading('Make and edit docx in 200 lines of pure Python',2))
docbody.append(paragraph('The module was created when I was looking for a Python support for MS Word .doc files on PyPI and Stackoverflow. Unfortunately, the only solutions I could find used:'))
@@ -49,9 +51,6 @@
docbody.append(heading('Ideas? Questions? Want to contribute?',2))
docbody.append(paragraph('''Email <python.docx@librelist.com>'''))
- ## Fetch all the text out of the document we just created
- #print getdocumenttext(document)
- #print etree.tostring(document, pretty_print=True)
properties = docproperties('Python docx demo','A practical example of making docx from Python','Mike MacCana',['python','Office Open XML','Word'])
# Save our document

0 comments on commit 99ad6a8

Please sign in to comment.
Something went wrong with that request. Please try again.