Permalink
Browse files

moving PyODConverter to GitHub

  • Loading branch information...
0 parents commit dbbc7599852959dc6014373d7aaa244026ebbab6 @mirkonasato committed Mar 10, 2012
Showing with 1,131 additions and 0 deletions.
  1. +231 −0 DocumentConverter.py
  2. +842 −0 LICENSE.txt
  3. +58 −0 README.md
@@ -0,0 +1,231 @@
+#
+# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
+#
+# This script converts a document from one office format to another by
+# connecting to an OpenOffice.org instance via Python-UNO bridge.
+#
+# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
+# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
+# - or any later version.
+#
+# Port that DocumentConverter connects to on localhost when the caller does
+# not pass an explicit port.
+DEFAULT_OPENOFFICE_PORT = 2002
+
+import uno
+from os.path import abspath, isfile, splitext
+from com.sun.star.beans import PropertyValue
+from com.sun.star.task import ErrorCodeIOException
+from com.sun.star.connection import NoConnectException
+
+# Document family names. DocumentConverter._detectFamily returns one of these,
+# and they are the keys used inside EXPORT_FILTER_MAP and
+# PAGE_STYLE_OVERRIDE_PROPERTIES.
+FAMILY_TEXT = "Text"
+FAMILY_WEB = "Web"
+FAMILY_SPREADSHEET = "Spreadsheet"
+FAMILY_PRESENTATION = "Presentation"
+FAMILY_DRAWING = "Drawing"
+
+#---------------------#
+# Configuration Start #
+#---------------------#
+
+# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
+
+# most formats are auto-detected; only those requiring options are defined here
+# Input extension -> extra load properties. convert() merges the matching
+# entry into the properties it passes to loadComponentFromURL.
+IMPORT_FILTER_MAP = {
+    # plain text is read as UTF-8
+    "txt": {
+        "FilterName": "Text (encoded)",
+        "FilterOptions": "utf8",
+    },
+    # 44 and 34 are the character codes of ',' and '"' (presumably the field
+    # separator and text delimiter -- confirm against the filter options docs)
+    "csv": {
+        "FilterName": "Text - txt - csv (StarCalc)",
+        "FilterOptions": "44,34,0",
+    },
+}
+
+# Output extension -> document family -> store properties.
+# DocumentConverter._getStoreProperties looks up the extension first and the
+# detected family second; a missing entry at either level is reported as a
+# DocumentConversionException.
+EXPORT_FILTER_MAP = {
+    # --- formats available to several families ---
+    "pdf": {
+        FAMILY_TEXT: {"FilterName": "writer_pdf_Export"},
+        FAMILY_WEB: {"FilterName": "writer_web_pdf_Export"},
+        FAMILY_SPREADSHEET: {"FilterName": "calc_pdf_Export"},
+        FAMILY_PRESENTATION: {"FilterName": "impress_pdf_Export"},
+        FAMILY_DRAWING: {"FilterName": "draw_pdf_Export"},
+    },
+    "html": {
+        FAMILY_TEXT: {"FilterName": "HTML (StarWriter)"},
+        FAMILY_SPREADSHEET: {"FilterName": "HTML (StarCalc)"},
+        FAMILY_PRESENTATION: {"FilterName": "impress_html_Export"},
+    },
+
+    # --- text document formats ---
+    "odt": {
+        FAMILY_TEXT: {"FilterName": "writer8"},
+        FAMILY_WEB: {"FilterName": "writerweb8_writer"},
+    },
+    "doc": {
+        FAMILY_TEXT: {"FilterName": "MS Word 97"},
+    },
+    "rtf": {
+        FAMILY_TEXT: {"FilterName": "Rich Text Format"},
+    },
+    "txt": {
+        FAMILY_TEXT: {
+            "FilterName": "Text",
+            "FilterOptions": "utf8",
+        },
+    },
+
+    # --- spreadsheet formats ---
+    "ods": {
+        FAMILY_SPREADSHEET: {"FilterName": "calc8"},
+    },
+    "xls": {
+        FAMILY_SPREADSHEET: {"FilterName": "MS Excel 97"},
+    },
+    "csv": {
+        FAMILY_SPREADSHEET: {
+            "FilterName": "Text - txt - csv (StarCalc)",
+            "FilterOptions": "44,34,0",
+        },
+    },
+
+    # --- presentation and drawing formats ---
+    "odp": {
+        FAMILY_PRESENTATION: {"FilterName": "impress8"},
+    },
+    "ppt": {
+        FAMILY_PRESENTATION: {"FilterName": "MS PowerPoint 97"},
+    },
+    "swf": {
+        FAMILY_DRAWING: {"FilterName": "draw_flash_Export"},
+        FAMILY_PRESENTATION: {"FilterName": "impress_flash_Export"},
+    },
+}
+
+# Document family -> page style properties. Before exporting,
+# DocumentConverter._overridePageStyleProperties sets each of these on every
+# page style of a document belonging to that family.
+PAGE_STYLE_OVERRIDE_PROPERTIES = {
+    FAMILY_SPREADSHEET: {
+        #--- Scale options: keep exactly 1 of the 3 uncommented ---
+        # a) 'Reduce / enlarge printout': 'Scaling factor'
+        "PageScale": 100,
+        # b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
+        #"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
+        # c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
+        #"ScaleToPages": 1,
+        "PrintGrid": False,
+    },
+}
+
+#-------------------#
+# Configuration End #
+#-------------------#
+
+class DocumentConversionException(Exception):
+    """Raised when a document cannot be converted.
+
+    The human-readable reason is available as ``str(exception)`` and through
+    the ``message`` attribute.
+    """
+
+    def __init__(self, message):
+        # Hand the message to Exception as well, so that ``args``, ``repr()``
+        # and pickling carry it instead of seeing an argument-less exception.
+        Exception.__init__(self, message)
+        self.message = message
+
+    def __str__(self):
+        return self.message
+
+
+class DocumentConverter:
+    """Converts documents through a running OpenOffice.org instance.
+
+    The constructor resolves the office process over a local UNO socket and
+    keeps its desktop object, which convert() uses to load and store files.
+    """
+
+    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
+        """Connect to the office instance listening on localhost:port.
+
+        Raises DocumentConversionException if the connection is refused.
+        """
+        localContext = uno.getComponentContext()
+        resolver = localContext.ServiceManager.createInstanceWithContext(
+            "com.sun.star.bridge.UnoUrlResolver", localContext)
+        try:
+            context = resolver.resolve(
+                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
+        except NoConnectException:
+            raise DocumentConversionException(
+                "failed to connect to OpenOffice.org on port %s" % port)
+        self.desktop = context.ServiceManager.createInstanceWithContext(
+            "com.sun.star.frame.Desktop", context)
+
+    def convert(self, inputFile, outputFile):
+        """Load inputFile and store it as outputFile.
+
+        The export filter is chosen from the extension of outputFile and the
+        family of the loaded document.
+
+        Raises DocumentConversionException if the input cannot be loaded, the
+        document family is unknown, the output extension is unknown, or the
+        family cannot be exported to that extension.
+        """
+        inputUrl = self._toFileUrl(inputFile)
+        outputUrl = self._toFileUrl(outputFile)
+
+        loadProperties = {"Hidden": True}
+        inputExt = self._getFileExt(inputFile)
+        if inputExt in IMPORT_FILTER_MAP:
+            loadProperties.update(IMPORT_FILTER_MAP[inputExt])
+
+        document = self.desktop.loadComponentFromURL(
+            inputUrl, "_blank", 0, self._toProperties(loadProperties))
+        if document is None:
+            # A failed load would otherwise surface later as an unrelated
+            # AttributeError on None.
+            raise DocumentConversionException(
+                "failed to load input file: %s" % inputFile)
+
+        # Everything after a successful load runs inside try/finally so the
+        # hidden document is closed even when family detection or the filter
+        # lookup fails, not only when storeToURL fails.
+        try:
+            try:
+                document.refresh()
+            except AttributeError:
+                # the loaded component offers no refresh(); nothing to do
+                pass
+
+            family = self._detectFamily(document)
+            self._overridePageStyleProperties(document, family)
+
+            outputExt = self._getFileExt(outputFile)
+            storeProperties = self._getStoreProperties(document, outputExt)
+
+            document.storeToURL(outputUrl, self._toProperties(storeProperties))
+        finally:
+            document.close(True)
+
+    def _overridePageStyleProperties(self, document, family):
+        """Apply PAGE_STYLE_OVERRIDE_PROPERTIES[family] to every page style.
+
+        Does nothing when no overrides are configured for the family.
+        """
+        if family not in PAGE_STYLE_OVERRIDE_PROPERTIES:
+            return
+        properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
+        pageStyles = document.getStyleFamilies().getByName('PageStyles')
+        for styleName in pageStyles.getElementNames():
+            pageStyle = pageStyles.getByName(styleName)
+            for name, value in properties.items():
+                pageStyle.setPropertyValue(name, value)
+
+    def _getStoreProperties(self, document, outputExt):
+        """Return the store properties for exporting document as outputExt.
+
+        Raises DocumentConversionException when outputExt is not in
+        EXPORT_FILTER_MAP or has no entry for the document's family.
+        """
+        family = self._detectFamily(document)
+        try:
+            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
+        except KeyError:
+            raise DocumentConversionException(
+                "unknown output format: '%s'" % outputExt)
+        try:
+            return propertiesByFamily[family]
+        except KeyError:
+            raise DocumentConversionException(
+                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))
+
+    def _detectFamily(self, document):
+        """Return the FAMILY_* constant matching the services of document.
+
+        Raises DocumentConversionException when no known service matches.
+        """
+        # Checked in order: the web document test comes before the generic
+        # text test, which covers TextDocument and GlobalDocument.
+        servicesByPriority = (
+            ("com.sun.star.text.WebDocument", FAMILY_WEB),
+            ("com.sun.star.text.GenericTextDocument", FAMILY_TEXT),
+            ("com.sun.star.sheet.SpreadsheetDocument", FAMILY_SPREADSHEET),
+            ("com.sun.star.presentation.PresentationDocument", FAMILY_PRESENTATION),
+            ("com.sun.star.drawing.DrawingDocument", FAMILY_DRAWING),
+        )
+        for service, family in servicesByPriority:
+            if document.supportsService(service):
+                return family
+        raise DocumentConversionException(
+            "unknown document family: %s" % document)
+
+    def _getFileExt(self, path):
+        """Return the lower-cased extension of path without the dot.
+
+        Returns an empty string when path has no extension.
+        """
+        # splitext always returns a string, so no None check is needed
+        return splitext(path)[1][1:].lower()
+
+    def _toFileUrl(self, path):
+        """Return the file URL for the absolute form of path."""
+        return uno.systemPathToFileUrl(abspath(path))
+
+    def _toProperties(self, dict):
+        """Convert a name -> value mapping into a tuple of PropertyValue."""
+        # the parameter keeps its original name although it shadows the builtin
+        props = []
+        for key, value in dict.items():
+            prop = PropertyValue()
+            prop.Name = key
+            prop.Value = value
+            props.append(prop)
+        return tuple(props)
+
+
+# Command-line entry point: python DocumentConverter.py <input-file> <output-file>
+# Exit status is 255 for wrong usage and 1 for a missing input file or a
+# failed conversion.
+if __name__ == "__main__":
+    from sys import argv, exit
+
+    # print(...) with a single argument and "except ... as ..." produce the
+    # same result on Python 2.6+ and Python 3, unlike the print statement and
+    # "except X, e", which are syntax errors on Python 3.
+    if len(argv) < 3:
+        print("USAGE: python %s <input-file> <output-file>" % argv[0])
+        exit(255)
+    if not isfile(argv[1]):
+        print("no such input file: %s" % argv[1])
+        exit(1)
+
+    try:
+        converter = DocumentConverter()
+        converter.convert(argv[1], argv[2])
+    except DocumentConversionException as exception:
+        print("ERROR! " + str(exception))
+        exit(1)
+    except ErrorCodeIOException as exception:
+        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
+        exit(1)
+
Oops, something went wrong.

0 comments on commit dbbc759

Please sign in to comment.