forked from mirkonasato/pyodconverter
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit dbbc759
Showing
3 changed files
with
1,131 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
# | ||
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14 | ||
# | ||
# This script converts a document from one office format to another by | ||
# connecting to an OpenOffice.org instance via Python-UNO bridge. | ||
# | ||
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com> | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html | ||
# - or any later version. | ||
# | ||
DEFAULT_OPENOFFICE_PORT = 2002 | ||
|
||
import uno | ||
from os.path import abspath, isfile, splitext | ||
from com.sun.star.beans import PropertyValue | ||
from com.sun.star.task import ErrorCodeIOException | ||
from com.sun.star.connection import NoConnectException | ||
|
||
FAMILY_TEXT = "Text" | ||
FAMILY_WEB = "Web" | ||
FAMILY_SPREADSHEET = "Spreadsheet" | ||
FAMILY_PRESENTATION = "Presentation" | ||
FAMILY_DRAWING = "Drawing" | ||
|
||
#---------------------# | ||
# Configuration Start # | ||
#---------------------# | ||
|
||
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter | ||
|
||
# most formats are auto-detected; only those requiring options are defined here | ||
IMPORT_FILTER_MAP = { | ||
"txt": { | ||
"FilterName": "Text (encoded)", | ||
"FilterOptions": "utf8" | ||
}, | ||
"csv": { | ||
"FilterName": "Text - txt - csv (StarCalc)", | ||
"FilterOptions": "44,34,0" | ||
} | ||
} | ||
|
||
EXPORT_FILTER_MAP = { | ||
"pdf": { | ||
FAMILY_TEXT: { "FilterName": "writer_pdf_Export" }, | ||
FAMILY_WEB: { "FilterName": "writer_web_pdf_Export" }, | ||
FAMILY_SPREADSHEET: { "FilterName": "calc_pdf_Export" }, | ||
FAMILY_PRESENTATION: { "FilterName": "impress_pdf_Export" }, | ||
FAMILY_DRAWING: { "FilterName": "draw_pdf_Export" } | ||
}, | ||
"html": { | ||
FAMILY_TEXT: { "FilterName": "HTML (StarWriter)" }, | ||
FAMILY_SPREADSHEET: { "FilterName": "HTML (StarCalc)" }, | ||
FAMILY_PRESENTATION: { "FilterName": "impress_html_Export" } | ||
}, | ||
"odt": { | ||
FAMILY_TEXT: { "FilterName": "writer8" }, | ||
FAMILY_WEB: { "FilterName": "writerweb8_writer" } | ||
}, | ||
"doc": { | ||
FAMILY_TEXT: { "FilterName": "MS Word 97" } | ||
}, | ||
"rtf": { | ||
FAMILY_TEXT: { "FilterName": "Rich Text Format" } | ||
}, | ||
"txt": { | ||
FAMILY_TEXT: { | ||
"FilterName": "Text", | ||
"FilterOptions": "utf8" | ||
} | ||
}, | ||
"ods": { | ||
FAMILY_SPREADSHEET: { "FilterName": "calc8" } | ||
}, | ||
"xls": { | ||
FAMILY_SPREADSHEET: { "FilterName": "MS Excel 97" } | ||
}, | ||
"csv": { | ||
FAMILY_SPREADSHEET: { | ||
"FilterName": "Text - txt - csv (StarCalc)", | ||
"FilterOptions": "44,34,0" | ||
} | ||
}, | ||
"odp": { | ||
FAMILY_PRESENTATION: { "FilterName": "impress8" } | ||
}, | ||
"ppt": { | ||
FAMILY_PRESENTATION: { "FilterName": "MS PowerPoint 97" } | ||
}, | ||
"swf": { | ||
FAMILY_DRAWING: { "FilterName": "draw_flash_Export" }, | ||
FAMILY_PRESENTATION: { "FilterName": "impress_flash_Export" } | ||
} | ||
} | ||
|
||
PAGE_STYLE_OVERRIDE_PROPERTIES = { | ||
FAMILY_SPREADSHEET: { | ||
#--- Scale options: uncomment 1 of the 3 --- | ||
# a) 'Reduce / enlarge printout': 'Scaling factor' | ||
"PageScale": 100, | ||
# b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages' | ||
#"ScaleToPagesX": 1, "ScaleToPagesY": 1000, | ||
# c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages' | ||
#"ScaleToPages": 1, | ||
"PrintGrid": False | ||
} | ||
} | ||
|
||
#-------------------# | ||
# Configuration End # | ||
#-------------------# | ||
|
||
class DocumentConversionException(Exception): | ||
|
||
def __init__(self, message): | ||
self.message = message | ||
|
||
def __str__(self): | ||
return self.message | ||
|
||
|
||
class DocumentConverter: | ||
|
||
def __init__(self, port=DEFAULT_OPENOFFICE_PORT): | ||
localContext = uno.getComponentContext() | ||
resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext) | ||
try: | ||
context = resolver.resolve("uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port) | ||
except NoConnectException: | ||
raise DocumentConversionException, "failed to connect to OpenOffice.org on port %s" % port | ||
self.desktop = context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", context) | ||
|
||
def convert(self, inputFile, outputFile): | ||
|
||
inputUrl = self._toFileUrl(inputFile) | ||
outputUrl = self._toFileUrl(outputFile) | ||
|
||
loadProperties = { "Hidden": True } | ||
inputExt = self._getFileExt(inputFile) | ||
if IMPORT_FILTER_MAP.has_key(inputExt): | ||
loadProperties.update(IMPORT_FILTER_MAP[inputExt]) | ||
|
||
document = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self._toProperties(loadProperties)) | ||
try: | ||
document.refresh() | ||
except AttributeError: | ||
pass | ||
|
||
family = self._detectFamily(document) | ||
self._overridePageStyleProperties(document, family) | ||
|
||
outputExt = self._getFileExt(outputFile) | ||
storeProperties = self._getStoreProperties(document, outputExt) | ||
|
||
try: | ||
document.storeToURL(outputUrl, self._toProperties(storeProperties)) | ||
finally: | ||
document.close(True) | ||
|
||
def _overridePageStyleProperties(self, document, family): | ||
if PAGE_STYLE_OVERRIDE_PROPERTIES.has_key(family): | ||
properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family] | ||
pageStyles = document.getStyleFamilies().getByName('PageStyles') | ||
for styleName in pageStyles.getElementNames(): | ||
pageStyle = pageStyles.getByName(styleName) | ||
for name, value in properties.items(): | ||
pageStyle.setPropertyValue(name, value) | ||
|
||
def _getStoreProperties(self, document, outputExt): | ||
family = self._detectFamily(document) | ||
try: | ||
propertiesByFamily = EXPORT_FILTER_MAP[outputExt] | ||
except KeyError: | ||
raise DocumentConversionException, "unknown output format: '%s'" % outputExt | ||
try: | ||
return propertiesByFamily[family] | ||
except KeyError: | ||
raise DocumentConversionException, "unsupported conversion: from '%s' to '%s'" % (family, outputExt) | ||
|
||
def _detectFamily(self, document): | ||
if document.supportsService("com.sun.star.text.WebDocument"): | ||
return FAMILY_WEB | ||
if document.supportsService("com.sun.star.text.GenericTextDocument"): | ||
# must be TextDocument or GlobalDocument | ||
return FAMILY_TEXT | ||
if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"): | ||
return FAMILY_SPREADSHEET | ||
if document.supportsService("com.sun.star.presentation.PresentationDocument"): | ||
return FAMILY_PRESENTATION | ||
if document.supportsService("com.sun.star.drawing.DrawingDocument"): | ||
return FAMILY_DRAWING | ||
raise DocumentConversionException, "unknown document family: %s" % document | ||
|
||
def _getFileExt(self, path): | ||
ext = splitext(path)[1] | ||
if ext is not None: | ||
return ext[1:].lower() | ||
|
||
def _toFileUrl(self, path): | ||
return uno.systemPathToFileUrl(abspath(path)) | ||
|
||
def _toProperties(self, dict): | ||
props = [] | ||
for key in dict: | ||
prop = PropertyValue() | ||
prop.Name = key | ||
prop.Value = dict[key] | ||
props.append(prop) | ||
return tuple(props) | ||
|
||
|
||
if __name__ == "__main__": | ||
from sys import argv, exit | ||
|
||
if len(argv) < 3: | ||
print "USAGE: python %s <input-file> <output-file>" % argv[0] | ||
exit(255) | ||
if not isfile(argv[1]): | ||
print "no such input file: %s" % argv[1] | ||
exit(1) | ||
|
||
try: | ||
converter = DocumentConverter() | ||
converter.convert(argv[1], argv[2]) | ||
except DocumentConversionException, exception: | ||
print "ERROR! " + str(exception) | ||
exit(1) | ||
except ErrorCodeIOException, exception: | ||
print "ERROR! ErrorCodeIOException %d" % exception.ErrCode | ||
exit(1) | ||
|
Oops, something went wrong.