In [51]:
#!/usr/bin/env python
"""Convert a MS-Office file into a PDF.

Uses 'textutil' to convert to RTF, then 'cupsfilter' (formerly 'convert')
to convert the RTF to PDF.
"""

import argparse
import os
import os.path
import subprocess
import sys
import tempfile

# Name of the executable that main() runs (via subprocess) with
# "-convert rtf" to produce the intermediate RTF file.
TEXTUTIL = "textutil"


def rtf2pdf(rtf_filename, pdf_filename):
    """Convert the given RTF file to a PDF file with the given name.

    Runs ``cupsfilter`` on ``rtf_filename`` and copies everything it writes
    to stdout into ``pdf_filename``.
    Uses cupsfilter - kudos http://stackoverflow.com/a/22119831/197789

    Args:
        rtf_filename: Path of the RTF file handed to ``cupsfilter``.
        pdf_filename: Path of the file to write; it is created or truncated.

    Returns:
        The exit status of the ``cupsfilter`` process.

    Raises:
        OSError: If ``cupsfilter`` cannot be started or ``pdf_filename``
            cannot be opened for writing.
    """
    # Prior to Mavericks, I used the following directly
    # /System/Library/Printers/Libraries/convert
    p = subprocess.Popen(["cupsfilter", rtf_filename],
                         stdout=subprocess.PIPE)
    try:
        # The pipe yields bytes, so the output file must be opened in binary
        # mode; text mode raises TypeError on write.
        with open(pdf_filename, "wb") as pdf_file:
            while True:
                data = p.stdout.read(65536)
                if not data:
                    break
                pdf_file.write(data)
    finally:
        # Always release the pipe and reap the child, even on a write error.
        p.stdout.close()
        retcode = p.wait()
    return retcode


def main(srcs):
    """Convert each source document in ``srcs`` to a PDF next to it.

    For every path, the file is first converted to a temporary RTF file with
    the ``TEXTUTIL`` executable and then turned into ``<basename>.pdf`` by
    ``rtf2pdf``. Sources that do not exist, and sources whose PDF already
    exists, are reported and skipped. The source paths are taken directly
    as an argument; no command-line parsing is done here.

    Args:
        srcs: Iterable of paths to the source files.

    Returns:
        0 if no source was missing and no intermediate conversion failed,
        otherwise 1.
    """
    error = False

    for src in srcs:
        if not os.path.exists(src):
            print("{}: does not exist".format(src))
            error = True
            continue
        filename_base = os.path.splitext(src)[0]
        dest_filename = filename_base + ".pdf"
        if os.path.exists(dest_filename):
            print("{}: {} already exists, skipping".format(src, dest_filename))
            continue

        # mkstemp returns an open descriptor as well as the path; only the
        # path is needed, so close the descriptor right away to avoid a leak.
        tmp_fd, tmp_rtf_filename = tempfile.mkstemp(suffix=".rtf")
        os.close(tmp_fd)
        try:
            retcode = subprocess.call([TEXTUTIL,
                                       "-convert", "rtf",
                                       "-output", tmp_rtf_filename,
                                       src])
            if retcode != 0:
                print("{}: Failed to convert " \
                    "(to intermediate RTF format)".format(src))
                error = True
                continue

            rtf2pdf(tmp_rtf_filename, dest_filename)
        finally:
            # Remove the intermediate file on success, on failure, and when
            # an exception propagates (e.g. the converter is not installed).
            if os.path.exists(tmp_rtf_filename):
                os.unlink(tmp_rtf_filename)

        print("{}: Converted to {}".format(src, dest_filename))

    return(0 if not error else 1)


In [53]:
# Run the converter on a single sample document.
# NOTE(review): this is an absolute, machine-specific path; another cell in
# this notebook opens "data/example7.rtf" relatively — consider doing the
# same here once the working directory is confirmed.
srcs = ["/home/nsia/Desktop/NSIA/sigtas/scripts/data/example7.rtf"]
main(srcs)

FileNotFoundError: [Errno 2] No such file or directory: 'textutil': 'textutil'

In [39]:
import re
"""
Taken from https://gist.github.com/gilsondev/7c1d2d753ddb522e7bc22511cfb08676
and modified for better output of tables.
"""

# Control words which specify a "destination".
# rtf_to_text marks the current group as ignorable when it meets one of these
# words, so text inside such a group is not copied to the output.
# Membership is tested with the word exactly as it appears in the input
# (case-sensitive), e.g. 'maccPr'.
destinations = frozenset((
    'aftncn','aftnsep','aftnsepc','annotation','atnauthor','atndate','atnicn','atnid',
    'atnparent','atnref','atntime','atrfend','atrfstart','author','background',
    'bkmkend','bkmkstart','blipuid','buptim','category','colorschememapping',
    'colortbl','comment','company','creatim','datafield','datastore','defchp','defpap',
    'do','doccomm','docvar','dptxbxtext','ebcend','ebcstart','factoidname','falt',
    'fchars','ffdeftext','ffentrymcr','ffexitmcr','ffformat','ffhelptext','ffl',
    'ffname','ffstattext','field','file','filetbl','fldinst','fldrslt','fldtype',
    'fname','fontemb','fontfile','fonttbl','footer','footerf','footerl','footerr',
    'footnote','formfield','ftncn','ftnsep','ftnsepc','g','generator','gridtbl',
    'header','headerf','headerl','headerr','hl','hlfr','hlinkbase','hlloc','hlsrc',
    'hsv','htmltag','info','keycode','keywords','latentstyles','lchars','levelnumbers',
    'leveltext','lfolevel','linkval','list','listlevel','listname','listoverride',
    'listoverridetable','listpicture','liststylename','listtable','listtext',
    'lsdlockedexcept','macc','maccPr','mailmerge','maln','malnScr','manager','margPr',
    'mbar','mbarPr','mbaseJc','mbegChr','mborderBox','mborderBoxPr','mbox','mboxPr',
    'mchr','mcount','mctrlPr','md','mdeg','mdegHide','mden','mdiff','mdPr','me',
    'mendChr','meqArr','meqArrPr','mf','mfName','mfPr','mfunc','mfuncPr','mgroupChr',
    'mgroupChrPr','mgrow','mhideBot','mhideLeft','mhideRight','mhideTop','mhtmltag',
    'mlim','mlimloc','mlimlow','mlimlowPr','mlimupp','mlimuppPr','mm','mmaddfieldname',
    'mmath','mmathPict','mmathPr','mmaxdist','mmc','mmcJc','mmconnectstr',
    'mmconnectstrdata','mmcPr','mmcs','mmdatasource','mmheadersource','mmmailsubject',
    'mmodso','mmodsofilter','mmodsofldmpdata','mmodsomappedname','mmodsoname',
    'mmodsorecipdata','mmodsosort','mmodsosrc','mmodsotable','mmodsoudl',
    'mmodsoudldata','mmodsouniquetag','mmPr','mmquery','mmr','mnary','mnaryPr',
    'mnoBreak','mnum','mobjDist','moMath','moMathPara','moMathParaPr','mopEmu',
    'mphant','mphantPr','mplcHide','mpos','mr','mrad','mradPr','mrPr','msepChr',
    'mshow','mshp','msPre','msPrePr','msSub','msSubPr','msSubSup','msSubSupPr','msSup',
    'msSupPr','mstrikeBLTR','mstrikeH','mstrikeTLBR','mstrikeV','msub','msubHide',
    'msup','msupHide','mtransp','mtype','mvertJc','mvfmf','mvfml','mvtof','mvtol',
    'mzeroAsc','mzeroDesc','mzeroWid','nesttableprops','nextfile','nonesttables',
    'objalias','objclass','objdata','object','objname','objsect','objtime','oldcprops',
    'oldpprops','oldsprops','oldtprops','oleclsid','operator','panose','password',
    'passwordhash','pgp','pgptbl','picprop','pict','pn','pnseclvl','pntext','pntxta',
    'pntxtb','printim','private','propname','protend','protstart','protusertbl','pxe',
    'result','revtbl','revtim','rsidtbl','rxe','shp','shpgrp','shpinst',
    'shppict','shprslt','shptxt','sn','sp','staticval','stylesheet','subject','sv',
    'svb','tc','template','themedata','title','txe','ud','upr','userprops',
    'wgrffmtfilter','windowcaption','writereservation','writereservhash','xe','xform',
    'xmlattrname','xmlattrvalue','xmlclose','xmlname','xmlnstbl',
    'xmlopen',
    ))


# Translation of some special characters.
# Maps an RTF control word (without its leading backslash) to the text that
# rtf_to_text emits in its place. Table structure is flattened to text:
# each cell ends with '|' and each row ends with a newline.
specialchars = {
    'par': '\n',
    'sect': '\n\n',
    'page': '\n\n',
    'line': '\n',
    'tab': '\t',
    'emdash': '\u2014',
    'endash': '\u2013',
    'emspace': '\u2003',
    'enspace': '\u2002',
    'qmspace': '\u2005',
    'bullet': '\u2022',
    'lquote': '\u2018',
    'rquote': '\u2019',
    # Was '\201C': an octal escape (U+0081 followed by "C"), not U+201C.
    'ldblquote': '\u201C',
    'rdblquote': '\u201D',
    'row':'\n',
    'cell': '|',
    'nestcell': '|'
    }

# Tokenizer for RTF input (case-insensitive). Alternatives, in order:
#   group 1, 2: backslash + control word of 1-32 letters, with an optional
#               signed numeric argument of up to 10 digits; one trailing
#               space, if present, is consumed as the delimiter
#   group 3:    the two hex digits of a \'xx escape
#   group 4:    backslash + a single non-letter character (control symbol)
#   group 5:    an opening or closing brace
#   (no group): a run of CR/LF characters, matched so they produce no token
#   group 6:    any other single character (plain text)
PATTERN  = re.compile(r"\\([a-z]{1,32})(-?\d{1,10})?[ ]?|\\'([0-9a-f]{2})|\\([^a-z])|([{}])|[\r\n]+|(.)", re.I)

def rtf_to_text(text):
    """Strip RTF markup from ``text`` and return the plain text it contains.

    The input is tokenized with ``PATTERN``. Groups opened by a word listed
    in ``destinations`` or marked with ``\\*`` are skipped, words listed in
    ``specialchars`` are replaced by their mapped text, and ``\\uN`` and
    ``\\'xx`` escapes are turned into characters.

    Args:
        text: The RTF document as a string.

    Returns:
        The extracted text as a single string.
    """
    stack = []              # Saved (ucskip, ignorable) pairs, one per open group.
    ignorable = False       # Whether this group (and all inside it) are "ignorable".
    ucskip = 1              # Number of ASCII characters to skip after a unicode character.
    curskip = 0             # Number of ASCII characters left to skip
    out = []                # Output buffer.
    for match in PATTERN.finditer(text):
        word, arg, hexbyte, char, brace, tchar = match.groups()
        if brace:
            curskip = 0
            if brace == '{':
                # Push state
                stack.append((ucskip, ignorable))
            elif brace == '}':
                # Pop state
                try:
                    ucskip, ignorable = stack.pop()
                # sample_3.rtf throws an IndexError because of stack being empty.
                # don't know right now how this could happen, so for now this is
                # a ugly hack to prevent it
                except IndexError:
                    ucskip = 0
                    ignorable = True
        elif char: # \x (not a letter)
            curskip = 0
            if char == '~':
                if not ignorable:
                    out.append('\xA0') # NBSP
            elif char in '{}\\':
                if not ignorable:
                    out.append(char)
            elif char == '*':
                ignorable = True
            elif char == '\n':
                if not ignorable:
                    out.append('\x0A')  # LF
            elif char == '\r':
                if not ignorable:
                    out.append('\x0D')  # CR
        elif word: # \foo
            curskip = 0
            if word in destinations:
                ignorable = True
            elif ignorable:
                pass
            elif word in specialchars:
                out.append(specialchars[word])
            elif word == 'uc':
                # A \uc without a number is malformed; keep the current skip count.
                if arg is not None:
                    ucskip = int(arg)
            elif word == 'u':
                # A \u without a number carries no character; ignore it.
                if arg is not None:
                    c = int(arg)
                    # Values are written as signed 16-bit numbers.
                    if c < 0:
                        c += 0x10000
                    if not 0 <= c <= 0x10FFFF:
                        # Outside the range chr() accepts: emit U+FFFD.
                        out.append('\ufffd')
                    elif (0xDC00 <= c <= 0xDFFF and out and len(out[-1]) == 1
                            and 0xD800 <= ord(out[-1]) <= 0xDBFF):
                        # Low surrogate directly after a high surrogate:
                        # merge the pair into the single code point it encodes.
                        high = ord(out.pop())
                        out.append(chr(0x10000 + ((high - 0xD800) << 10)
                                       + (c - 0xDC00)))
                    else:
                        out.append(chr(c))
                    curskip = ucskip
        elif hexbyte: # \'xx
            if curskip > 0:
                curskip -= 1
            elif not ignorable:
                # The byte value is used directly as the code point.
                out.append(chr(int(hexbyte, 16)))
        elif tchar:
            if curskip > 0:
                curskip -= 1
            elif not ignorable:
                out.append(tchar)
    return ''.join(out)

In [65]:
from fpdf import FPDF

# Read the RTF source; the context manager closes the file when done.
with open("data/example7.rtf", "r") as f:
    contents = f.read()

# Reduce the RTF markup to plain text.
text = rtf_to_text(contents)

# Create the PDF document and add a page.
pdf = FPDF()
pdf.add_page()

# Register and select a Unicode TrueType font. It is not overridden with a
# core font afterwards, so non-Latin-1 characters in the text can be rendered.
# NOTE(review): the .ttf file must be locatable by fpdf — confirm its location.
pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True)
pdf.set_font('DejaVu', '', 14)

# multi_cell wraps long lines and honours newlines; a single cell() would put
# the whole document on one overflowing line. Width 0 extends to the right margin.
pdf.multi_cell(0, 10, txt=text, align='C')

pdf.output("data/test.pdf")

RuntimeError: TTF Font file not found: DejaVuSansCondensed.ttf