from __future__ import absolute_import
from fitz.fitz import *

'''The following is a collection of convenience functions to simplify the use of fitz / PyMuPDF.
'''
#==============================================================================
# A generalized function for extracting a page's text.
# All that needs to be specified is a page object previously created by the
# loadPage() method of a document.
#==============================================================================
def GetText(page, output = "text"):
    '''Extract the text of a page.

    Parameters:
        page: page object, e.g. from a previous loadPage(). It must carry a
            truthy "parent" attribute, otherwise it is rejected.
        output: requested format, compared case-insensitively: "text",
            "html", "json" or "xml". Any other value silently yields the
            plain "text" output.

    Returns:
        Whatever the matching TextPage method returns: extractJSON(),
        extractHTML(), extractXML() or extractText(basic = True).

    Raises:
        UserWarning: if page has no truthy "parent" attribute.
    '''
    owner = getattr(page, "parent", None)
    if not owner:
        raise UserWarning("invalid page object provided to GetText")

    # First pass: record the page into a display list.
    display_list = fitz.DisplayList()
    recorder = fitz.Device(display_list)
    page.run(recorder, fitz.Identity)

    # Second pass: replay the display list, restricted to the page bounds,
    # into a device built from a TextSheet / TextPage pair.
    sheet = fitz.TextSheet()
    text_page = fitz.TextPage()
    bounds = page.bound()
    display_list.run(fitz.Device(sheet, text_page), fitz.Identity, bounds)

    wanted = output.lower()
    if wanted == "json":
        return text_page.extractJSON()
    if wanted == "html":
        return text_page.extractHTML()
    if wanted == "xml":
        return text_page.extractXML()
    # "text" and every unrecognised value end up here.
    return text_page.extractText(basic = True)

#==============================================================================
# A generalized function for rendering a page's image.
# All that needs to be specified is a page object previously created by the
# loadPage() method of a document.
#==============================================================================
def GetPixmap(page, matrix = fitz.Identity, Colorspace = "RGB"):
    '''Create a fitz.Pixmap object for a page.

    Parameters:
        page: page object, e.g. from a previous loadPage(). It must carry a
            truthy "parent" attribute, otherwise it is rejected.
        matrix: a fitz.Matrix instance specifying the required
            transformation. Defaults to fitz.Identity (no transformation).
        Colorspace: text string naming the colour space, compared
            case-insensitively: "RGB", "CMYK" or "GRAY". Any other value
            silently selects "RGB".

    Returns:
        The rendered fitz.Pixmap.

    Raises:
        UserWarning: if page has no truthy "parent" attribute.
    '''
    if not getattr(page, "parent", None):
        raise UserWarning("invalid page object provided to GetPixmap")

    # Match GRAY and CMYK explicitly so that an unrecognised spec falls back
    # to RGB, as documented (previously it silently fell back to GRAY).
    requested = Colorspace.upper()
    if requested == "GRAY":
        cs = fitz.CS_GRAY
    elif requested == "CMYK":
        cs = fitz.CS_CMYK
    else:
        cs = fitz.CS_RGB

    rect = page.bound().transform(matrix)          # scale page boundaries
    irect = rect.round()                           # integer rectangle representing it
    pix = fitz.Pixmap(fitz.Colorspace(cs), irect)  # create an empty pixmap
    pix.clearWith(255)                             # clear it with color "white"
    dev = fitz.Device(pix)                         # create a "draw" device
    page.run(dev, matrix)                          # render the page
    return pix

#==============================================================================
# A function to collect all links of a PDF page.
# All that needs to be specified is a page object previously created by the
# loadPage() method of a document.
#==============================================================================
def GetLinks(page):
    '''Create a list of all links contained in a page.

    Parameters:
        page: page object, e.g. from a previous loadPage(). It must carry a
            truthy "parent" attribute, otherwise it is rejected.

    Returns:
        A list with one dictionary per recognised link, in the order the
        links are chained via their "next" attribute. Every dictionary has
        a "type" key; the remaining keys depend on the type:
            "uri":    "uri"
            "goto":   "page"
            "gotor":  "file", "page"
            "launch": "file"
            "named":  "name"
        Links of any other kind are skipped.

    Raises:
        UserWarning: if page has no truthy "parent" attribute.
    '''
    if not getattr(page, "parent", None):
        raise UserWarning("invalid page object provided to GetLinks")

    links = []
    ln = page.loadLinks()
    while ln:
        dest = ln.dest
        kind = dest.kind
        if kind == fitz.LINK_URI:
            nl = {"type": "uri", "uri": dest.uri}
        elif kind == fitz.LINK_GOTO:
            nl = {"type": "goto", "page": dest.page}
        elif kind == fitz.LINK_GOTOR:
            nl = {"type": "gotor", "file": dest.fileSpec, "page": dest.page}
        elif kind == fitz.LINK_LAUNCH:
            nl = {"type": "launch", "file": dest.fileSpec}
        elif kind == fitz.LINK_NAMED:
            nl = {"type": "named", "name": dest.named}
        else:
            # Unrecognised kind: reset explicitly so that neither an unbound
            # name nor the previous link's dictionary gets appended.
            nl = None

        if nl is not None:
            links.append(nl)
        ln = ln.next
    return links

#==============================================================================
# A function to collect all bookmarks of a PDF document in the form of a table
# of contents. It is very similar to Document.getToC (or fitz.GetToC(doc)).
# The difference is an additional entry for each outline, which specifies
# any link destination information. For details on what these are, see
# PyMuPDF's documentation.
#==============================================================================
def GetExtendedToC(doc):
    '''Create an extended table of contents for a document.

    Parameters:
        doc: document object. It must carry a truthy "authenticate"
            attribute and an "outline" attribute.

    Returns:
        A list with one entry [level, title, page, link] per outline item,
        in depth-first order; level starts at 1. page is the 1-based
        destination page for LINK_GOTO items and 0 otherwise. link is a
        dictionary describing the destination for the GOTOR, LAUNCH, URI,
        NAMED and NONE kinds, and an empty dictionary otherwise. An empty
        list is returned when the document has no outline.

    Raises:
        UserWarning: if doc has no truthy "authenticate" attribute.
        ValueError: if doc has no "outline" attribute (reported as
            "document is still encrypted").
    '''
    def recurse(olItem, liste, lvl):
        # Walk the sibling chain at this level, descending into children.
        while olItem:
            title = olItem.title
            if not title:
                title = u" "
            elif isinstance(title, bytes):
                # Only byte strings need decoding; a title that is already
                # text has no usable decode() (it fails on Python 3 str).
                title = title.decode("utf-8")

            dest = olItem.dest
            kind = dest.kind
            page = 0          # stays 0 unless the item is a LINK_GOTO
            link = {}
            if kind == fitz.LINK_GOTO:
                page = dest.page + 1
            elif kind == fitz.LINK_GOTOR:
                link = {"type": "gotor", "file": dest.fileSpec,
                        "page": dest.page}
            elif kind == fitz.LINK_LAUNCH:
                link = {"type": "launch", "file": dest.fileSpec}
            elif kind == fitz.LINK_URI:
                link = {"type": "uri", "uri": dest.uri}
            elif kind == fitz.LINK_NAMED:
                link = {"type": "named", "name": dest.named}
            elif kind == fitz.LINK_NONE:
                link = {"type": "none", "page": dest.page}

            liste.append([lvl, title, page, link])
            if olItem.down:
                recurse(olItem.down, liste, lvl + 1)
            olItem = olItem.next
        return liste

    if not getattr(doc, "authenticate", None):
        raise UserWarning("invalid document object provided to GetExtendedToC")

    if not hasattr(doc, "outline"):
        raise ValueError("document is still encrypted")

    olItem = doc.outline
    if not olItem:
        return []
    return recurse(olItem, [], 1)

#================================================================
# Function: Table of Contents
#================================================================
def GetToC(doc):
    '''Create a table of contents for a document.

    Parameters:
        doc: document object. It must carry a truthy "authenticate"
            attribute and an "outline" attribute.

    Returns:
        A list with one entry [level, title, page] per outline item, in
        depth-first order; level starts at 1. page is the destination page
        plus 1 for items whose destination kind equals 1, and 0 otherwise.
        An empty list is returned when the document has no outline.

    Raises:
        UserWarning: if doc has no truthy "authenticate" attribute.
        ValueError: if doc has no "outline" attribute (reported as
            "document is still encrypted").
    '''
    def recurse(olItem, liste, lvl):
        # Walk the sibling chain at this level, descending into children.
        while olItem:
            title = olItem.title
            if not title:
                title = u" "
            elif isinstance(title, bytes):
                # Only byte strings need decoding; a title that is already
                # text has no usable decode() (it fails on Python 3 str).
                title = title.decode("utf-8")

            # Kind 1 is handled like the LINK_GOTO branch of GetExtendedToC
            # (presumably 1 == fitz.LINK_GOTO; confirm against the binding).
            if olItem.dest.kind == 1:
                page = olItem.dest.page + 1
            else:
                page = 0

            liste.append([lvl, title, page])
            if olItem.down:
                recurse(olItem.down, liste, lvl + 1)
            olItem = olItem.next
        return liste

    if not getattr(doc, "authenticate", None):
        raise UserWarning("invalid document object provided to GetToC")

    if not hasattr(doc, "outline"):
        raise ValueError("document is still encrypted")

    olItem = doc.outline
    if not olItem:
        return []
    return recurse(olItem, [], 1)