In [1]:
import base64
import json
import mimetypes
import os
from urllib.parse import unquote

import html5lib
import lxml
import lxml.etree
import lxml.html

In [2]:
def url2fp(url):
    """Resolve a ``file:///`` URL (or a plain path) to an existing local path.

    Candidates are tried in order and the first one that exists is returned:

    1. ``url`` with a leading ``file:///`` removed (a plain path is used as-is),
    2. the percent-decoded form of (1),
    3. for ``file:///`` URLs only: (1) and then (2) with a leading ``/`` put
       back.  Dropping all eight prefix characters turns ``file:///C:/x`` into
       ``C:/x``, but it also turns ``file:///tmp/x`` into the relative path
       ``tmp/x``, so the root slash has to be restored for POSIX-style paths.

    :param url: ``file:///`` URL or filesystem path, optionally percent-encoded.
    :returns: the first candidate path that exists on disk.
    :raises FileNotFoundError: if no candidate exists; carries the original
        ``url`` as its argument.
    """
    if url.startswith("file:///"):
        stripped = url[8:]
        bases = [stripped]
        if stripped:
            # Only add the rooted variant when something follows the prefix,
            # so a bare "file:///" does not resolve to the filesystem root.
            bases.append("/" + stripped)
    else:
        bases = [url]

    for base in bases:
        # Literal form first: a file may really contain '%' in its name.
        for candidate in (base, unquote(base)):
            if os.path.exists(candidate):
                return candidate
    raise FileNotFoundError(url)
    
class ImgEncoderCache():
    """Memoizing encoder that turns image files into base64 ``data:`` URIs.

    Entries are keyed by the image reference exactly as it was requested;
    relative references are resolved against the directory given at
    construction time.
    """

    def __init__(self, path):
        """Create an empty cache.

        :param path: directory used to resolve relative image references.
        """
        self._path = path
        self._imgs = {}

    def __getitem__(self, key):
        """Return the data URI for ``key``, encoding the file on first access.

        :raises OSError: (typically ``FileNotFoundError``) if the image file
            cannot be opened.
        """
        ret = self._imgs.get(key)
        if ret is not None:
            return ret
        self.add_img(key)
        return self._imgs[key]

    def add_img(self, file):
        """Read ``file``, encode it and store the data URI under ``file``.

        :param file: absolute path, or path relative to the cache directory.
            If the path does not exist as written, its percent-decoded form is
            used when that exists (e.g. ``my%20chart.png``).
        :raises OSError: if the file cannot be opened.
        """
        if not os.path.isabs(file):
            fp = os.path.join(self._path, file)
        else:
            fp = file

        if not os.path.exists(fp):
            # References taken from HTML attributes are often percent-encoded.
            # Only switch when the decoded path really exists so a failure
            # still reports the path as originally given.
            decoded = unquote(fp)
            if os.path.exists(decoded):
                fp = decoded

        with open(fp, 'rb') as f:
            data = f.read()
        data = base64.b64encode(data).decode('utf-8')

        # The bare extension is not always a valid MIME subtype ("svg" must be
        # "svg+xml", "jpg" is registered as "jpeg"), so consult the registry
        # first and keep the extension-based form only as a fallback.
        mime = mimetypes.guess_type(os.path.basename(fp))[0]
        if not mime or not mime.startswith("image/"):
            ext = os.path.splitext(file)[1][1:]
            mime = "image/%s" % ext

        src = "data:%s;base64,%s" % (mime, data)
        self._imgs[file] = src

In [3]:
def images_2_js(images):
    """Build the JavaScript that swaps embedded image data into ``<img>`` tags.

    The generated script fills an ``images`` lookup table and then, for every
    ``<img>`` carrying a ``data-img-src`` attribute whose value is a key of
    that table, sets the element's ``src`` to the stored value.  Elements
    without the attribute, or with an unknown key, are left untouched rather
    than having their ``src`` overwritten with ``undefined``.

    :param images: mapping of original image reference -> replacement ``src``
        (string keys and values).
    :returns: JavaScript source as a string.
    """
    def js_string(value):
        # json.dumps yields a valid, fully escaped JS string literal (quotes,
        # backslashes, newlines, non-ASCII).  "</" is additionally broken up
        # so a value can never terminate the surrounding <script> element.
        return json.dumps(value).replace("</", "<\\/")

    src = """
var images = {};
%s
document.querySelectorAll("img[data-img-src]").forEach((img) => {
    var data = images[img.getAttribute('data-img-src')];
    if (data !== undefined) {
        img.src = data;
    }
});
"""
    images_as_src = [
        "images[%s] = %s;" % (js_string(key), js_string(value))
        for key, value in images.items()
    ]

    return src % "\n".join(images_as_src)

def fix_report(url, out):
    """Write a copy of an HTML report with its images embedded as data URIs.

    The report located by ``url`` is parsed, every ``<img>`` that has a
    non-empty ``src`` is tagged with a ``data-img-src`` attribute holding that
    original value, and a ``<script>`` element is appended under ``<html>``
    containing the encoded images plus the code that assigns them at load time.

    :param url: ``file:///`` URL or path of the source report (resolved with
        ``url2fp``); image references are resolved relative to its directory.
    :param out: path of the HTML file to write.
    :raises FileNotFoundError: if the report or a referenced image is missing.
    """
    fp = url2fp(url)
    cache = ImgEncoderCache(os.path.dirname(fp))

    parser = lxml.html.HTMLParser(encoding='utf8')
    with open(fp, 'rb') as f:
        html = lxml.html.parse(f, parser=parser)

    images = {}
    for img in html.getroot().xpath(".//img"):
        src = img.get('src')
        if not src:
            # An <img> with a missing or empty src has nothing to embed.
            # Indexing attrib['src'] would raise KeyError, and an empty value
            # would make the cache try to open the report directory itself.
            continue
        images[src] = cache[src]
        img.attrib['data-img-src'] = src

    js = images_2_js(images)
    script = lxml.etree.SubElement(html.xpath("/html")[0], "script", {"type": "application/javascript"})
    script.text = js

    with open(out, 'wb') as f:
        html.write(f, pretty_print=True, method='html')

In [4]:
import tkinter as tk, tkinter.ttk as ttk
from tkinter.messagebox import showinfo
from tkinter.filedialog import askopenfilename, asksaveasfilename
import webbrowser

def runfix(t1, t2, root):
    """Handler for the "Go" button: convert the report, then close the window.

    :param t1: entry widget holding the report URL / path.
    :param t2: entry widget holding the output path; ``.html`` is appended
        when the text does not already end with it.
    :param root: top-level window, destroyed once the attempt has finished.

    A failure inside ``fix_report`` is shown in a message box; on success the
    written file is opened in a new browser tab.
    """
    source_url = t1.get()
    target = t2.get()
    if not target.endswith(".html"):
        target = target + ".html"

    try:
        fix_report(source_url, target)
    except Exception as err:
        showinfo("derp", str(err))
    else:
        # Browsers expect forward slashes in file URLs, also on Windows.
        absolute = os.path.abspath(target).replace("\\", "/")
        webbrowser.open_new_tab("file:///" + absolute)

    root.destroy()
    
def browse(et, func):
    """Run a file-chooser callable and copy its result into an entry widget.

    :param et: entry widget that receives the chosen path.
    :param func: zero-argument callable returning the chosen path; a falsy
        result leaves ``et`` unchanged.
    """
    chosen = func()
    if not chosen:
        return
    set_text(et, chosen)
    
def set_text(e, text):
    """Replace the whole content of entry widget ``e`` with ``text``."""
    start = 0
    # Clear everything from the first character to the end, then insert the
    # new value at the (now empty) start position.
    e.delete(start, tk.END)
    e.insert(start, text)

def runinterface():
    """Show the converter window and block until it is closed.

    The window has one row for the source report, one row for the target
    file (each a label, an entry and a "Browse" button) and a "Go" button that
    hands both entries to ``runfix``.
    """
    window = tk.Tk()

    # Widgets are created in the same order as they appear on screen.
    source_label = ttk.Label(window, text="Enter Report URL:")
    source_entry = ttk.Entry(window, width=70)
    source_browse = ttk.Button(
        window,
        text="Browse",
        command=lambda: browse(source_entry, askopenfilename),
    )

    target_label = ttk.Label(window, text="Enter Target Filepath")
    target_entry = ttk.Entry(window, width=70)
    target_browse = ttk.Button(
        window,
        text="Browse",
        command=lambda: browse(target_entry, asksaveasfilename),
    )

    go_button = ttk.Button(
        window,
        text="Go",
        command=lambda: runfix(source_entry, target_entry, window),
    )

    # (widget, grid options) pairs, placed in one pass.
    layout = (
        (source_label, dict(row=1, column=1)),
        (source_entry, dict(row=1, column=2, columnspan=2)),
        (source_browse, dict(row=1, column=4)),
        (target_label, dict(row=2, column=1)),
        (target_entry, dict(row=2, column=2, columnspan=2)),
        (target_browse, dict(row=2, column=4)),
        (go_button, dict(row=3, column=2)),
    )
    for widget, cell in layout:
        widget.grid(**cell)

    window.mainloop()

In [6]:
# runinterface()

# theurl = "file:///C:/Users/Nathan/AppData/Local/Temp/TTf%7B712c2104-e65e-4ccb-b903-fa92aa53e64d%7D.html"
# outfile = "C:/Users/Nathan/Documents/Personal/test/jsimgtest2.html"

# fix_report(theurl, outfile)
# webbrowser.open_new_tab(outfile)

# import clipboard
# print(repr(clipboard.paste()))

In [5]:
# Batch run: convert each listed report into `fdir` and open the result.
tmp = "file:///C:/Users/Nathan/AppData/Local/Temp"  # not referenced below
fdir = 'C:\\Users\\Nathan\\Documents\\PBS\\WIP procedures-reports\\3.1.0'
prefix = ""  # optional text put in front of every output file name

# (source report URL, output file name without extension)
docs = [
    #("", "End-to-End Traceability Report"),
    (
        "file:///C:/Users/Nathan/AppData/Local/Temp/TTf%7Bb1012fad-daa2-404b-98bb-79a6d6dbaced%7D.html",
        "Requirements Verification Report",
    ),
    (
        "file:///C:/Users/Nathan/AppData/Local/Temp/TTf%7B1909994f-4ea6-4f48-8f07-5e38ab30d5c8%7D.html",
        "Test Run Execution Detail Report",
    ),
]

for url, fname in docs:
    fname = "%s %s" % (prefix, fname) if prefix else fname
    if not fname.endswith(".html"):
        fname = fname + ".html"
    out = os.path.join(fdir, fname)
    fix_report(url, out)
    webbrowser.open_new_tab(out)