In [6]:
# A non-trivial string of html code to parse as a test input
source = '<html><head><title>Test</title></head><body><h1>Parse me!</h1></body></html><img src="favorites.png" width="800" height="800" alt="Alt_name" title="Mytitle" align="center" /><html><head><title>Test</title></head><body><h1>Parse me!</h1></body></html>'

In [7]:
"""HTML Image handling for embedded images in markdown cells."""

#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
from ipython_genutils.py3compat import PY3
if PY3:
    from html.parser import HTMLParser
else:
    from HTMLParser import HTMLParser
import base64
import os.path

#-----------------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------------

__all__ = ['img2base64']


def img2base64(s):
    """Parse HTML image references in Markdown cells.

    This looks for HTML tags having a img tag name `img`
    and converts the image to a data URI for static embedding.
    The tranformation looks like this:

    `<img src="./Images/My_image.png" width="800" height="800" alt="Alt_name" title="Mytitle" align="center" />`

    Becomes

    `<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIA..." width="800" height="800" alt="Alt_name" title="Mytitle" align="center" />`

    Any HTML tag can be used, which allows the citations to be formatted
    in HTML in any manner.
    """
    parser = Img2Base64Parser()
    parser.feed(s)
    parser.close()
    outtext = u''
    startpos = 0
    for img in parser.imglist:
            outtext += s[startpos:img[1][0]]
            outtext += 'data:image/%s;base64,%s'% \
                (img[0][1],str(img[0][0]).lstrip('b\'').rstrip('\''))
            startpos = img[1][1] if len(img)==3 else -1
    outtext += s[startpos:] if startpos != -1 else ''
    return outtext

#-----------------------------------------------------------------------------
# Classes
#-----------------------------------------------------------------------------

class Img2Base64Parser(HTMLParser):
    """Image Parser
    Replaces html img file references with base64 encoded strings.
    
    Inherites from HTMLParser, overrides:
     - handle_starttag
     - handle_endtag
    """
    # number of open tags
    opentags = None
    # list of found imgs
    imglist = None
    # active img tag
    imgtag = None

    def __init__(self):
        self.imglist = []
        self.opentags = 0
        HTMLParser.__init__(self)
    
    def get_offset(self):
        # Compute startposition in source
        lin, offset = self.getpos()
        pos = 0
        for i in range(lin):
            pos = self.data.find('src=',pos) + 5
        return pos
        
    def handle_starttag(self, tag, attrs):
        # for each tag check if attributes are present and convert src to base64
        if self.opentags == 0 and len(attrs)>0:
            for atr, data in attrs:
                if atr.lower() == 'src':
                    self.imgtag = tag
                    self.opentags = 1
                    with open(data, "rb") as image_file:
                        encoded_data = base64.urlsafe_b64encode(image_file.read())
                        extension = os.path.splitext(data)[1][1:].strip().lower()
                    self.imglist.append([[encoded_data, extension],
                                         [self.get_offset(), self.get_offset()+len(data)]])
                    return
                
        if tag == self.imgtag:
            # found an open img tag but not the starting one  
            self.opentags += 1
  
    def handle_endtag(self, tag):
        if tag == self.imgtag:
            # found img tag check if starting one
            if self.opentags == 1:
                pos = self.get_offset()

                self.imglist[-1].append(pos+len(tag)+ 3)
            self.opentags -= 1
        
    def feed(self, data):
        self.data = data
        HTMLParser.feed(self, data)
        
output = img2base64(source)

In [8]:
#Couldn't figure out a programmatic way to copy the output to a new MD cell,
#so I just display it from a code cell
from IPython.display import display, Markdown
display(Markdown(output))

# The original text is displayed on either side of where the image should be

<html><head><title>Test</title></head><body><h1>Parse me!</h1></body></html><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABmJLR0QA_wD_AP-gvaeTAAAACXBIWXMAAABIAAAASABGyWs-AAAACXZwQWcAAAAYAAAAGAB4TKWmAAAEhUlEQVRIx91US4tcVRD-6j67e7r7ziPTQ9OZmHEwojJuxIhB1JCEBFdxETAqroMr_QGC4MJNQAVFFHwjCKKRSLIIGh0N0YAYDRHywjjJZN79uNN3-r7OqXLR3TOdSTI7NxYc6txzq77vq7p1D_AfG93u8Kft9-QMy-wnAolI8Njpi_76mMkHRy0yMAiCA0EMltrj56b1hgQ_79iWX7o0-5Jtuwf6BorjRDCiIJxpLTcnCyPFN_ZcmDl_6uG7qD7dOMApDuUHvQnTNPJKsR9U6-cMG-8VN_UdefKvOb6F4MS95crK4srxynh5IjuUhTVcALSG8ltQforaQjMO_MZhVnJ_afPI_lzeIDMnMCwDHKfQkYnWisjCtdlv-4ezL-y8tLS8SvDjxObc8lzzm8rYyG5v-1bqmxgTCJOkCViloqo-BWenEMwmYppArmxRZmxQiECiFERp0WFM0dU64sCRmb9nvy4U7Od3XfcjCwAaM_X9pcrIrtxYgdxRD-nSPNqJKZBqsEqRHffA8Q0yXFPs4Ry070OUXltaw_IIOqijVBl-emFq7lkAHxIAHB32fqhsGXhiYOcYwLoD3l1rAFgHKIoBzat7ThjKT8GJi3k_Pe8Y-iHrxJbBopCzzcwKVK22WrIoTVAaollEaZIOEBSLaCZRLKKYdMzgmIUjTRwzwBCYmrL54qgKmlssYekzHMOWJIFaakB6FIlmoONFS9srgW5p6FBDUgEEEBEQtedFREDMMAyyAWQsgbS0ZsWRRlpLRLR0yhZZAxYRFkgq0K32u87wdT1EVs9g5FxJoiQAsGDsm274aRhPCVtQfkzKjyj1Yyg_IbWckmqmUE1FuqlJB0yiBdSRSx3r3ZNlwBooUrTSOrNvaWXRAgBO4y-1NfSIVGsCAKIBCERYujoFPdZV26taRIQMglMqYtlXiW3iTQAwAMB2zI-WFhqXzfwQSQISJQCDCESQHnU9am-pwDTIGfEolQxqM9W33Iw9uUrw1HyznjabLza1FRr5AkC3vaLufKFZBtyyB00Zmb48dzzT776ye2aZAcDsBn2R6Kt7o2jaLXp7Xce0JElWWboq1z-3e27CrQxAsYN_Ls1_X3fN5w4urgTdWLM38atU__loGF7IZfN7c30ZV-JoQ-Vm1oG7eRBJaODCxbljV2165mU_atwUsz5pa9a5-HvV_61sOXu8_kIBSQzq-Qxd5WYhC7cyRKGv5bvLc8dej9WhmmH41xTzhgQtEfuE4uofQfjLNiXbS6WhTYjCm1pkD-bJKQ8gWExw9Mr8kcOJfrUOhNcVy5hJuiHgOxJogRMCmSrQOhWnJz2_NTo-MnS3yQpghlMegLPJQ-PGCn9yZf7TdzW_kwAhgBSAigWJwgYEDEC3p8sIgfRX5tOW3zLHvcID-ZJnWV4OC1N-_MHU0vufi3ysgQBtghBApIB4wxZpgG1AGYBmQGkgPityZnG5NX2f0I5WI-K3b9ReOyL4jAEfbYImrRHd9FNuOPCDBHKILBBsDZg7gImsYOgkYTIVsAmomEU1AYX_rf0L5MnfQwHIBdQAAAAldEVYdGRhdGU6Y3JlYXRlADIwMTAtMDItMTBUMTI6Mjg6MTAtMDY6MDAAG4MqAAAAJXRFWHRkYXRlOm1vZGlmeQAyMDA5LTA1LTI1VDIxOjIwOjE2LTA1OjAwhLoNbgAAAABJRU5ErkJggg==" width="800" height="800" alt="Alt_name" title="Mytitle" align="center" /><html><head><title>Test</title></head><body><h1>Parse me!</h1></body></html>

In [16]:
#Let's convert it to an .html and see if this helps
!jupyter nbconvert --to html img2base64.ipynb

[NbConvertApp] Converting notebook img2base64.ipynb to html
[NbConvertApp] Writing 269042 bytes to img2base64.html


In [17]:
#Running this cell shows the same thing - no image
!firefox img2base64.html