# Notebook(s) to HTML

#### Purpose: 
This tool converts a set of notebooks into a single HTML page with navigation. This is especially helpful when you are explaining a single topic in multiple parts.

## Input Parameters

In [43]:
# --- Input list ------------------------------------------------------------
# CSV file listing the notebooks (.ipynb) to be processed.
source_listfile = "list_ipynb.csv"

# --- HTML fragments read while assembling the combined page -----------------
nbheader_template = "nbheader_template.html"
style_template = "style_template.html"
nav_script = "navigation_script.html"

# --- Files written by this notebook -----------------------------------------
# Jinja template created by the %%writefile cell and handed to the HTML exporter.
template_file = "ipy2htmlheadless_converter.tplx"
# Combined, navigable HTML page.
output_filename = "20_Normal_Distribution.html"

## Jinja Template for Headless HTML

In [44]:
%%writefile $template_file

{% extends 'full.tpl'%}

{# Headless variant of 'full.tpl': the `header` block is overridden with an
   empty body, so each converted notebook is rendered without that block's
   content. Everything else is inherited unchanged from the parent template. #}
{% block header %}
{% endblock header %}

Overwriting ipy2htmlheadless_converter.tplx


## Convert each ipynb to headless HTML

The number of files and the title (first h1 header) of each file are collected along the way. The script also takes care of embedding notebook attachments in the generated HTML as base64 data.

<font color='red'>Ensure every notebook has at least one h1 header; its text is used as that notebook's title. The script is not foolproof yet.</font>

In [45]:
import nbformat
import nbconvert
import sys
import os
import ntpath
import csv
from bs4 import BeautifulSoup

# Accumulators shared with the later cells:
#   bodies  - headless HTML of every converted notebook, in list order
#   titles  - one title per notebook (used for the navigation labels)
#   counter - number of notebooks converted
# result_paths is kept for backward compatibility; nothing in this cell fills it.
result_paths, counter, titles, bodies = [], 0, [], []

# One exporter is enough for all notebooks; it renders with the headless template.
exporter = nbconvert.HTMLExporter()
exporter.template_file = template_file   # TEMPLATE FILE GOES HERE

# 1. Read the list of .ipynb files (first column of the CSV named by source_listfile).
#    newline='' is what the csv module asks for when it is given a file object.
with open(source_listfile, newline='') as csvfile:
    for each_row in csv.reader(csvfile):
        # Skip blank lines / empty first cells instead of failing on each_row[0]
        if not each_row or not each_row[0].strip():
            continue
        current_file = each_row[0].strip()

        # 2. For each file, create the respective html
        #    https://github.com/jupyter/nbconvert/issues/699
        with open(current_file, 'rb') as nb_file:
            nb_contents = nb_file.read().decode('utf8')

        # Convert using the ordinary exporter
        notebook = nbformat.reads(nb_contents, as_version=4)
        body, res = exporter.from_notebook_node(notebook)

        # Map every image attachment reference to its base64 data URI
        images = []
        for cell in notebook['cells']:
            if 'attachments' in cell:
                for filename, attachment in cell['attachments'].items():
                    for mime, payload in attachment.items():
                        images.append((f'attachment:{filename}', f'data:{mime};base64,{payload}'))

        # Fix up the HTML: each collected attachment replaces ONE reference
        # (count=1), so equally named attachments of different cells are
        # consumed one after another in document order.
        for src, data_uri in images:
            body = body.replace(f'src="{src}"', f'src="{data_uri}"', 1)

        # Title = first child of the first <h1> (any further children of the
        # heading are ignored). Fall back to the notebook's file name when the
        # notebook has no usable <h1>, so one missing heading does not abort the run.
        soup = BeautifulSoup(body, 'html.parser')
        heading = soup.find('h1')
        first_child = heading.contents[0] if heading is not None and heading.contents else None
        if first_child is None:
            title = os.path.splitext(os.path.basename(current_file))[0]
        elif isinstance(first_child, str):
            title = str(first_child)
        else:
            # First child is a nested tag (e.g. <em>): use its text so the title is a plain string
            title = first_child.get_text()

        titles.append(title)
        print(title)
        bodies.append(body)
        counter += 1

The Binomial Distribution
The Normal Distribution
Sampling Distributions
Using Normal Distribution and Z Score
Calculating sum of dice outcomes


## Write html with beautiful soup

In [46]:
# Parse the HTML fragments. The `with` blocks close each file once it has been
# read instead of leaving the handle open until garbage collection.
with open(nbheader_template) as header_file:
    head_soup = BeautifulSoup(header_file, "html.parser")

# NOTE(review): style_soup is parsed but never inserted into the page in this
# cell - confirm whether it was meant to be added next to the header.
with open(style_template) as style_file:
    style_soup = BeautifulSoup(style_file, "html.parser")

base_template = "<!DOCTYPE html><html></html>"
main_soup = BeautifulSoup(base_template, "html.parser")

main_soup.html.append(head_soup)  # add nbconvert header


# INSERT THE BODY AS IT IS
bodies = [body.replace('<body>','').replace('</body>','') for body in bodies]  # no need of body tags
# bodies = ['<div>Test div' + str(i+1) + '</div>' for i in range(3)] # for MWE

# Strings that do not change between notebooks are defined once, outside the loop.
old_ID_str = '<div tabindex="-1" id="notebook" class="border-box-sizing">'
# apparently the html space character is messed up, so it is stripped out
nonBreakSpace = '&nbsp;' #u'\xa0'

body_tag = main_soup.new_tag('body')
for i, each_body in enumerate(bodies):
    # monkey patch: give the first div of every notebook a unique ID
    # (notebook_0, notebook_1, ...) so each page can be addressed individually
    new_ID_str = '<div tabindex="-1" id="notebook_{}" class="border-box-sizing">'.format(i)
    div_str = each_body.replace(old_ID_str, new_ID_str)
    div_str = div_str.replace(nonBreakSpace, '')
    div_soup = BeautifulSoup(div_str, 'html.parser')
    body_tag.append(div_soup)

main_soup.html.insert(1, body_tag)

# We write before further ops because of some issue in the existing soup.
# The default text encoding is kept on purpose: the next cell reads this file
# back with the default encoding as well.
with open(output_filename, "w") as file:
    file.write(str(main_soup))

In [47]:
# set up the display settings and the navigation part

# We read the page back from disk because continuing with the soup built in
# the previous cell caused issues. `with` closes the handle before the same
# file is reopened for writing at the end of this cell.
with open(output_filename) as html_file:
    soup = BeautifulSoup(html_file, "html.parser")

# display the first page, hide all the others
for i in range(counter):
    page_id = 'notebook_{}'.format(i)
    page_soup = soup.find('div', {'id': page_id})
    if page_soup is None:
        # Happens when the id rewrite in the previous cell did not match the exported HTML
        raise ValueError('div with id "{}" not found in {}'.format(page_id, output_filename))
    page_soup.attrs['style'] = 'display:block;' if i == 0 else 'display:none;'

# navigation tags: <nav><ul class="nav"> ... </ul></nav> as first child of <body>
nav_tag = soup.new_tag('nav')
ul_tag = soup.new_tag('ul')
# new_tag() uses keyword names verbatim as attribute names, so class_='nav'
# would create a literal "class_" attribute; set "class" explicitly instead.
ul_tag['class'] = 'nav'
nav_tag.append(ul_tag)
soup.body.insert(0, nav_tag)

# one entry per notebook, in list order
for i in range(counter):
    li_tag = soup.new_tag('li')
    a_tag = soup.new_tag('a', href='#', onclick='divVisibility({});'.format(i))
    a_tag.string = 'Part {} : {}'.format(i + 1, titles[i])
    li_tag.append(a_tag)
    ul_tag.append(li_tag)

# script tag declaring the array of page div IDs (divsID) inserted before the navigation script
divs_list = ", ".join("'notebook_{}'".format(i) for i in range(counter))
divsID_js = '<script>var divsID = [{}];</script>'.format(divs_list)
prescript_soup = BeautifulSoup(divsID_js, 'html.parser')
soup.body.append(prescript_soup)  # add divsID array

with open(nav_script) as script_file:
    script_soup = BeautifulSoup(script_file, "html.parser")
soup.body.append(script_soup)  # add navigation script

# update the changes (prettify() already returns a string)
with open(output_filename, "w") as file:
    file.write(soup.prettify())

In [48]:
import os
import webbrowser
from pathlib import Path

# Open the generated page with the system's default handler.
# os.startfile only exists on Windows; on other platforms fall back to the
# default web browser so the cell does not fail with AttributeError.
if hasattr(os, 'startfile'):
    os.startfile(output_filename)
else:
    webbrowser.open(Path(output_filename).resolve().as_uri())