# Notebook Setup Generator

# Description

Read a template notebook exported as a .py file and generate a main.js setup file for use with the "Setup" nbextension.

# Environment

## Library Imports

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import os, sys
import re
import json
from enum import Enum
import unittest

## Jupyter-specific Imports and Settings

In [None]:
# Data manipulation
# Options for pandas
pd.options.display.max_columns = 50
pd.options.display.max_rows = 30

# Display all cell outputs (not only the last expression of a cell)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython import get_ipython
ipython = get_ipython()

# autoreload extension
# get_ipython() returns None when this code is not running inside IPython,
# in which case there is no shell to configure.
if ipython is not None:
    # run_line_magic() is the supported replacement for the deprecated
    # InteractiveShell.magic() call.
    if 'autoreload' not in ipython.extension_manager.loaded:
        ipython.run_line_magic('load_ext', 'autoreload')

    ipython.run_line_magic('autoreload', '2')

## Local Imports

In [None]:
# import xutilities
# from xutilities import flatten

## File Paths

In [None]:
# Project root: the directory the notebook is started from
base_path = Path.cwd()

# Data directories, all located under <base>/data
data_path = base_path.joinpath('data')
raw_data_path, interim_data_path, processed_data_path, external_data_path = (
    data_path.joinpath(subdir)
    for subdir in ('raw', 'interim', 'processed', 'external')
)

# Report directories, figures nested inside <base>/reports
reports_path = base_path.joinpath('reports')
figures_path = reports_path.joinpath('figures')

# Input paths

# Outputs paths

# Credentials

## Constants and Globals

In [None]:
# Constants and Globals

# Where the "setup" nbextension directory can be obtained; shown to the user by
# check_prerequisites() when the directory is missing locally.
setup_github_url = 'https://github.com/WillKoehrsen/Data-Analysis/tree/master/setup'

# First part of the generated main.js: opens the module definition and the
# setUp function whose body is filled with generated cell-insertion calls.
js_preamble = """
define(['base/js/namespace', 'base/js/events'], function (Jupyter, events) {
  // Template cells including markdown and imports
  var setUp = function () {
"""
# The contents of the setUp function are insert_cell_at_index calls in between these two

# Last part of the generated main.js: closes setUp and defines the extension
# entry point plus the event hooks. generate_setup_javascript() writes
# js_preamble + <generated calls> + js_postamble to main.js.
js_postamble = """
    // Run all cells
    Jupyter.notebook.execute_all_cells()
  }
  // Prompts user to enter name for notebook
  var promptName = function () {
    // Open rename notebook box if 'Untitled' in name
    // if (Jupyter.notebook.notebook_name.search('Untitled') != -1) {
    //  document.getElementsByClassName('filename')[0].click()
    // }
  }
  // Run on start
  function load_ipython_extension () {
    // Add default cells for new notebook
    if (Jupyter.notebook.get_cells().length === 1) {
      setTimeout(setUp, 500)
    } else {
      promptName()
    }
  }
  // Run when cell is executed
  events.on('execute.CodeCell', function () {
    promptName()
  })
  // Run when notebook is saved
  events.on('before_save.Notebook', function () {
    promptName()
  })
  return {
    load_ipython_extension: load_ipython_extension
  }
})
"""

# Patterns used to classify each line of the exported template .py file.
# They are written as raw strings so the backslashes reach the regex engine
# unchanged without relying on invalid string escapes such as '\/' or '\s'
# (which newer Python versions warn about). The compiled patterns are
# identical to the previous non-raw spellings.
re_hdr1 = re.compile(r'^#!\/usr\/bin\/env.*$')         # shebang line
re_hdr2 = re.compile(r'^# coding: .*$')                # source-encoding line
re_markdown = re.compile(r'^# (#.*)$')                 # markdown heading; group 1 is the heading itself
re_code_cell_start = re.compile(r'^# In\[[0-9 ]*\].*$')  # "# In[ ]:" / "# In[12]:" marker
re_blank_line = re.compile(r'^\s*$')                   # empty or whitespace-only line

# Any non-empty line that does not begin with '#'
re_non_markdown = re.compile(r'^[^#]+(.*)$')

# Printed by check_prerequisites() when the exported template file is missing.
# NOTE(review): this text talks about ~/Downloads and a ".py.html" suffix, while
# get_notebook_template_path() currently looks for a ".py" file under
# ~/Programming/IDEs/Jupyter/Templates/Base - confirm which one is intended.
missing_template_py_help = """
If your template notebook is called "notebook-template" in Jupyter, then choosing
"Download As... Python (.py)" will save a file in the ~/Downloads folder:
\t~/Downloads/notebook-template.py.html
Modify get_notebook_template_path() if it is named differently or in a different location

"""

# Printed by check_prerequisites() when the nbextension "setup" directory is
# missing. Placeholders, in order: source URL, expected local directory.
missing_nbextsetuppath_help = """
Copy the setup directory from 
\t{}
to
\t{}
"""


# Code

In [None]:
def emit_cell_at_index(cell_type: str, inx: int, text: str):
    """Return one JavaScript statement that inserts a notebook cell.

    Args:
        cell_type: 'markdown' or 'code'.
        inx: Position at which the cell is inserted.
        text: Cell contents. It is embedded in a JavaScript template literal,
            so backslashes, backticks and '${' are escaped to keep the
            generated script valid and the cell text unchanged.

    Returns:
        The ``Jupyter.notebook.insert_cell_at_index(...)`` call, terminated by
        a newline.
    """
    # Backslashes must be escaped first, otherwise the backslashes introduced
    # by the two following replacements would be doubled as well.
    escaped = (
        text.replace('\\', '\\\\')
            .replace('`', '\\`')
            .replace('${', '\\${')
    )
    xs_fmt = 'Jupyter.notebook.insert_cell_at_index(\'{}\', {}).set_text(`{}`)\n'
    return xs_fmt.format(cell_type, inx, escaped)


def emit_cell(cell_type: str, cell_index: int, text: str):
    """Render one cell and advance the running cell index.

    Args:
        cell_type: 'markdown' or 'code'.
        cell_index: Index at which this cell is inserted.
        text: Cell contents.

    Returns:
        A ``(javascript, next_index)`` tuple: the statement produced by
        emit_cell_at_index() and ``cell_index + 1``.
    """
    rendered = emit_cell_at_index(cell_type, cell_index, text)
    return rendered, cell_index + 1


def trim_leading_and_trailing_lines(xs):
    """Strip empty lines from both ends of a multi-line string.

    Only completely empty lines are removed; lines that contain whitespace
    are kept, as are empty lines in the middle of the text.

    Example:
        '\\n\\nfirst\\n\\nsecond\\n\\n' -> 'first\\n\\nsecond'
    """
    lines = xs.split('\n')
    first, last = 0, len(lines)
    # Walk the end marker backwards over trailing empty lines ...
    while last > first and lines[last - 1] == '':
        last -= 1
    # ... and the start marker forwards over leading ones.
    while first < last and lines[first] == '':
        first += 1
    return '\n'.join(lines[first:last])


def ignore_line(line):
    """Tell whether ``line`` is boilerplate that carries no cell content.

    A line is ignored when it is exactly the shebang line, exactly the
    utf-8 coding line, or the empty string.
    """
    skippable = frozenset((
        '',
        '# coding: utf-8',
        '#!/usr/bin/env python',
    ))
    return line in skippable


def get_notebook_template_path(notebook_template_name: str = 'notebook-template') -> Path:
    """Locate the exported template notebook.

    Args:
        notebook_template_name: File name of the template without the
            ``.py`` extension.

    Returns:
        ``~/Programming/IDEs/Jupyter/Templates/Base/<name>.py`` with the
        leading ``~`` expanded to the user's home directory.
    """
    template_dir = '~/Programming/IDEs/Jupyter/Templates/Base'
    unexpanded = Path('{}/{}.py'.format(template_dir, notebook_template_name))
    expanded = os.path.expanduser(unexpanded)
    return Path(expanded)


def jupyter_contrib_nbextensions_slist_to_dict(slist):
    """Turn whitespace-split ``Key: value ...`` lines into a dictionary.

    The function is named like this because it has only been tested with
    the output of:
        rx = !{sys.executable} -m pip show jupyter_contrib_nbextensions
        jupyter_contrib_nbextensions_slist_to_dict(rx.fields())['Location']

    Args:
        slist: A list of lists of strings. The first element of each inner
            list is the key followed by one trailing character (the ':'),
            the remaining elements are the words of the value.

    Returns:
        A dict mapping each key (without its last character) to the value
        words joined by single spaces. Empty inner lists, as produced by
        blank lines, are skipped instead of raising IndexError.
    """
    rx = {}
    for field in slist:
        if not field:
            # Blank output line: nothing to record
            continue
        key, val = field[0][:-1], field[1:]
        rx[key] = ' '.join(val)
    return rx


def get_sitepackages_path() -> Path:
    """Return the 'Location' that pip reports for jupyter_contrib_nbextensions.

    The command is run through the IPython shell, see
    https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/
    NOTE(review): relies on IPython substituting ``{sys.executable}`` in the
    command string - confirm against the IPython version in use.
    """
    shell = get_ipython()
    pip_show_output = shell.getoutput('{sys.executable} -m pip show jupyter_contrib_nbextensions')
    package_info = jupyter_contrib_nbextensions_slist_to_dict(pip_show_output.fields())
    return Path(package_info['Location'])


def get_nbextsetuppath() -> Path:
    """Return the 'setup' nbextension directory below the site-packages path."""
    site_packages = get_sitepackages_path()
    return site_packages.joinpath('jupyter_contrib_nbextensions', 'nbextensions', 'setup')


def yes_or_no(question):
    """Ask a yes/no question on the console.

    Adapted from https://gist.github.com/garrettdreyfus/8153571

    Args:
        question: Text shown to the user; ' (y/n): ' is appended.

    Returns:
        True when the lower-cased, stripped reply starts with 'y',
        False for anything else (including an empty reply).
    """
    prompt = question + ' (y/n): '
    reply = str(input(prompt)).lower().strip()
    return reply.startswith('y')


In [None]:
def check_prerequisites():
    """Check that everything needed to generate main.js is present.

    Two things are verified, in this order:
      1. the 'setup' nbextension directory exists,
      2. the exported template file exists.
    For the first missing item a help text is printed and False is
    returned; True is returned when both exist.
    """
    # jupyter extensions must be installed
    # ... TBD

    # the setup folder must be in the right spot
    setup_dir = get_nbextsetuppath()
    if not setup_dir.exists():
        print(missing_nbextsetuppath_help.format(setup_github_url, setup_dir))
        return False

    # the exported template must be where get_notebook_template_path() expects it
    template_file = get_notebook_template_path()
    if template_file.exists():
        return True

    print('Missing {}'.format(template_file))
    print(missing_template_py_help)
    return False

In [None]:
# Main code
class ParseState(Enum):
    """Kind of line most recently recognised while parsing the template.

    generate_setup_javascript() starts in ``none``, switches to ``markdown``
    when a markdown heading line matches and to ``codecell`` when a code-cell
    start marker matches.
    """
    none = 0
    markdown = 1
    codecell = 2


def _render_setup_cells(lines):
    """Convert the lines of an exported notebook into JS cell-insertion calls.

    Args:
        lines: Iterable of text lines, each normally ending with a newline
            (for example an open file).

    Returns:
        The concatenated statements produced by emit_cell(), one per cell.
        Every markdown heading line starts a new markdown cell and every
        code-cell start marker starts a new code cell; all other lines are
        appended verbatim to the cell in progress. The shebang and coding
        header lines are dropped.
    """
    js_calls = []
    cell_index = 0
    cell_type = None  # stays None until the first heading or code-cell marker
    accum = ''

    for line in lines:
        if re_hdr1.search(line) or re_hdr2.search(line):
            continue

        heading = re_markdown.match(line)
        starts_code_cell = heading is None and re_code_cell_start.search(line) is not None
        if heading is None and not starts_code_cell:
            accum += line
            continue

        # A new cell begins here: flush the one in progress. Text seen before
        # the very first cell is deliberately kept and becomes part of it.
        if cell_type is not None:
            xs, cell_index = emit_cell(cell_type, cell_index, trim_leading_and_trailing_lines(accum))
            js_calls.append(xs)
            accum = ''

        if heading is not None:
            cell_type = 'markdown'
            accum += heading.group(1)
        else:
            cell_type = 'code'
            # the marker line itself is not part of the cell text

    # Flush the last cell. Only the accumulated text is emitted: the final
    # input line has already been handled inside the loop, so appending it
    # again would duplicate it (or leak a raw marker line into the cell).
    if cell_type is not None:
        xs, cell_index = emit_cell(cell_type, cell_index, trim_leading_and_trailing_lines(accum))
        js_calls.append(xs)

    return ''.join(js_calls)


def generate_setup_javascript():
    """Generate main.js for the "Setup" nbextension from the template notebook.

    Reads the exported template returned by get_notebook_template_path(),
    converts it into cell-insertion calls and writes
    ``js_preamble + calls + js_postamble`` to ``main.js`` inside the
    directory returned by get_nbextsetuppath(). If main.js already exists,
    the user is asked for confirmation before it is overwritten.
    """
    template_path = get_notebook_template_path()
    # Explicit encoding so the result does not depend on the platform default
    with open(template_path, encoding='utf-8') as fp:
        setup_guts = _render_setup_cells(fp)

    mainjs_path = get_nbextsetuppath() / 'main.js'
    ok_to_write = not mainjs_path.exists() or yes_or_no('Existing file: {}\nOverwrite?'.format(mainjs_path))
    if ok_to_write:
        with open(mainjs_path, 'w', encoding='utf-8') as fp:
            print(js_preamble + setup_guts + js_postamble, file=fp)


# Unit Tests

Examples of different types of lines:  

Shell header

```
#!/usr/bin/env python
# coding: utf-8
```

Markdown: 
```
# # Title
# # Description
# ## Library Imports
# # Environment
```

Start of code cell:  
```
# In[ ]:
# In[12]:
```
Python code lines
```
import pandas as pd
import numpy as np
```

In [None]:
class Test_My_Code(unittest.TestCase):
    """Checks that the line-classification regexes match a sample export."""

    def __init__(self, methodName='runTest'):
        # A new TestCase instance is created for each test method,
        # thus __init__ is called once for each test method
        super(Test_My_Code, self).__init__(methodName)

        # Sample of an exported notebook; split on '\n' it yields one entry
        # per line plus an empty entry at each end.
        self.regex_test_mls = """
#!/usr/bin/env python
# coding: utf-8
# # Title
# # Description
# In[ ]:

# # Environment
# ## Library Imports
# In[12]:
import pandas as pd
import numpy as np
"""

        # The pattern expected to match the sample line at the same position
        self.regex_test_regexes = [
            re_blank_line,
            re_hdr1,
            re_hdr2,
            re_markdown,
            re_markdown,
            re_code_cell_start,
            re_blank_line,
            re_markdown,
            re_markdown,
            re_code_cell_start,
            re_non_markdown,
            re_non_markdown,
            re_blank_line,
        ]

    def is_regex_match(self, regx, test: str, expected: str):
        """Return True when ``regx`` matches ``test`` and its last group equals ``expected``.

        When the pattern has no groups, the whole match (group 0) is compared.
        """
        mm = re.match(regx, test)
        if not mm:
            return False
        last_grp = len(mm.groups())
        return expected == mm.group(last_grp)

    def test_regex(self):
        """Every sample line must be matched by the pattern at the same position."""
        lines = self.regex_test_mls.split('\n')
        # zip() would silently ignore surplus entries, so make a mismatch visible
        self.assertEqual(len(lines), len(self.regex_test_regexes))
        for line, regx in zip(lines, self.regex_test_regexes):
            # subTest reports which line/pattern pair failed and keeps checking the rest
            with self.subTest(line=line, pattern=regx.pattern):
                self.assertIsNotNone(re.match(regx, line))


## Initialization

In [None]:
# Initializations

# Main

In [None]:
if __name__ == '__main__':
    # Run the unit tests in-process; exit=False keeps the interpreter alive
    # and the dummy argv stops unittest from parsing the real command line.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

    # Generate main.js only when the environment is set up
    prerequisites_ok = check_prerequisites()
    if prerequisites_ok:
        generate_setup_javascript()