#### Jupyter notebook generator

In [3]:
import inspect
import json
import os
import random
import textwrap
from typing import List

from jinja2 import FileSystemLoader, Environment, BaseLoader

In [4]:
# Jinja2 template for the whole notebook document.
# Expects ``ctx.cells`` to be a sequence of already-rendered cell JSON
# fragments (strings); they are emitted in order, separated by commas, inside
# the "cells" array. The metadata and nbformat fields below are fixed text.
notebook_tpl = """
{
 "cells": [
   {% for cell_rec in ctx.cells -%}
   {{cell_rec}}{{ "," if not loop.last else "" }}
   {% endfor %}
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
"""

# Jinja2 template for a single code cell.
# Expects ``ctx.id`` (the cell id) and ``ctx.content`` (the cell source).
# ``ctx.content`` is placed between double quotes as one JSON string, so the
# caller must pass text that is already escaped for a JSON string literal.
# The execution count and the empty stdout output are fixed text.
code_tpl = """
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "{{ctx.id}}",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [""]
    }
   ],
   "source": ["{{ctx.content}}"]
  }
"""

# Jinja2 template for a single markdown cell.
# Expects ``ctx.id`` (the cell id) and ``ctx.content`` (the markdown text,
# already escaped for use inside a JSON string literal).
markdown_tpl = """
 {
   "cell_type": "markdown",
   "id": "{{ctx.id}}",
   "metadata": {},
   "source": [
    "{{ctx.content}}"
   ]
  }
"""

# Jinja2 template for a code cell containing one ``sns.scatterplot`` call.
# Expects ``ctx.id``, ``ctx.df`` (rendered bare, i.e. as a variable name) and
# ``ctx.x`` / ``ctx.y`` / ``ctx.hue`` (each rendered inside single quotes, i.e.
# as string literals in the generated code).
# NOTE(review): because ``hue`` is always quoted, a value of None cannot be
# expressed through this template -- it would appear as the string 'None'.
scatterplot_tpl = """
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "{{ctx.id}}",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [""]
    }
   ],
   "source": [
    "sns.scatterplot(data={{ctx.df}}, x='{{ctx.x}}', y='{{ctx.y}}', hue='{{ctx.hue}}')"
   ]
  }
"""

# Source text for a generated code cell: a FacetGrid of 'Age' histograms over
# ``train_df``, split by 'Survived' (columns) and 'Pclass' (rows).
# Passed to ``NotebookGenerator.code`` in ``generate_notebook``.
graph_code_part1 = """
grid = sns.FacetGrid(train_df, col='Survived', row='Pclass', height=2.2, aspect=1.6)
grid.map(plt.hist, 'Age', color="salmon", alpha=.5, bins=20)
grid.add_legend();
"""

# Source text for a generated code cell: a relplot of 'Age' against 'Fare'
# over ``train_df``. The common leading indentation is intentional-safe: it is
# stripped by ``textwrap.dedent`` in ``NotebookGenerator._jupiter_txt``.
graph_code_part2 = """
    sns.relplot(x='Fare', y='Age', hue='Survived', style='Survived',
                col='Sex', data=train_df, height=4);
"""

In [5]:
def _render_template_from_file(tpl_name: str, ctx: dict) -> str:
    """Render the template file ``tpl_name`` found under ``./templates``.

    The data is exposed to the template under the single name ``ctx``.
    Returns the rendered text.
    """
    environment = Environment(loader=FileSystemLoader(searchpath='./templates'))
    return environment.get_template(tpl_name).render(ctx=ctx)


def _render_template_from_str(tpl_str: str, ctx: dict) -> str:
    """Render the in-memory template ``tpl_str``.

    The data is exposed to the template under the single name ``ctx``.
    Returns the rendered text.
    """
    string_env = Environment(loader=BaseLoader())
    compiled = string_env.from_string(tpl_str)
    rendered = compiled.render(ctx=ctx)
    return rendered



def _save_notebook(file_name: str, nb_code: str):
    """Save generated notebook"""
    base_path = './generated'
    file_path = os.path.join(base_path, f'{file_name}.ipynb')
    os.makedirs(base_path, exist_ok=True)
    with open(file_path, 'w') as out_file:
        out_file.write(nb_code)

# noinspection SpellCheckingInspection


class NotebookGenerator:
    """Builds the JSON text of notebook cells and assembles a notebook.

    Every cell-producing method returns a JSON fragment (a string) rendered
    from one of the module-level templates; ``render_notebook`` combines such
    fragments into a complete notebook document.
    """

    def __init__(self):
        # Id of the most recently generated cell. It seeds the next id, so the
        # id sequence is the same on every run.
        self.prev_id = ''

    def _jupiter_txt(self, text: str) -> str:
        """Convert ``text`` into the body of a JSON string literal.

        The text is dedented and a single leading newline (as produced by a
        triple-quoted literal that starts on the next line) is dropped. The
        result is then escaped for JSON: quotes, backslashes, newlines, tabs
        and other control characters. Non-ASCII characters are kept as-is.

        Returns the escaped text WITHOUT the surrounding double quotes, ready
        to be placed between quotes by a template.
        """
        dedented = textwrap.dedent(text)
        if dedented.startswith('\n'):
            dedented = dedented[1:]
        # json.dumps handles every character that is illegal inside a JSON
        # string (escaping only '"' would leave backslashes and tabs broken);
        # [1:-1] strips the quotes it adds around the value.
        return json.dumps(dedented, ensure_ascii=False)[1:-1]

    def get_cell_id(self) -> str:
        """Return the next cell id: 8 lowercase hex digits.

        Each id is derived from the previous one, so the sequence is
        deterministic. A private generator is used so the state of the global
        ``random`` module is left untouched.
        """
        rng = random.Random(self.prev_id)
        self.prev_id = '%08x' % rng.randrange(16 ** 8)
        return self.prev_id

    def _text_cell(self, template: str, text: str) -> str:
        """Render ``template`` with a fresh cell id and the escaped ``text``."""
        ctx = {'id': self.get_cell_id(),
               'content': self._jupiter_txt(text)}
        return _render_template_from_str(template, ctx)

    def code(self, code_str: str) -> str:
        """Return the JSON fragment of a code cell whose source is ``code_str``."""
        return self._text_cell(code_tpl, code_str)

    def markdown(self, contents) -> str:
        """Return the JSON fragment of a markdown cell containing ``contents``."""
        return self._text_cell(markdown_tpl, contents)

    def scatter_plot(self, df_name: str, x: str, y: str, hue: str) -> str:
        """Return the JSON fragment of a code cell calling ``sns.scatterplot``.

        Args:
            df_name: Name of the dataframe variable in the generated code.
            x: Value rendered as the ``x`` argument.
            y: Value rendered as the ``y`` argument.
            hue: Value rendered as the ``hue`` argument; '' becomes 'None'.
        """
        # NOTE(review): scatterplot_tpl wraps hue in quotes, so this ends up as
        # the string 'None' in the generated code rather than the None object.
        # Fixing it needs a matching change in the template -- confirm intent.
        if hue == '':
            hue = 'None'
        ctx = {'id': self.get_cell_id(),
               'df': df_name,
               'x': x,
               'y': y,
               'hue': hue}
        return _render_template_from_str(scatterplot_tpl, ctx)

    @classmethod
    def render_notebook(cls, cell_list) -> str:
        """Return the full notebook document built from ``cell_list``.

        ``cell_list`` holds rendered cell fragments, in display order.
        """
        ctx = {'cells': cell_list}
        return _render_template_from_str(notebook_tpl, ctx)


def header(title: str):
    """Print ``title`` in square brackets with 20 dashes on either side."""
    dashes = 20 * '-'
    print('{0}[{1}]{0}'.format(dashes, title))



# Source text for the generated notebook's import cell (passed to
# ``NotebookGenerator.code`` in ``generate_notebook``). The common leading
# indentation is stripped by ``textwrap.dedent`` in ``_jupiter_txt``.
import_str = """
        import pandas as pd
        import numpy as np
        # Loading the plotting libraries.
        import seaborn as sns
        import matplotlib.pyplot as plt
        %matplotlib inline
"""

# Source text for the generated cell that reads the training CSV into
# ``train_df``, the dataframe name used by the plotting cells.
load_df_str = """
        train_df = pd.read_csv('../data/train.csv')
"""

def generate_notebook():
    """Build the sample notebook, print its JSON and save it as ``test_nb``.

    The notebook consists of markdown headings alternating with code cells:
    imports, loading the dataframe, two scatter plots and two graph cells.
    """
    nb = NotebookGenerator()
    cell_list = [
        nb.markdown("""## Import packages"""),
        nb.code(import_str),
        # Heading text fixed: it was misspelt "datafrane" in the output.
        nb.markdown("""## Loading dataframe"""),
        nb.code(load_df_str),
        nb.markdown("""## Draw scatter Age x Fare - PClass"""),
        nb.scatter_plot(df_name='train_df', x='Age', y='Fare', hue='Pclass'),
        nb.markdown("""## Draw scatter Age x Fare - Survived"""),
        nb.scatter_plot(df_name='train_df', x='Age', y='Fare', hue='Survived'),
        nb.markdown("""## Draw graphs from function code"""),
        nb.code(graph_code_part1),
        nb.code(graph_code_part2)
    ]
    header('Dedup')
    nb_data = nb.render_notebook(cell_list)
    print(nb_data)
    _save_notebook('test_nb', nb_data)


# Run the generator: prints the notebook JSON and saves it via _save_notebook.
generate_notebook()

--------------------[Dedup]--------------------

{
 "cells": [
   
 {
   "cell_type": "markdown",
   "id": "f5d15920",
   "metadata": {},
   "source": [
    "## Import packages"
   ]
  },
   
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "88d68648",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [""]
    }
   ],
   "source": ["import pandas as pd\nimport numpy as np\n# Loading the plotting libraries.\nimport seaborn as sns\nimport matplotlib.pyplot as plt\n%matplotlib inline\n"]
  },
   
 {
   "cell_type": "markdown",
   "id": "0b47995a",
   "metadata": {},
   "source": [
    "## Loading datafrane"
   ]
  },
   
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3c842366",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [""]
    }
   ],
   "source": ["train_df = pd.read_csv('../data/train.csv')\n"]
  },
   
 {
   "cell_type": "markdown",
   