<a href="https://colab.research.google.com/github/mayalenE/simple-foc-assistant/blob/main/RAG_notebook_to_html_export.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Acknowledgements: This notebook converts the RAG Colab notebook into an HTML blog post. It is adapted from a notebook by [Alexander Mordvintsev](https://www.google.com/search?q=alexander+mordvinstev&oq=alexander+mordvinstev&gs_lcrp=EgZjaHJvbWUyBggAEEUYOTIJCAEQLhgNGIAEMggIAhAAGA0YHjIICAMQABgNGB4yCAgEEAAYDRge0gEINzIyM2owajeoAgCwAgA&sourceid=chrome&ie=UTF-8).

In [1]:
from google.colab import drive

# Where Google Drive is attached inside the Colab VM.
DRIVE_MOUNT_POINT = '/content/drive'

# force_remount=True asks for a fresh mount even if Drive is already attached.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [2]:
#@title html_prefix
import os
from html import escape

# Directory that receives the generated assets written by the %%file cells.
os.makedirs('out', exist_ok=True)

# Page metadata. TITLE and DESC are interpolated into the <head> below so the
# text only has to be edited in one place.
TITLE = "Building a SimpleFOC AI Assistant with Retrieval Augmented Generation (RAG)"
DESC = "Building a SimpleFOC AI Assistant with Retrieval Augmented Generation (RAG)"
URL = "https://github.com/mayalenE/simple-foc-assistant"

# Everything that precedes the rendered article body in index.html.
# This is an f-string, so the literal braces of the MathJax config are doubled.
# TITLE/DESC are HTML-escaped so characters such as & or " cannot break the markup.
html_prefix = f'''<!DOCTYPE html>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{escape(TITLE, quote=False)}</title>
<meta name="description" content="{escape(DESC)}"/>
<meta property="article:author" content="Mayalen Etcheverry">
<link rel="icon" type="image/png" sizes="32x32" href="https://raw.githubusercontent.com/mayalenE/simple-foc-assistant/main/logo_white_32x32.png">
<link rel="stylesheet" href="highlight.css">
<link rel="stylesheet" href="style.css">

<script>
MathJax = {{
  tex: {{ inlineMath: [['$', '$']] }}
}};
</script>
<script type="text/javascript" id="MathJax-script" async
  src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js">
</script>
'''

In [3]:
#@title style.css
%%file out/style.css
/* Stylesheet for the exported article; the generated page links it as "style.css". */

/* Empty universal rule: currently applies no declarations. */
* {
}
/* Root font settings; the rem sizes used further down are relative to this 16px base. */
html {
    font-size: 16px;
    font-family: sans-serif;
     line-height: 1.6em;
}
/* Centre the page content and cap its width at 60rem. */
body {
  margin: 0 auto;
  max-width: 60rem;
}
/* Uniform 10px margin on the main block-level elements. */
div,p,h1,h2,h3 {
    margin: 10px 10px 10px 10px;
}
/* Headings override the top/bottom margin set just above and use a tighter line height. */
h1,h2,h3 {
    margin-top: 30px;
    margin-bottom: 10px;
    line-height: 1.3em;
}
/* Heading sizes (h1 is reduced again on narrow screens by a media query later in this file). */
h1 {font-size: 3.0rem;}
h2 {font-size: 2.0rem;}
h3 {font-size: 1.5rem;}

/* Rules for a table that is the first <table> among its siblings.
   NOTE(review): presumably this targets a header/metadata table at the top of
   the article; the markup is not produced by the export code, so confirm
   against the source notebook. */
table:nth-of-type(1) {
    width: 100%;
    text-align: left;
    font-size: smaller;
    margin: 10px;
    padding-bottom: 5px;
    padding-top: 5px;
    border-bottom: 1px solid rgba(0, 0, 0, 0.1);
    border-top: 1px solid rgba(0, 0, 0, 0.1);
}

/* Reset spacing on every descendant of that table. */
table:nth-of-type(1) * {
    padding: 0;
    margin: 0;
}

table:nth-of-type(1) td {
    line-height: 1.6em;
}

/* Links inside that table render as plain black text without underline. */
table:nth-of-type(1) a {
    text-decoration: none;
    color: black;
    margin: 0;
}

/* Applies to header cells of every table, not only the first one. */
th {
    font-weight: lighter;
}

/* First column of that table is bold. */
table:nth-of-type(1) td:first-child {
    font-weight: bold;
}

/* Shared box for stream outputs (.output) and highlighted source (.highlight):
   both scroll horizontally instead of overflowing the page. */
.output,.highlight {
    display: block;
    overflow-x: auto;
    margin-top: 10px;
    line-height: 1em;
    padding-bottom: 8px;
}
/* Wrapper emitted around each exported code cell. */
.code-cell {
    margin: 20px 0 20px 0;
    display: block;
}
.output {
    font-size: 80%;
}
/* Overrides the 1em line height from the shared rule above for source code. */
.highlight {
    font-size: 90%;
    line-height: 1.2em;
}
/* <pre> is made inline, so layout comes from the surrounding .output/.highlight block. */
pre {
    display: inline;
}
/* Centre embedded media and keep it within 90% of the available width. */
img,video,iframe {
    display:block;
    margin: auto;
    max-width:90%;
}

/* Remove the border from elements with class "err" inside highlighted code.
   NOTE(review): presumably overrides the Pygments error-token style from highlight.css. */
.highlight .err {
    border: 0px;
}

/* Rules in this section target ids/classes that the export code does not generate.
   NOTE(review): presumably they style raw HTML written in the source notebook's
   markdown cells (demo link, Colab/GitHub/arXiv badges); confirm there. */
#pdemo {
    text-align: center;
    border-bottom: 1px solid rgba(0, 0, 0, 0.1);
}

#repro {
    text-align: left;
}

/* Small pill-shaped, upper-cased label. */
.colab-root {
    display: inline-block;
    background: rgba(255, 255, 255, 0.75);
    padding: 2px 8px;
    border-radius: 4px;
    font-size: 10px!important;
    text-decoration: none;
    color: #aaa;
    font-weight: 500;
    border: solid 1px rgba(0, 0, 0, 0.08);
    border-bottom-color: rgba(0, 0, 0, 0.15);
    text-transform: uppercase;
    line-height: 16px;
}

/* Shows colab.svg as a background icon to the left of the text (24px left padding makes room). */
span.colab-span {
    background-image: url(colab.svg);
    background-repeat: no-repeat;
    background-size: 20px;
    background-position-y: 1px;
    display: inline-block;
    padding-left: 24px;
}

/* Shows github.svg as a background icon to the right of the text (24px right padding makes room). */
span.github-span {
    background-image: url(github.svg);
    background-repeat: no-repeat;
    background-size: 20px;
    background-position: center right;
    display: inline-block;
    padding-right: 24px;
}


/* Faded icon with a not-allowed cursor.
   NOTE(review): arxiv.svg is referenced here but no cell in this notebook writes it to out/. */
#arxiv {
    background-image: url(arxiv.svg);
    vertical-align: bottom;
    background-repeat: no-repeat;
    background-size: 30px;
    background-position: center;
    padding-left: 40px;
    /* padding-top: 6px; */
    padding-bottom: 20px;
    filter: opacity(0.3);
    cursor: not-allowed;
    /* margin-left: 10px; */
    /*    background-color: #b31b1b;*/
}

/* Large, bold call-to-action styling. */
#demo {
    font-size: 25px!important;
    font-weight: bold;
    padding: 12px 12px;
    background-color: #4a9cda;
    color: #f5d36b;
    border: solid 1px rgba(0, 0, 0, 0.25);
    border-bottom-color: rgba(0, 0, 0, 0.55);
}

/* Table-of-contents links: plain black text, underlined only on hover. */
.toc a {
    text-decoration: none;
    color: black;
}

.toc a:hover {
    text-decoration: underline;
    color: black;
}

/* Top-level TOC list: no bullets, small indent. */
.toc ul {
    list-style-type: none;
    padding-left: 10px;
/*    list-style-position: inside;*/
/*    padding: 0;*/
/*    margin: 0;*/
}

/* Nested TOC levels are indented further and rendered smaller. */
.toc ul ul {
    padding-left: 15px;
    font-size: smaller;
}


/* Hide the TOC entirely on viewports up to 1440px wide. */
@media(max-width: 1440px) {
    .toc {
        display: none;
    }
}

/* Narrow screens: smaller h1 and hide the #reprotext element. */
@media(max-width: 800px) {
    h1 {
      font-size: 1.8rem;
    }

    #reprotext {
      display: none;
    }

}

.toc {
    /* Table-of-contents sidebar: floated left and pulled 300px outside the
       content column by the negative margin, so it sits in the left gutter.
       NOTE(review): presumably this is the element rendered by the Markdown
       TOC extension used for index.html. */
    border-right: 1px solid rgba(0, 0, 0, 0.1);
    padding-right: 20px;
    position: sticky; /*relative;*/
    /* `position: sticky` only takes effect when an inset is set; without
       `top` the sidebar simply scrolled away with the page. */
    top: 10px;
    /* Keep a long TOC reachable while stuck: bound it to the viewport height
       and let it scroll on its own. */
    max-height: calc(100vh - 20px);
    overflow-y: auto;
    /*    bottom: 10px;*/
    background-color: white;
    /*    left: 200px;*/
    float: left;
    margin-left: -300px;
    width: 250px;
    /*left: -200px;*/
}

#colablink {
    padding: 2px 4px;
}

/* Box for cell outputs of type text/markdown, which the exporter wraps in
   <div class="markdown-output">; shown on a floralwhite background. */
.markdown-output {
    display: block;
    overflow-x: auto;
    margin-top: 10px;
    line-height: 1em;
    padding-bottom: 8px;
    background: floralwhite;
}

/* Headings inside a markdown output are smaller than the article's own headings. */
.markdown-output h1 {
    font-size: 1.5rem;
}

.markdown-output h2 {
    font-size: 1rem;
}

/* Block quotes share the look of .markdown-output, with a smaller font. */
blockquote {
    display: block;
    overflow-x: auto;
    margin-top: 10px;
    line-height: 1em;
    padding-bottom: 8px;
    background: floralwhite;
    font-size: small;
}

Writing out/style.css


In [4]:
#@title github.svg
%%file out/github.svg

<!-- 16x16 single-path icon in dark grey (#1B1F23); style.css uses this file as the background image of span.github-span. -->
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8C0 11.54 2.29 14.53 5.47 15.59C5.87 15.66 6.02 15.42 6.02 15.21C6.02 15.02 6.01 14.39 6.01 13.72C4 14.09 3.48 13.23 3.32 12.78C3.23 12.55 2.84 11.84 2.5 11.65C2.22 11.5 1.82 11.13 2.49 11.12C3.12 11.11 3.57 11.7 3.72 11.94C4.44 13.15 5.59 12.81 6.05 12.6C6.12 12.08 6.33 11.73 6.56 11.53C4.78 11.33 2.92 10.64 2.92 7.58C2.92 6.71 3.23 5.99 3.74 5.43C3.66 5.23 3.38 4.41 3.82 3.31C3.82 3.31 4.49 3.1 6.02 4.13C6.66 3.95 7.34 3.86 8.02 3.86C8.7 3.86 9.38 3.95 10.02 4.13C11.55 3.09 12.22 3.31 12.22 3.31C12.66 4.41 12.38 5.23 12.3 5.43C12.81 5.99 13.12 6.7 13.12 7.58C13.12 10.65 11.25 11.33 9.47 11.53C9.76 11.78 10.01 12.26 10.01 13.01C10.01 14.08 10 14.94 10 15.21C10 15.42 10.15 15.67 10.55 15.59C13.71 14.53 16 11.53 16 8C16 3.58 12.42 0 8 0Z" fill="#1B1F23"/>
</svg>

Writing out/github.svg


In [5]:
#@title colab.svg
%%file out/colab.svg
<!-- 24x15 icon built from five filled paths in two yellow/orange tones; style.css uses this file as the background image of span.colab-span. -->
<svg width="24px" height="15px" viewBox="0 0 24 15" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
    <!-- Generator: Sketch 48.2 (47327) - http://www.bohemiancoding.com/sketch -->
    <g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
        <g id="colab" fill-rule="nonzero">
            <path d="M1.977,11.77 C-0.69,9.493 -0.628,4.691 1.977,2.413 C2.919,3.057 3.522,4.075 4.49,4.691 C3.338,6.291 3.344,7.892 4.486,9.494 C3.522,10.111 2.918,11.126 1.977,11.77 Z" id="Shape" fill="#FDBA18"></path>
            <path d="M12.257,12.114 C10.49,10.481 9.772,8.456 10.139,6.094 C10.59,3.184 12.278,1.201 15.085,0.416 C17.65,-0.302 20.049,0.199 21.963,2.235 C21.079,2.978 20.256,3.782 19.529,4.681 C18.488,3.827 17.319,3.435 16,3.856 C13.596,4.623 12.954,7.097 14.506,9.5 C14.265,9.775 14.013,10.041 13.785,10.326 C13.295,10.939 12.511,11.3 12.257,12.114 Z" id="Shape" fill="#FCD93D"></path>
            <path d="M19.529,4.682 C20.256,3.783 21.079,2.979 21.963,2.236 C24.666,5.019 24.664,9.267 21.958,12 C19.31,14.674 15.022,14.725 12.257,12.115 C12.511,11.301 13.295,10.94 13.785,10.327 C14.013,10.042 14.265,9.775 14.506,9.501 C15.559,10.417 16.76,10.769 18.106,10.331 C20.502,9.551 21.151,6.927 19.529,4.682 Z" id="Shape" fill="#FDBA18"></path>
            <path d="M4.49,4.691 C3.522,4.075 2.919,3.057 1.977,2.413 C4.186,0.015 7.698,-0.529 10.453,1.058 C11.008,1.378 11.172,1.664 10.738,2.186 C10.581,2.374 10.48,2.608 10.347,2.817 C10.048,3.287 9.838,3.884 9.418,4.188 C8.933,4.539 8.523,3.847 8.021,3.746 C6.673,3.475 5.509,3.787 4.49,4.691 Z" id="Shape" fill="#FCD93D"></path>
            <path d="M1.977,11.77 C2.918,11.126 3.522,10.111 4.486,9.493 C5.859,10.645 7.336,10.926 8.936,9.992 C9.268,9.798 9.439,9.904 9.609,10.182 C9.995,10.817 10.362,11.467 10.79,12.072 C11.13,12.552 11.012,12.787 10.537,13.078 C7.84,14.73 4.205,14.188 1.977,11.77 Z" id="Shape" fill="#FCD93D"></path>
        </g>
    </g>
</svg>

Writing out/colab.svg


In [43]:
from google.colab import output
import os

# Single source of truth for the preview port: the server below and the Colab
# window must agree on it.
PREVIEW_PORT = 8888

# Start a static file server for the current working directory. The trailing
# '&' backgrounds it in the shell so this cell returns immediately.
os.system(f'python -m http.server {PREVIEW_PORT} &')

# Open a browser window that shows what the kernel serves on that port.
output.serve_kernel_port_as_window(PREVIEW_PORT)

<IPython.core.display.Javascript object>

In [44]:
import os
import itertools as it
import base64
import json
import re

import markdown
from markdown.inlinepatterns import SimpleTextPattern
from markdown.extensions import Extension
from markdown.extensions.toc import TocExtension
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

# Input notebook location on the mounted Drive (relative to the current
# working directory) and the output directory for the generated site.
src_dir = 'drive/My Drive/Colab Notebooks/SimpleFOC/'
colab_fn = src_dir+'SimpleFOC-AI-Assistant-RAG.ipynb'
out_dir = 'out/'

# Read the notebook inside a context manager so the handle is closed, and
# decode explicitly as UTF-8 instead of relying on the platform default.
with open(colab_fn, 'r', encoding='utf-8') as notebook_file:
    notebook_json = json.load(notebook_file)

In [45]:
def save(fn, data):
    """Write ``data`` to the file ``out_dir + fn``.

    Args:
        fn: File name relative to the module-level ``out_dir`` prefix.
        data: ``bytes`` (written in binary mode), ``str``, or a list of
            ``str`` fragments that are concatenated before writing.

    Text is always encoded as UTF-8 so the file matches the
    ``<meta charset="utf-8">`` declared in the page header, regardless of the
    platform's default encoding.
    """
    if isinstance(data, list):
        data = ''.join(data)
    path = out_dir + fn
    if isinstance(data, bytes):
        with open(path, 'wb') as f:
            f.write(data)
    else:
        with open(path, 'w', encoding='utf-8') as f:
            f.write(data)

# Pygments objects shared by every highlighted code cell.
lexer = PythonLexer()
formatter = HtmlFormatter()

# Write the stylesheet matching the formatter, scoped to the ".highlight" class.
highlight_css = formatter.get_style_defs('.highlight')
save('highlight.css', highlight_css)

# Accumulators filled while walking the notebook: the page fragments, and a
# counter that numbers the extracted image files.
html = []
img_count = it.count()

def _as_text(value):
    """Return a notebook text field as one ``str``.

    Notebook JSON may store text either as a single string or as a list of
    line strings; both forms are accepted.
    """
    return value if isinstance(value, str) else ''.join(value)


def parse_outputs(c):
    """Append the HTML rendering of a code cell's outputs to the global ``html`` list.

    Args:
        c: A code-cell dict from the notebook JSON. Its ``'outputs'`` entries
            are handled by ``output_type``:

            * ``'stream'``: text is HTML-escaped and wrapped in
              ``<div class="output"><pre>``.
            * ``'display_data'``: ``text/html`` is inserted as-is; otherwise
              ``text/markdown`` is rendered inside
              ``<div class="markdown-output">``. PNG/JPEG payloads are decoded,
              saved through ``save`` under a running two-digit index, and
              referenced with an ``<img>`` tag.

            Other output types are ignored.

    Side effects: mutates the module-level ``html`` list, advances
    ``img_count`` and writes image files via ``save``.
    """
    # Imported locally under its own name: the module-level name ``html`` is
    # the fragment list, so the stdlib module cannot be bound to it.
    from html import escape

    for o in c.get('outputs', []):
        output_type = o['output_type']
        if output_type == 'stream':
            # Escape &, < and > so printed text cannot be parsed as markup.
            text = escape(_as_text(o['text']), quote=False)
            html.append('<div class="output"><pre>' + text + '</pre></div>\n')

        elif output_type == 'display_data':
            data = o['data']
            if 'text/html' in data:
                # Normalise to str: a list here would break the later "".join(html).
                html.append(_as_text(data['text/html']))
            elif 'text/markdown' in data:
                html.append('\n<div class="markdown-output">\n' +
                            markdown.markdown(_as_text(data['text/markdown'])) +
                            '\n</div>')
            for fmt in ['image/png', 'image/jpeg']:
                if fmt in data:
                    # File name is "<index>.<subtype>", e.g. "00.png".
                    fn = f'{next(img_count):02}.{fmt.split("/")[1]}'
                    save(fn, base64.b64decode(_as_text(data[fmt])))
                    html.append(f'<img src="{fn}">')

# Walk the notebook cells in order and accumulate the article body in ``html``:
# markdown cells are copied verbatim (rendered later in one Markdown pass),
# code cells become a <div class="code-cell"> holding the highlighted source
# and the cell's outputs.
cells_iter = iter(notebook_json['cells'])
md = []
for c in cells_iter:
    source = ''.join(c['source'])
    if source.startswith('# !export'):
        break  # article body end
    if c['cell_type'] == 'markdown':
        # End each markdown cell with a blank line; with a single newline two
        # adjacent markdown cells would be parsed as one continuous block.
        html.append(source+'\n\n')
        md.append(source)
    elif c['cell_type'] == 'code':
        html.append('\n<div class="code-cell">\n')
        # Cells displayed as a Colab form keep their outputs but hide the source.
        if c.get('metadata', {}).get('cellView') != 'form':
            html.append(highlight(source, lexer, formatter)+'\n')
        parse_outputs(c)
        # Blank line after the closing tag so following markdown starts a new block.
        html.append('</div>\n\n')

class MathEscapeExtension(Extension):
    """Markdown extension that registers an inline pattern for ``$...$`` spans.

    The pattern is registered under the name ``'math'`` with priority 175 and
    handled by ``SimpleTextPattern``.
    NOTE(review): presumably this keeps inline math untouched by other inline
    rules so MathJax receives it verbatim; confirm against the Python-Markdown
    inline-pattern documentation.
    """

    # Non-greedy match of one dollar-delimited span.
    MATH_REGEX = r'(\$.+?\$)'
    # Name and priority used when registering the pattern.
    PATTERN_NAME = 'math'
    PATTERN_PRIORITY = 175

    def extendMarkdown(self, md):
        """Register the math pattern on the given Markdown instance."""
        math_pattern = SimpleTextPattern(self.MATH_REGEX)
        md.inlinePatterns.register(math_pattern, self.PATTERN_NAME, self.PATTERN_PRIORITY)

# Render the accumulated fragments in a single Markdown pass. Note that this
# rebinds ``html`` from the list of fragments to the rendered string.
markdown_extensions = [
    'tables',
    'fenced_code',
    TocExtension(toc_depth='2-4'),
    MathEscapeExtension(),
]
article_source = "".join(html)
html = markdown.markdown(article_source, extensions=markdown_extensions)

# The final page is the static header followed by the rendered body.
save('index.html', html_prefix+html)

In [9]:
# Bundle the files directly under out/ (the glob does not descend into subdirectories) into out.zip.
!zip out.zip out/*

  adding: out/colab.svg (deflated 55%)
  adding: out/github.svg (deflated 49%)
  adding: out/highlight.css (deflated 79%)
  adding: out/index.html (deflated 81%)
  adding: out/style.css (deflated 72%)


In [None]:
cp out.zip "{src_dir}"