<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Generate Readme for Awesome Notebooks

## Input

### Import libraries

In [None]:
import os
import requests
import pandas as pd
import naas_drivers
import urllib.parse
import json
import copy
import markdown
import nbformat
from nbconvert import MarkdownExporter
from papermill.iorw import (
    load_notebook_node,
    write_ipynb,
)
try:
    from git import Repo
except:
    !pip install GitPython
    from git import Repo

### Variables

In [None]:
# --- README generation -------------------------------------------------
# Template that is read, placeholder that is substituted in it, and the
# file the result is written to.
readme_template = "README_template.md"
readme = "README.md"
replace_var = "[[DYNAMIC_LIST]]"

# --- JSON export ---------------------------------------------------------
json_file = "templates.json"

# --- Paths and file types ------------------------------------------------
current_file = '.'
notebook_ext = '.ipynb'

# --- GitHub / Naas URLs --------------------------------------------------
# Every GitHub link below targets the same repository, so they are all
# derived from a single slug.
_repo_slug = "jupyter-naas/awesome-notebooks"
_repo_home = f"https://github.com/{_repo_slug}"
_new_issue = f"{_repo_home}/issues/new"

github_url = f"{_repo_home}/tree/master"
github_download_url = f"https://raw.githubusercontent.com/{_repo_slug}/master/"
naas_download_url = 'https://app.naas.ai/user-redirect/naas/downloader?url='
naas_logo = 'https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg'
# Pre-filled "new issue" links; `bug_report` ends with `title=` so callers
# can append the issue title.
template_request = f"{_new_issue}?assignees=&labels=&template=template-request.md&title=Tool+-+Action+of+the+notebook+"
bug_report = f"{_new_issue}?assignees=&labels=bug&template=bug_report.md&title="

### Get files list

In [None]:
# List every notebook of the repository through the GitHub "git trees" API,
# using the branch currently checked out in the local clone.
repo = Repo('.')
branch = repo.active_branch
list_of_dir = f"https://api.github.com/repos/jupyter-naas/awesome-notebooks/git/trees/{branch.name}?recursive=1"

response_gh = requests.get(list_of_dir)
# Fail with the HTTP error (unknown branch, rate limit, ...) instead of
# crashing later while iterating over a missing "tree" entry.
response_gh.raise_for_status()
r_gh = response_gh.json().get("tree")
if r_gh is None:
    raise RuntimeError(f"No 'tree' entry in the GitHub response for {list_of_dir}")

# One entry per notebook: "root" is the folder path, "subdir" the file name.
notebooks = []
for file in r_gh:
    file_path = file.get("path")
    # Skip GitHub/git configuration files and anything stored at the
    # repository root (paths without a folder part).
    if ".github" in file_path or ".gitignore" in file_path or "/" not in file_path:
        continue
    if not file_path.endswith(".ipynb"):
        continue
    # Split on the last "/" only: everything before it is the folder path.
    root, _, file_name = file_path.rpartition("/")
    notebooks.append({"root": root, "subdir": file_name})

df_github = pd.DataFrame(notebooks)
df_github

## Model

### Reformat functions

In [None]:
def reformat_file_name(file):
    """Turn a notebook file name into a human-readable title.

    Underscores become spaces, every occurrence of the notebook extension
    (`notebook_ext`) and of the current folder name are removed, surrounding
    whitespace is stripped and the first character is capitalized.

    NOTE: the folder name is read from the module-level variable
    `folder_nice`, which must be set before this function is called.

    Args:
        file: Notebook file name.

    Returns:
        The cleaned-up title, or an empty string when nothing is left.
    """
    cleaned = (
        file.replace('_', ' ')
        .replace(notebook_ext, '')
        .replace(folder_nice, '')
        .strip()
    )
    if cleaned == "":
        return cleaned
    # Only the first character is touched; the rest keeps its original case.
    return cleaned[0].capitalize() + cleaned[1:]

### Get functions

In [None]:
def get_open_button(download_link, title_url):
    """Build the HTML snippet shown under a notebook title.

    The snippet is a clickable logo image (`naas_logo`) linking to
    `download_link`, followed after two line breaks by a "Template request"
    link and a "Bug report" link whose issue title starts with `title_url`.

    Args:
        download_link: URL used as the target of the logo link.
        title_url: Text appended to `bug_report` to pre-fill the issue title.

    Returns:
        The HTML snippet as a single-line string.
    """
    open_link = f'<a href="{download_link}" target="_parent"><img src="{naas_logo}"/></a>'
    request_link = f'<a href="{template_request}">Template request</a>'
    bug_link = f'<a href="{bug_report}{title_url}:+Error+short+description">Bug report</a>'
    return f"{open_link}<br><br>{request_link} | {bug_link}"

def get_title(folder_nice, file_nice, download_link):
    """Build the Markdown header written into the first cell of a notebook.

    Args:
        folder_nice: Display name of the folder.
        file_nice: Display name of the notebook.
        download_link: URL passed to `get_open_button` for the logo link.

    Returns:
        A level-1 Markdown heading "<folder> - <notebook>" followed, on the
        next line, by the HTML produced by `get_open_button`.
    """
    heading = f"{folder_nice} - {file_nice}"
    # The same heading, with spaces turned into "+", pre-fills the bug report title.
    button = get_open_button(download_link, heading.replace(" ", "+"))
    return f"# {heading}\n{button}"

def get_tags(text):
    """Extract hashtag-style tags from a piece of text.

    The text is split on single space characters only; a token is kept when
    it is at least two characters long, starts with '#', and its second
    character is neither a space nor another '#'.

    Args:
        text: Text to scan.

    Returns:
        The matching tokens (leading '#' included), in order of appearance.
    """
    if len(text) == 0:
        return []
    return [
        word
        for word in text.split(' ')
        if len(word) >= 2 and word[0] == '#' and word[1] not in (' ', '#')
    ]

def get_author(text):
    author = "Unknown author"
    url = "Unknown author URL"
    if len(text) > 0:
        author = text.split("**Author:**")[-1].split("]")[0].replace("[", "").strip()
        url = text.split("**Author:**")[-1].split("(")[-1].replace(")", "").strip()
    return author, url

def get_description(text):
    description = "No description."
    if len(text) > 0 and text != "## Input":
        description = text.split("**Description:**")[-1].split("]")[0].replace("[", "").strip()
    return description

### Set 'Naas Download' link on notebook

In [None]:
def set_notebook_title_and_get_meta(notebook_path, title_source, final_title, good_format):
    """Rewrite a notebook's title cell, clear its outputs and read its metadata.

    The notebook is loaded, the outputs of every code cell are emptied and
    the first Markdown cell that starts with "# " is replaced by
    `title_source`. The result is written back to `notebook_path`, and the
    file is then renamed to `final_title` when `good_format` equals 1.

    Metadata is read by position: tags from the third cell, author from the
    fourth and description from the fifth. A notebook with fewer cells gets
    the defaults of `get_tags`, `get_author` and `get_description` for the
    missing ones instead of raising `IndexError` after the file has already
    been rewritten.

    Args:
        notebook_path: Path of the notebook to update.
        title_source: New source for the header cell.
        final_title: Path the notebook is renamed to when `good_format` is 1.
        good_format: 1 to rename the notebook after writing it.

    Returns:
        A `(tags, author, author_url, description)` tuple.
    """
    nb = copy.deepcopy(load_notebook_node(notebook_path))

    header_found = False
    for cell in nb.cells:
        # Clean outputs
        if cell.cell_type == "code":
            cell.outputs = []
        # Replace only the first Markdown cell that looks like a level-1 title
        if (
            not header_found
            and cell.cell_type == "markdown"
            and len(cell.source) > 2
            and cell.source.startswith("# ")
        ):
            cell.source = title_source
            header_found = True

    # Save the notebook with the normalized title
    write_ipynb(nb, notebook_path)
    # Rename the notebook if the tool name is not the same
    if good_format == 1:
        os.rename(notebook_path, final_title)

    def _source_at(position):
        """Return the source of the cell at `position`, or "" if it is missing."""
        if position < len(nb.cells):
            return nb.cells[position].get("source") or ""
        return ""

    # Meta
    tags = get_tags(_source_at(2))
    author, author_url = get_author(_source_at(3))
    description = get_description(_source_at(4))
    return tags, author, author_url, description

### Convert file paths to Markdown text

In [None]:
def get_file_md(folder_nice, folder_url, files, json_templates, title_sep="##", subtitle_sep="*"):
    """Build the Markdown section of one folder and collect its JSON entries.

    For every file ending with `notebook_ext`, the notebook itself is
    updated through `set_notebook_title_and_get_meta` (new title cell,
    cleared outputs, optional rename), one bullet linking to the notebook on
    GitHub is added to the section, and one dict describing the notebook is
    appended to `json_templates`.

    Args:
        folder_nice: Folder name used in the section heading and in titles.
        folder_url: URL-quoted folder path used to build links.
        files: File names found in the folder.
        json_templates: List that receives one dict per notebook (mutated
            in place).
        title_sep: Markdown prefix of the section heading.
        subtitle_sep: Markdown prefix of each notebook bullet.

    Returns:
        The Markdown text of the section, or "" when `files` is empty.
    """
    if len(files) == 0:
        return ""

    sections = [f"\n{title_sep} {folder_nice}\n"]
    for file in files:
        if not file.endswith(notebook_ext):
            continue

        file_url = urllib.parse.quote(file)
        file_nice = reformat_file_name(file)

        # When the folder name contains an underscore (its first "_"-separated
        # token differs from the whole name), the notebook is renamed so that
        # the first token of its file name is replaced by the folder name.
        if folder_nice.split("_")[0] != folder_nice:
            renamed_file = "_".join([folder_nice] + file.split("_")[1:])
            final_title = folder_nice + "/" + renamed_file
            good_format = 1
        else:
            final_title = ""
            good_format = 0

        # Local path of the notebook (the URL quoting is undone)
        path = urllib.parse.unquote(f"{folder_url}/{file_url}")
        # Download URL used by the "open" button
        dl_url = f"{naas_download_url}{github_download_url}{folder_url}/{file_url}"
        # Header cell: "<folder> - <notebook>" followed by the open button
        title = get_title(folder_nice, file_nice, dl_url)
        # Update the notebook on disk and read its metadata
        tags, author, author_url, description = set_notebook_title_and_get_meta(
            path, title, final_title, good_format
        )

        # Bullet linking the notebook name to its GitHub page
        nb_redirect = f"[{file_nice}]({github_url}/{folder_url}/{file_url})"
        sections.append(f"{subtitle_sep} {nb_redirect}\n")

        # Only the logo link (the part before "<br><br>") is kept as the JSON action
        title_url = f"{folder_nice} - {file_nice}".replace(" ", "+")
        open_button = get_open_button(dl_url, title_url)
        json_templates.append({
            'tool': folder_nice,
            'notebook': file_nice,
            'tags': tags,
            'update': '',
            'action': open_button.split("<br><br>")[0],
            'author': author,
            'author_url': author_url,
            'description': description
        })
    return "".join(sections)

### Generate Markdown for each notebook

In [None]:
# Build the Markdown list of all notebooks, one section per folder, and
# fill `json_templates` with one entry per notebook.
generated_list = ""
json_templates = []
list_of_tools = []

# Group the file names by folder first, keeping the order in which folders
# first appear. Walking the listing with a manual index never advanced when a
# folder showed up again later in the listing (for example "A/x.ipynb",
# "A/sub/y.ipynb", "A/z.ipynb"), which made the loop run forever.
files_by_folder = {}
for notebook in notebooks:
    files_by_folder.setdefault(notebook.get("root"), []).append(notebook.get("subdir"))

# NOTE: the loop variable must stay the module-level name `folder_nice`,
# because reformat_file_name() reads it as a global.
for folder_nice, files in files_by_folder.items():
    if folder_nice == "":
        continue
    list_of_tools.append(folder_nice)
    folder_url = urllib.parse.quote(folder_nice)
    print(folder_nice)
    for file_name in files:
        print(file_name)
    if "/" not in folder_nice:
        md_round = get_file_md(folder_nice, folder_url, files, json_templates)
    else:
        # Nested folder: use the second path component as the section name
        # and render it one level deeper.
        subfolder_nice = folder_nice.split('/')[1].replace('_', ' ').strip()
        md_round = get_file_md(subfolder_nice, folder_url, files, json_templates, "\t###", "\t-")
    generated_list += md_round

## Output

### Preview the generated list

In [None]:
# Show the generated Markdown list in the notebook so it can be checked
# before the README and JSON files are written.
naas_drivers.markdown.display(generated_list)

### Generate README for GitHub repository

In [None]:
# Read the README template; the context manager closes the file handle
# even if reading fails.
with open(readme_template, encoding="utf-8") as f:
    template = f.read()

# Replace the placeholder with the generated list of templates (Markdown)
template = template.replace(replace_var, generated_list)

# Save README (the file is created or overwritten)
with open(readme, "w", encoding="utf-8") as f:
    f.write(template)

### Generate json for naas manager

In [None]:
# Write the collected notebook entries to the JSON file
# (created or overwritten).
with open(json_file, 'w') as f:
    f.write(json.dumps(json_templates))