<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Generate Readme for Awesome Notebooks

## Input

### Import librairies

In [82]:
import os
import requests
import pandas as pd
import urllib.parse
try:
    from git import Repo
except:
    !pip install GitPython
    from git import Repo
from naas_drivers import markdown

### Setup Variables

In [4]:
{
    'tool': 'AWS',
    'notebook': 'Daily biling notification to slack',
    'tags': [
        '#aws',
        '#cloud',
        '#storage',
        '#S3bucket',
        '#slack',
        '#operations',
        '#automation'
    ],
    'action': '<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/AWS/AWS_Daily_biling_notification_to_slack.ipynb" target="_parent"><img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a>',
    'author': 'Maxime Jublou',
    'author_url': 'https://www.linkedin.com/in/maximejublou/',
    'description': 'This notebook sends a daily notification to a Slack channel with the billing information from an AWS account. It allows users to easily keep track of their AWS spending.',
    "open_in_lab": "https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/AWS/AWS_Daily_biling_notification_to_slack.ipynb",
    "open_in_chat": "https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/AWS/AWS_Daily_biling_notification_to_slack.ipynb",
    "notebook_url": "https://github.com/jupyter-naas/awesome-notebooks/blob/master/LinkedIn/LinkedIn_Follow_content_views_weekly.ipynb",
    "imports": ["os.path"],
    "updated_at": "",
    "image_url": "",
}

{'tool': 'AWS',
 'notebook': 'Daily biling notification to slack',
 'tags': ['#aws',
  '#cloud',
  '#storage',
  '#S3bucket',
  '#slack',
  '#operations',
  '#automation'],
 'action': '<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/AWS/AWS_Daily_biling_notification_to_slack.ipynb" target="_parent"><img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a>',
 'author': 'Maxime Jublou',
 'author_url': 'https://www.linkedin.com/in/maximejublou/',
 'description': 'This notebook sends a daily notification to a Slack channel with the billing information from an AWS account. It allows users to easily keep track of their AWS spending.',
 'open_in_lab': 'https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/AWS/AWS_Daily_biling_notification_to_slack.ipynb',
 'open_in_chat': 'https://app.naas.ai/user-redirect/naas/

In [83]:
# Inputs
readme_template = "README_template.md"
naas_lab_logo = "https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"
naas_chat_logo = "https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"
template_request = "https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=&template=template-request.md&title=Tool+-+Action+of+the+notebook+"
bug_report = "https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title="
start_data_product = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/Naas/Naas_Start_data_product.ipynb"

# Outputs
json_file = "templates.json"
naas_lab_url ='https://app.naas.ai/user-redirect/naas/downloader?url='
naas_chat_url = "https://workspace.naas.ai/chat/use?plugin_url="
readme = "README.md"

## Model

### Get all notebooks

In [84]:
def get_all_notebooks():
    # Init
    html_url_base = "https://github.com/jupyter-naas/awesome-notebooks/blob/master"
    raw_url_base = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master"
    notebooks = []
    res_json = []
    
    # Connect to GitHub and get branch
    repo = Repo('.')
    branch = repo.active_branch
    
    # Get tree from json
    url = f"https://api.github.com/repos/jupyter-naas/awesome-notebooks/git/trees/{branch.name}?recursive=1"
    res = requests.get(url)
    if res.status_code == 200:
        res_json = res.json().get("tree")
    
    # Loop on files
    for r in res_json:
        root = None
        file_name = None
        file_path = r.get("path")
        notebook_path = urllib.parse.quote(file_path)
        if ".github" not in file_path and ".gitignore" not in file_path and "/" in file_path:
            if file_path.endswith(".ipynb"):
                data = {
                    "tool": file_path.split("/")[0],
                    "notebook_name": file_path.split("/")[1],
                    "notebook_path": notebook_path,
                    "html_url": f"{html_url_base}/{notebook_path}",
                    "raw_url": f"{raw_url_base}/{notebook_path}",
                }
                notebooks.append(data)
    return pd.DataFrame(notebooks)

df_notebooks = get_all_notebooks()
print("Notebooks fetched:", len(df_notebooks))
df_notebooks.head(1)

Notebooks fetched: 752


Unnamed: 0,tool,notebook_name,notebook_path,html_url,raw_url
0,AWS,AWS_Daily_biling_notification_to_slack.ipynb,AWS/AWS_Daily_biling_notification_to_slack.ipynb,https://github.com/jupyter-naas/awesome-notebo...,https://raw.githubusercontent.com/jupyter-naas...


### Create header button, generate markdown & json 

In [91]:
def create_open_button(
    tool,
    title,
    open_in_lab,
    naas_lab_logo,
    open_in_chat,
    naas_chat_logo,
    template_request,
    bug_report,
    naas_lab_url,
    start_data_product,
):
    # Init
    notebook_title = ''
    open_in_lab_url = ''
    open_in_chat_url = ''
    template_request_url = ''
    bug_report_url = ''
    generate_data_product_url = ''
    
    # Create notebook title
    notebook_title = f"# {tool} - {title}\n" # Jupyter Notebooks - Get libraries
    
    # Create logos
    if open_in_lab != "":
        open_in_lab_url = f"""<a href="{open_in_lab}" target="_parent"><img src="{naas_lab_logo}"/></a>"""
    if open_in_chat != "":
        open_in_chat_url = f"""<a href="{open_in_chat}" target="_parent"><img src="{naas_chat_logo}"/></a>"""
        
    # Hyperlinks
    template_request_url = f"""<a href="{template_request}">Template request</a>"""
    
    title_url = (f"{tool}-{title}").replace(" ", "+")
    bug_report_url = f"""<a href="{bug_report}{title_url}:+Error+short+description">Bug report</a>"""
    
    start_data_product_url = f"{naas_lab_url}/{start_data_product}"
    generate_data_product_url = f"""<a href="{start_data_product_url}" target="_parent">Generate Data Product</a>"""
    return f"""{notebook_title}{open_in_lab_url}{open_in_chat_url}<br><br>{template_request_url} | {bug_report_url} | {generate_data_product_url}"""

def get_imports(sources, imports):
    # Loop on sources
    for source in sources:
        if "from" in source and "import" in source:
            lib = (
                source.replace("\n", "")
                .split("from")[-1]
                .split("import")[0]
                .strip()
            )
            module = (
                source.replace("\n", "")
                .split("import")[-1]
                .split(" as ")[0]
                .strip()
            )
            libraries.append(f"{lib}.{module}")
        if "from" not in source and "import" in source:
            library = (
                source.replace("\n", "")
                .split("import")[-1]
                .split(" as ")[0]
                .strip()
            )
            imports.append(library)
    return imports

def get_notebook_info(url):
    # Init
    action = ""
    title = ""
    tags = ""
    author = ""
    author_url = ""
    description = ""
    plugin = False
    imports = []
    
    # Request
    res = requests.get(url)
#     return res.json()
    
    # Manage result
    if res.status_code == 200:
        res_json = res.json()
        cells = res_json.get("cells")
        
        # Get metadata store in fixed cells
        title = cells[1].get("source")[0].replace("#", "").split("-")[-1].strip()
        action = "".join(cells[1].get("source")).split("<br><br>")[0].split("\n")[-1].strip()
        tags = cells[2].get("source")[0].replace("**Tags:**", "").strip()
        tags = [f"#{tag.strip()}" for tag in tags.split("#") if tag != ""]
        author = cells[3].get("source")[0].replace("**Author:**", "").strip()
        author_name = author.replace("[", "").replace("]", "").split("(")[0].strip()
        if "(" in author:
            author_url = author.split("(")[-1].replace(")", "")
        description = "".join(cells[4].get("source")).replace("**Description:**", "").strip()
        
        # Get metadata store in variables cells
        for cell in cells:
            cell_type = cell.get("cell_type")
            metadata = cell.get("metadata")
            sources = cell.get("source")
            if cell_type == "code":
                imports = get_imports(sources, imports)
            if metadata:
                metadata_tags = metadata.get("tags")
                if len(metadata_tags) > 0 and not plugin:
                    for m in metadata_tags:
                        if m == "plugin":
                            plugin = True
                            break
        
    return title, action, tags, author_name, author_url, description, imports, plugin

url = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/OpenAI/OpenAI_Act_as_a_chef.ipynb"
title, action, tags, author_name, author_url, description, imports, plugin = get_notebook_info(url)
print("- Title:", title)
print("- Action:", action)
print("- Tags:", tags)
print("- Author:", author_name)
print("- Author URL:", author_url)
print("- Description:", description)
print("- Imports:", imports)
print("- Plugin:", plugin)

- Title: Act as a chef
- Action: <a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/OpenAI/OpenAI_Act_as_a_chef.ipynb" target="_parent"><img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a>
- Tags: ['#openai', '#chef', '#cooking', '#ai', '#machinelearning', '#deeplearning']
- Author: Florent Ravenel
- Author URL: https://www.linkedin.com/in/florent-ravenel/
- Description: This notebook will create a plugin to act as a chef and use OpenAI to create delicious recipes.
- Imports: ['json', 'naas']
- Plugin: True


In [92]:
generated_list = ""
json_templates = []
folder = None

for row in df_notebooks.itertuples():
    tool = row.tool
    raw_url = row.raw_url
    html_url = row.html_url
    open_in_lab = f"{naas_lab_url}{raw_url}"
    open_in_chat = ""
    
    # Get data from notebook
    title, action, tags, author_name, author_url, description, imports, plugin = get_notebook_info(raw_url)
    
    # Create Open in MyChatGPT URL
    if plugin:
        open_in_chat = f"{naas_chat_url}{raw_url}"
        
    # Create OpenButton
    open_button = create_open_button(
        tool,
        title,
        open_in_lab,
        naas_lab_logo,
        open_in_chat,
        naas_chat_logo,
        template_request,
        bug_report,
        naas_lab_url,
        start_data_product,
    )
    # Display open button
    markdown.display(open_button)
    break

    # Update OpenButton in notebook
    

    # Update json
    new_json = {
        'tool': tool,
        'notebook': title,
        'action': action,
        'tags': tags,
        'author': author_name,
        'author_url': author_url,
        'description':  description,
        "open_in_lab": open_in_lab,
        "open_in_chat": open_in_chat,
        "notebook_url": html_url,
        "imports": imports,
        "updated_at": "",
        "image_url": "",
    }
    json_templates.append(new_json)
    
#     new_folder = row.tool
#     if new_folder != folder:
#         generated_list += f"\n## {new_folder}\n"
        
    break
    
from pprint import pprint    
pprint(new_json)

{'action': '<a '
           'href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/AWS/AWS_Daily_biling_notification_to_slack.ipynb" '
           'target="_parent"><img '
           'src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/></a>',
 'author': 'Maxime Jublou',
 'author_url': 'https://www.linkedin.com/in/maximejublou/',
 'description': 'This notebook sends a daily notification to a Slack channel '
                'with the billing information from an AWS account. It allows '
                'users to easily keep track of their AWS spending.',
 'image_url': '',
 'imports': ['datetime',
             'boto3',
             'naas',
             'dateutil.relativedelta',
             'pandas',
             'naas_drivers'],
 'notebook': 'Daily biling notification to slack',
 'notebook_url': 'https://github.com/jupyter-naas/awesome-notebooks/blob/master/AWS/AWS_Daily_biling_notification_t

### Reformat functions

In [5]:
def reformat_file_name(file):
    file_nice = file.replace('_', ' ')
    file_nice = file_nice.replace(notebook_ext, '')
    file_nice = file_nice.replace(folder_nice, '')
    file_nice = file_nice.strip()
    if (file_nice != ""):
            file_nice = file_nice[0].capitalize() + file_nice[1:]
    return file_nice

### Get functions

In [5]:
def get_open_button(download_link, title_url):
    return f"""<a href="{download_link}" target="_parent"><img src="{naas_logo}"/></a><br><br><a href="{template_request}">Template request</a> | <a href="{bug_report}{title_url}:+Error+short+description">Bug report</a> | <a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/Naas/Naas_Start_data_product.ipynb" target="_parent">Generate Data Product</a>"""

def get_title(folder_nice, file_nice, download_link):
    title_url = (f"{folder_nice} - {file_nice}").replace(" ", "+")
    return f"""# {folder_nice} - {file_nice}\n{get_open_button(download_link, title_url)}"""

def get_tags(text):
    result = []
    if len(text) > 0:
        tags = text.split(' ')
        for tag in tags:
            if len(tag) >= 2 and tag[0] == '#' and tag[1] != ' ' and tag[1] != '#':
                result.append(tag)
    return result

def get_author(text):
    author = "Unknown author"
    url = "Unknown author URL"
    if len(text) > 0:
        author = text.split("**Author:**")[-1].split("]")[0].replace("[", "").strip()
        url = text.split("**Author:**")[-1].split("(")[-1].replace(")", "").strip()
    return author, url

def get_description(text):
    description = "No description."
    if len(text) > 0 and text != "## Input":
        description = text.split("**Description:**")[-1].split("]")[0].replace("[", "").strip()
    return description

### Set 'Naas Download' link on notebook

In [6]:
def set_notebook_title_and_get_meta(notebook_path, title_source, final_title, good_format):
    header_found = False
    tag_found = False
    tags = None
    author = None
    author_url = None
    description = None
    count = 0
    nb = load_notebook_node(notebook_path)
    nb = copy.deepcopy(nb)
    # Parse the entire notebook
    for cell in nb.cells:
        source = cell.source
        # Clean outputs
        if cell.cell_type == "code":
            nb.cells[count].outputs = []
        # Get the header cell
        if not header_found and cell.cell_type == "markdown" and len(source) > 2 and source[0] == '#' and source[1] == ' ':
            nb.cells[count].source = title_source
            header_found = True
        count += 1
    # Set the good title format in the notabook
    write_ipynb(nb, notebook_path)
    # Rename the notebook if the tool name is not the same
    if good_format == 1:
        os.rename(notebook_path, final_title)
        
    # Meta
    tags = get_tags(nb.cells[2].get("source"))
    author, author_url = get_author(nb.cells[3].get("source"))
    description = get_description(nb.cells[4].get("source"))
    return tags, author, author_url, description

### Convert filepath in Markdown text

In [7]:
def get_file_md(folder_nice, folder_url, files, json_templates, title_sep="##", subtitle_sep="*"):
    good_format = 0
    final_title = ""
    md = ""
    folder_name = ""
    tool_name = ""
    tool_title = ""

    if (len(files) > 0):
        md += f"\n{title_sep} {folder_nice}\n"
        for file in files:
            if file.endswith(notebook_ext):
                good_format = 0
                file_url = urllib.parse.quote(file)
                folder_name = folder_nice
                temp = folder_name.split("_")
                tool_name = temp[0]
                file_nice = reformat_file_name(file)
                # Check if the tool name is the same as the tool name in the notebook name
                if tool_name != folder_name:
                    temp = file.split("_")
                    del temp[0]
                    tool_title = folder_name + "_"
                    for i in temp:
                        tool_title += i + "_"
                    final_title = folder_name + "/" + tool_title[:-1]
                    good_format = 1
                path = urllib.parse.unquote(f"{folder_url}/{file_url}")
                # Get the download URL
                dl_url = f"{naas_download_url}{github_download_url}{folder_url}/{file_url}"
                # Put the title to the format "TOOLS - NAME_OF_NOTEBOOK Open_In_Naas"
                title = get_title(folder_nice, file_nice, dl_url)
                # Set the good title format and get meta from the notebooks of the folder
                tags, author, author_url, description = set_notebook_title_and_get_meta(path, title, final_title, good_format)
                # Get the name of the Notebook and the redirect to github link
                nb_redirect = f"[{file_nice}]({github_url}/{folder_url}/{file_url})"
                # Get the open in naas format
                title_url = (f"{folder_nice} - {file_nice}").replace(" ", "+")
                open_button = get_open_button(dl_url, title_url)
                # For the actual file, put the nnotebook name and the gihub link for the return in markdown
                md += f"{subtitle_sep} {nb_redirect}\n"
                new_json = {
                    'tool': folder_nice,
                    'notebook': file_nice,
                    'tags': tags,
                    'update': '',
                    'action': open_button.split("<br><br>")[0],
                    'author': author,
                    'author_url': author_url,
                    'description': description
                }
                json_templates.append(new_json)
    return md

In [None]:
new_json = {
    'tool': folder_nice,
    'notebook': file_nice,
    'tags': tags,
    'author': author,
    'author_url': author_url,
    'description': description,
    'last_update': '',
    'action': open_button.split("<br><br>")[0],
    'action': ,
}

In [8]:
generated_list = ""
json_templates = []
list_of_tools = []
index_max = len(notebooks)
index = 0

while index <= (index_max) - 1:
    folder_nice = notebooks[index].get("root")
    if folder_nice not in list_of_tools and folder_nice != "":
        md_round = ""
        files = []
        list_of_tools.append(folder_nice)
        folder_url = urllib.parse.quote(folder_nice)
        print(folder_nice)
        while True:
            if notebooks[index].get("root") != folder_nice:
                break
            print(notebooks[index].get("subdir"))
            files.append(notebooks[index].get("subdir"))
            index += 1
            if index == index_max:
                break
        if ("/" not in folder_nice):
            md_round += get_file_md(folder_nice, folder_url, files, json_templates)
        else:
            folder_url = urllib.parse.quote(folder_nice)
            subfolder_nice = folder_nice.split('/')[1].replace('_', ' ').replace(folder_nice, '').strip()
            md_round += get_file_md(subfolder_nice, folder_url, files, json_templates, "\t###", "\t-")
        generated_list += md_round

NameError: name 'notebooks' is not defined

## Output

### Preview the generated list

In [None]:
naas_drivers.markdown.display(generated_list)

### Generate README.md

In [None]:
# Open README template
template = open(readme_template).read()

# Replace var to get list of templates in markdown format
template = template.replace("[[DYNAMIC_LIST]]", generated_list)

# Save README
f  = open(readme, "w+")
f.write(template)
f.close()

### Generate json for naas manager

In [None]:
with open(json_file, 'w') as f:
    json.dump(json_templates, f)