<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Generate Readme for Awesome Notebooks

## Input

### Import libraries

In [None]:
import os
import requests
import pandas as pd
import urllib.parse
try:
    from git import Repo
except:
    !pip install GitPython
    from git import Repo
from naas_drivers import markdown
from pprint import pprint
import json
from papermill.iorw import (
    load_notebook_node,
    write_ipynb,
)
import copy
import hashlib

### Setup variables
- `readme_template`: This variable stores the file name or path of the README template file. It is used as a template to generate the final README file.
- `naas_lab_logo`: This variable contains the URL of the Naas Lab logo image.
- `naas_chat_logo`: This variable holds the URL of the Naas Chat logo image.
- `template_request`: This variable represents the URL for submitting a template request on GitHub. It includes parameters such as assignees, labels, template, and title.
- `bug_report`: This variable stores the URL for submitting a bug report on GitHub. It includes parameters such as assignees, labels, template, and title.
- `start_data_product`: This variable contains the URL for the "Naas_Start_data_product" notebook.
- `json_file`: This variable represents the file name or path for the output JSON file that will store the templates.
- `naas_lab_url`: This variable holds the URL prefix for accessing Naas Lab resources.
- `naas_chat_url`: This variable represents the URL prefix for using Naas Chat plugins.
- `readme`: This variable stores the file name or path for the final README file.

In [None]:
# Inputs
# Template file; its "[[DYNAMIC_LIST]]" placeholder is replaced with the generated notebook list
readme_template = "README_template.md"
# Badge images passed to create_title_cell
naas_lab_logo = "https://naasai-public.s3.eu-west-3.amazonaws.com/Open_in_Naas_Lab.svg"
naas_chat_logo = "https://naasai-public.s3.eu-west-3.amazonaws.com/Open_in_Naas_Chat.svg"
# GitHub "new issue" URLs; the bug report URL gets the notebook title appended as the issue title
template_request = "https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=&template=template-request.md&title=Tool+-+Action+of+the+notebook+"
bug_report = "https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title="
# Raw URL of the "Naas_Start_data_product" notebook
start_data_product = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/Naas/Naas_Start_data_product.ipynb"

# Outputs
# File that receives the list of notebook records as JSON
json_file = "templates.json"
# URL prefixes; a notebook's raw URL is appended to build its "open in" link
naas_lab_url ='https://app.naas.ai/user-redirect/naas/downloader?url='
naas_chat_url = "https://naas.ai/chat/use?plugin_url="
# Final README written from the template
readme = "README.md"

## Model

### Get all notebooks

In [None]:
def get_all_notebooks():
    """List every notebook of the repository as a DataFrame.

    The active branch of the local Git checkout is used to query the GitHub
    "git trees" API recursively. Only ``.ipynb`` files that live inside a
    folder and whose path contains neither ".github" nor ".gitignore" are kept.

    Returns:
        pandas.DataFrame: one row per notebook with the columns ``tool``
        (first path segment), ``notebook_name`` (second path segment),
        ``notebook_path``, ``html_url`` and ``raw_url``. The DataFrame is
        empty (no columns) when the API call fails or returns no tree.
    """
    # Init
    html_url_base = "https://github.com/jupyter-naas/awesome-notebooks/blob/master"
    raw_url_base = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master"
    notebooks = []

    # Connect to the local Git repository and get the active branch
    repo = Repo('.')
    branch = repo.active_branch

    # Get tree from json
    url = f"https://api.github.com/repos/jupyter-naas/awesome-notebooks/git/trees/{branch.name}?recursive=1"
    # A timeout prevents the notebook from hanging forever on a stalled connection
    res = requests.get(url, timeout=30)
    if res.status_code == 200:
        # "tree" may be missing from the payload; fall back to an empty list
        tree = res.json().get("tree") or []
    else:
        tree = []
        print(f"⚠️ GitHub API returned status {res.status_code} for {url}")

    # Loop on files
    for entry in tree:
        file_path = entry.get("path")
        if ".github" in file_path or ".gitignore" in file_path or "/" not in file_path:
            continue
        if not file_path.endswith(".ipynb"):
            continue
        # Paths may contain spaces or other characters that must be escaped in URLs
        notebook_path = urllib.parse.quote(file_path)
        path_parts = file_path.split("/")
        notebooks.append(
            {
                "tool": path_parts[0],
                "notebook_name": path_parts[1],
                "notebook_path": file_path,
                "html_url": f"{html_url_base}/{notebook_path}",
                "raw_url": f"{raw_url_base}/{notebook_path}",
            }
        )
    return pd.DataFrame(notebooks)

# Build the notebook inventory once; the processing loop further down iterates over it
df_notebooks = get_all_notebooks()
print("✅ Notebooks fetched:", len(df_notebooks))
# Last expression of the cell: shows the first row as a preview
df_notebooks.head(1)

### Create title cell, generate markdown for README & json for naas search

#### Encode notebook name

In [None]:
def encode_notebook_name(notebook_name):
    """Return the SHA-256 hex digest of ``notebook_name`` encoded as UTF-8."""
    return hashlib.sha256(notebook_name.encode("utf-8")).hexdigest()

# # Example usage
# notebook_name = "my_notebook.ipynb"
# encoded_name = encode_notebook_name(notebook_name)
# print(encoded_name)

#### Get notebook data functions

In [None]:
def _split_import_names(names):
    """Split the right-hand side of an import statement into bare names.

    Aliases (``x as y``) are reduced to ``x``; surrounding parentheses and
    line-continuation backslashes are dropped; empty fragments are skipped.
    """
    parsed = []
    for fragment in names.split(","):
        name = fragment.split(" as ")[0].strip(" \t()\\")
        if name:
            parsed.append(name)
    return parsed


def get_imports(sources, imports):
    """Append the modules imported by a code cell to ``imports``.

    Only lines that actually start with ``import`` or ``from ... import``
    (after indentation is removed) are considered, so lines that merely
    contain the substring "import" (``important = 1``, strings, comments)
    are ignored.

    Args:
        sources (str): source code of a cell, lines separated by "\\n".
        imports (list): list that receives the entries; it is mutated in
            place and also returned.

    Returns:
        list: ``imports`` with one entry per imported name: ``"module"`` for
        ``import module [as alias]`` and ``"package.name"`` for
        ``from package import name [as alias]``.
    """
    # Loop on sources
    for line in sources.split("\n"):
        # Drop trailing comments and indentation so only real statements match
        statement = line.split("#", 1)[0].strip()
        if statement.startswith("from ") and " import " in statement:
            lib, _, names = statement[len("from "):].partition(" import ")
            lib = lib.strip()
            targets = _split_import_names(names)
            if targets:
                imports.extend(f"{lib}.{name}" for name in targets)
            else:
                # e.g. "from pkg import (": the names continue on the next
                # lines, so record at least the package itself
                imports.append(lib)
        elif statement.startswith("import "):
            imports.extend(_split_import_names(statement[len("import "):]))
    return imports

def get_notebook_info(notebook_path):
    """Read a notebook's header cells, normalise the notebook and return its info.

    The first six cells are read by position: cell 0 is the logo, cell 1 the
    title and action, cell 2 the tags, cell 3 the author, cell 4 the
    "Last update" / "Created" dates and cell 5 the description.

    Side effects: the notebook metadata gets a ``naas`` entry (notebook id and
    path), ``execution`` metadata is removed from code cells, outputs are
    cleared on code cells that are not tagged "plugin", and "#plugin" is added
    to the tags cell when a plugin cell exists. The file is rewritten only
    when the resulting notebook differs from the one loaded.

    Args:
        notebook_path (str): path of the notebook; its first "/"-separated
            segment is treated as the tool name.

    Returns:
        tuple: ``(logo, title, action, tags, author_name, author_url,
        updated_at, created_at, description, imports, plugin, first_cell)``.
    """
    # Init
    author_url = ""
    plugin = False
    imports = []

    # Get notebook
    tool = notebook_path.split("/")[0]
    nb_init = load_notebook_node(notebook_path)
    nb = copy.deepcopy(nb_init)

    # Update encoded name
    encoded_name = encode_notebook_name(notebook_path)
    nb.metadata["naas"] = {}
    nb.metadata["naas"]["notebook_id"] = encoded_name
    nb.metadata["naas"]["notebook_path"] = notebook_path

    # Get cells
    cells = nb.cells

    # Get metadata stored in fixed cells
    logo = cells[0].get("source")
    first_cell = cells[1].get("source")
    title = first_cell.replace("#", "").replace(tool, "").split("-", 1)[-1].split("\n")[0].strip()
    action = first_cell.split("<br><br>")[0].split("\n")[-1].strip()
    tags = cells[2].get("source").replace("**Tags:**", "").strip()
    tags = [f"#{tag.strip()}" for tag in tags.split("#") if tag != ""]
    author = cells[3].get("source").replace("**Author:**", "").strip()
    author_name = author.replace("[", "").replace("]", "").split("(")[0].strip()
    if "(" in author:
        author_url = author.split("(")[-1].replace(")", "")
    date_cell = cells[4].get("source")
    updated_at = date_cell.split("**Last update:**")[-1].split("(")[0].strip()
    created_at = date_cell.split("Created:")[-1].split(")")[0].strip()
    description = cells[5].get("source").replace("**Description:**", "").strip()

    # Get metadata stored in variables cells
    for index, cell in enumerate(cells):
        if cell.get("cell_type") != "code":
            continue
        metadata = cell.get("metadata")
        imports = get_imports(cell.get("source"), imports)
        # Remove metadata
        if metadata.get("execution"):
            del metadata['execution']
            nb.cells[index].metadata = metadata
        # Cells without any tag have no "tags" key: treat them as untagged
        # instead of failing on `"plugin" in None`
        cell_tags = metadata.get("tags") or []
        is_plugin_cell = "plugin" in cell_tags
        if is_plugin_cell:
            plugin = True
            # Add #plugin into tags
            if "#plugin" not in tags:
                nb.cells[2].source = "**Tags:** " + " ".join(tags) + " #plugin"
                tags.append("#plugin")
        # Remove outputs except plugin
        if not is_plugin_cell and cell.get("outputs") != []:
            nb.cells[index].outputs = []

    # Save notebook
    if nb != nb_init:
        write_ipynb(nb, notebook_path)
        print("✅ Notebook successfully updated.")
    return logo, title, action, tags, author_name, author_url, updated_at, created_at, description, imports, plugin, first_cell

# notebook_path = "OpenAI/OpenAI_Act_as_a_chef.ipynb"
# logo, title, action, tags, author_name, author_url, updated_at, created_at, description, imports, plugin, first_cell = get_notebook_info(notebook_path)
# print("- Title:", title)
# print("- Action:", action)
# print("- Tags:", tags)
# print("- Author:", author_name)
# print("- Author URL:", author_url)
# print("- Last update:", updated_at)
# print("- Created at:", created_at)
# print("- Description:", description)
# print("- Imports:", imports)
# print("- Plugin:", plugin)
# print("- First cell:", first_cell)

#### Create title cell function

In [None]:
def create_title_cell(
    tool,
    title,
    open_in_lab,
    naas_lab_logo,
    open_in_chat,
    naas_chat_logo,
    template_request,
    bug_report,
    naas_lab_url,
    start_data_product,
):
    """Build the markdown/HTML source of a notebook's title cell.

    The cell is made of the "# tool - title" heading, an optional
    "Open in Naas Chat" badge, a "Give Feedback" link and a "Bug report" link
    whose issue title is pre-filled with the notebook title.

    Args:
        tool (str): tool name shown before the dash in the heading.
        title (str): notebook title shown after the dash.
        open_in_chat (str): chat URL; when empty no badge is rendered.
        naas_chat_logo (str): image URL of the chat badge.
        bug_report (str): issue URL prefix; the title is appended to it.
        open_in_lab, naas_lab_logo, template_request, naas_lab_url,
        start_data_product: accepted for backward compatibility; they do not
            appear in the returned source.

    Returns:
        str: source of the title cell.
    """
    # Create notebook title
    notebook_title = f"# {tool} - {title}\n"  # Jupyter Notebooks - Get libraries

    # Create logo (only when a chat URL is provided)
    open_in_chat_url = ""
    if open_in_chat != "":
        open_in_chat_url = f"""<a href="{open_in_chat}" target="_parent"><img src="{naas_chat_logo}"/></a><br><br>"""

    # Hyperlinks
    # NOTE(review): only spaces are escaped; other characters of the title are
    # inserted as-is into the URL query string.
    title_url = (f"{tool}+-+{title}").replace(" ", "+")
    bug_report_url = f"""<a href="{bug_report}{title_url}:+Error+short+description">Bug report</a>"""
    feedbacks_url = """<a href="https://bit.ly/3JyWIk6">Give Feedback</a>"""

    # Title cell source
    return f"""{notebook_title}{open_in_chat_url}{feedbacks_url} | {bug_report_url}"""

# plugin = False
# tool = "OpenAI"
# raw_url = "https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/OpenAI/OpenAI_Act_as_a_chef.ipynb"
# open_in_lab = f"{naas_lab_url}{raw_url}"
# open_in_chat = ""
# # Create Open in MyChatGPT URL
# if plugin:
#     open_in_chat = f"{naas_chat_url}{raw_url}"
# create_title_cell(
#     tool,
#     title,
#     open_in_lab,
#     naas_lab_logo,
#     open_in_chat,
#     naas_chat_logo,
#     template_request,
#     bug_report,
#     naas_lab_url,
#     start_data_product,
# )

#### Create logo

In [None]:
# Folder holding the tool logos; created if missing so the listing cannot fail
logo_path = os.path.join(".github", "assets", "logos")
os.makedirs(logo_path, exist_ok=True)
# Sorted file names available as logos
logos = sorted(os.listdir(logo_path))

# Folder holding the character images; created if missing as well
character_path = os.path.join(".github", "assets", "characters")
os.makedirs(character_path, exist_ok=True)
# Sorted file names available as character images
characters = sorted(os.listdir(character_path))

# Name of the branch checked out locally; used to build raw image URLs
active_branch = Repo('.').active_branch.name

def create_image_cell(image_name, logos, characters, active_branch):
    """Return the ``<img>`` markup and the raw URL of the image for ``image_name``.

    ``image_name`` is tried with the extensions .png, .jpg and .jpeg against
    the ``logos`` and ``characters`` file lists. Every match replaces the
    previous one, so the last matching extension wins and, for a given
    extension, a character image takes precedence over a logo. Without any
    match the default "Naas.png" logo is used.

    Returns:
        tuple: ``(img_md, img_url)``.
    """
    assets_base = f"https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/{active_branch}/.github/assets"
    alt_text = "Naas"
    img_url = f"{assets_base}/logos/Naas.png"
    for extension in (".png", ".jpg", ".jpeg"):
        candidate = image_name + extension
        quoted = urllib.parse.quote(candidate)
        # Order matters: characters are checked after logos and override them
        for folder, available in (("logos", logos), ("characters", characters)):
            if candidate in available:
                alt_text = candidate
                img_url = f"{assets_base}/{folder}/{quoted}"
    img_md = f'<img width="8%" alt="{alt_text}" src="{img_url}" style="border-radius: 15%">'
    return img_md, img_url

# img_md, img_url = create_image_cell("Stephen Curry", logos, characters, active_branch)
# img_url

#### Update cell function

In [None]:
def update_cell(
    notebook_path,
    cell_num,
    source,
):
    """Replace the source of one cell and write the notebook back in place.

    Args:
        notebook_path: path of the notebook to load and overwrite.
        cell_num: index of the cell whose source is replaced.
        source: new source for that cell.
    """
    # Work on a copy of the loaded notebook
    notebook = copy.deepcopy(load_notebook_node(notebook_path))

    # Swap in the new cell content
    notebook.cells[cell_num].source = source

    # Persist the change to the same file
    write_ipynb(notebook, notebook_path)

#### Loop on notebooks to apply functions

In [None]:
# Accumulators filled by the loop below:
# - generated_list: markdown list of notebooks grouped by tool (for the README)
# - json_templates: one record per notebook (for the JSON output)
# - folder: last tool seen, used to emit a "## tool" heading once per tool
generated_list = ""
json_templates = []
folder = None

for row in df_notebooks.itertuples():
    notebook_path = row.notebook_path
    print(notebook_path)
    tool = row.tool
    raw_url = row.raw_url
    html_url = row.html_url
    # NOTE(review): the original code built f"{naas_lab_url}{raw_url}" and then
    # immediately blanked it; the value is kept empty here. Confirm that the
    # "open in lab" link is meant to stay disabled.
    open_in_lab = ""
    open_in_chat = ""

    # Get data from notebook
    logo, title, action, tags, author_name, author_url, updated_at, created_at, description, imports, plugin, title_cell = get_notebook_info(notebook_path)

    # Create Open in MyChatGPT URL
    if plugin:
        open_in_chat = f"{naas_chat_url}{raw_url}"

    # Create OpenButton
    new_title_cell = create_title_cell(
        tool,
        title,
        open_in_lab,
        naas_lab_logo,
        open_in_chat,
        naas_chat_logo,
        template_request,
        bug_report,
        naas_lab_url,
        start_data_product,
    )

    # Create logo cell: avatar notebooks use the character name found between
    # "Chat with" and "AI avatar" in the title, every other notebook uses the tool
    image_title = tool
    if '#avatar' in tags and "Chat with" in title:
        image_title = title.split("Chat with")[1].split("AI avatar")[0].strip()
    print(f"Image title: {image_title}")
    new_logo_cell, logo_url = create_image_cell(image_title, logos, characters, active_branch)

    # Update logo cell in notebook
    if logo != new_logo_cell:
        print("⚠️ Logo cell to be updated:", notebook_path)
        update_cell(
            notebook_path,
            0,
            new_logo_cell
        )
        print("✅ Logo cell successfully updated")

    # Update OpenButton in notebook
    if title_cell != new_title_cell:
        print("⚠️ Title cell to be updated:", notebook_path)
        update_cell(
            notebook_path,
            1,
            new_title_cell
        )
        print("✅ Title cell successfully updated")

    # Update json
    # Ranking defaults to 2; the first matching rule below wins
    ranking = 2
    if "#automation" in tags and tool in ["Notion", "LinkedIn", "Google Sheets", "HubSpot"]:
        ranking = 5
    elif tool in ["AI for Work"]:
        business_keywords = ["Sales", "Marketing", "Social Media", "Brand", "Financial", "Legal"]
        if any(keyword in title for keyword in business_keywords):
            ranking = 4
    elif tool in ["Notion", "LinkedIn", "Google Sheets", "HubSpot", "AWS", "Google Docs", "Google Drive", "Google Analytics", "Gmail", "Bitly"]:
        ranking = 3
    elif tool in ["Naas", "Naas Chat Plugin", "Naas Dashboard"]:
        ranking = 2
    elif "#snippet" in tags:
        ranking = 1
    new_json = {
        'objectID': encode_notebook_name(notebook_path),
        'tool': tool,
        'notebook': title,
        'action': action,
        'tags': tags,
        'author': author_name,
        'author_url': author_url,
        "updated_at": updated_at,
        "created_at": created_at,
        'description':  description,
        "open_in_lab": open_in_lab,
        "open_in_chat": open_in_chat,
        "notebook_url": html_url,
        "imports": imports,
        "image_url": logo_url,
        "ranking": ranking
    }
    json_templates.append(new_json)

    # Create markdown: start a new section whenever the tool changes
    new_folder = row.tool
    if new_folder != folder:
        generated_list += f"\n## {new_folder}\n"
        folder = new_folder
    nb_redirect = f"* [{title}]({html_url})\n"
    generated_list += nb_redirect

## Output

### Generate README.md

In [None]:
# Open README template (context manager closes the handle; UTF-8 is explicit
# because the content contains non-ASCII characters such as emoji)
with open(readme_template, encoding="utf-8") as template_file:
    template = template_file.read()

# Replace var to get list of templates in markdown format
template = template.replace("[[DYNAMIC_LIST]]", generated_list)

# Save README
with open(readme, "w", encoding="utf-8") as f:
    f.write(template)
print("✅ README updated")
# markdown.display(generated_list)

### Generate json for naas manager & naas search

In [None]:
# Write every notebook record collected in json_templates to the JSON output file
with open(json_file, 'w') as f:
    json.dump(json_templates, f)
# Report how many records were written
print("✅ JSON file updated", len(json_templates))
# pprint(json_templates[0])