<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Awesome Notebooks - Update
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/template.ipynb" target="_parent">
<img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/>
</a>

**Tags:** #jupyter #awesome-notebooks

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook updates all notebooks in this repository based on specific rules.

## Input

### Import libraries

In [None]:
import glob
import naas
import requests
import pandas as pd
from pprint import pprint
import json
try:
    import openai
except:
    !pip install openai --user
    import openai
import uuid
import time
import subprocess
import re

### Setup Variables

In [None]:
# Inputs
# Repository URL; not referenced again by the cells visible in this notebook.
repo_url = "https://github.com/jupyter-naas/awesome-notebooks"
# GitHub token looked up by name through naas.secret; get_notebooks puts it in
# the Authorization header of its GitHub API request.
GITHUB_TOKEN = naas.secret.get("GITHUB_TOKEN")
# OpenAI key looked up the same way; the placeholder string is used instead
# whenever the lookup returns a falsy value.
openai.api_key = naas.secret.get(name="OPENAI_API_KEY") or "ENTER_YOUR_OPENAI_API_KEY"

## Model

### Get current notebooks from master branch

In [None]:
def get_notebooks(commit_id):
    """List the notebooks of the awesome-notebooks repository at a given ref.

    The recursive git tree of ``commit_id`` is requested from the GitHub API
    and every ``.ipynb`` file that lives inside a folder is kept. Paths
    containing ``.github`` or ``.gitignore`` are ignored.

    Args:
        commit_id: Branch name or commit SHA whose tree is listed. The same
            value is used to build CONTENT_URL and GITHUB_URL so the links
            point at the ref that was actually listed.

    Returns:
        pandas.DataFrame with one row per notebook and the columns
        ROOT (folder path), SUBDIR (file name), ID (blob sha), URL (API url
        of the blob), CONTENT_URL (raw file url) and GITHUB_URL (html url
        with spaces encoded as %20). The frame is empty, with the same
        columns, when no notebook is found.

    Raises:
        requests.HTTPError: when the GitHub API answers with an error status.
    """
    # Without a configured token the header would read "token None"; attempt
    # an unauthenticated request instead.
    headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
    url = f"https://api.github.com/repos/jupyter-naas/awesome-notebooks/git/trees/{commit_id}?recursive=1"
    res = requests.get(url, headers=headers)
    res.raise_for_status()
    res_json = res.json()

    # The API sets "truncated" when the tree is too large to be returned whole.
    if res_json.get("truncated"):
        print("⚠️ GitHub returned a truncated tree: some notebooks may be missing.")

    notebooks = []
    for file in res_json.get("tree") or []:
        path = file.get("path") or ""
        # Skip repository plumbing and files sitting at the repository root.
        if ".github" in path or ".gitignore" in path or "/" not in path:
            continue
        if not path.endswith(".ipynb"):
            continue
        # Everything before the last "/" is the folder, the rest is the file.
        root, _, file_name = path.rpartition("/")
        notebooks.append(
            {
                "ROOT": root,
                "SUBDIR": file_name,
                "ID": file.get("sha"),
                "URL": file.get("url"),
            }
        )

    # Explicit columns keep the URL computations below valid on an empty list.
    df = pd.DataFrame(notebooks, columns=["ROOT", "SUBDIR", "ID", "URL"])
    relative_path = df["ROOT"] + "/" + df["SUBDIR"]
    df["CONTENT_URL"] = (
        f"https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/{commit_id}/"
        + relative_path
    )
    df["GITHUB_URL"] = (
        f"https://github.com/jupyter-naas/awesome-notebooks/blob/{commit_id}/"
        + relative_path
    ).str.replace(" ", "%20")
    return df

# Build the notebook table from the tree of the "master" ref.
df_notebooks = get_notebooks("master")
print("-> Awesome Notebooks fecthed:", len(df_notebooks))
# Trailing expression: the notebook displays the first row as a preview.
df_notebooks.head(1)

### Get all local notebooks

In [None]:
# Sorted paths of the notebooks located exactly one folder below the working directory.
# NOTE(review): recursive=True has no effect here because the pattern contains no "**".
notebooks = sorted(glob.glob('*/*.ipynb', recursive=True))
print("-> Local Notebooks fecthed:", len(notebooks))
# Trailing expression shows the first path; it raises IndexError when nothing matched.
notebooks[0]

### Create notebook description

In [None]:
def create_description(title):
    """Ask the OpenAI completion endpoint for a one-sentence notebook description.

    Args:
        title: Notebook title inserted into the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding
        whitespace.
    """
    prompt = f'Can you create a description for this notebook in one sentence: "{title}"'
    completion = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=2084,
    )

    # Only the first returned choice is used
    first_choice = completion["choices"][0]
    description = first_choice["text"].strip()

    # Wait 3 seconds before returning (presumably to space out API calls)
    time.sleep(3)
    return description

# title = "Excel - Apply Custom Styles"
# create_description(title)

### Update notebook

In [None]:
def _first_source_line(cell):
    """Return the first line of a cell's source, or "" when the source is empty.

    A cell's ``source`` may be stored as a list of lines or as one string;
    both forms are handled and the line ending is kept as stored.
    """
    source = cell.get("source") or ""
    if isinstance(source, list):
        return source[0] if source else ""
    lines = source.splitlines(keepends=True)
    return lines[0] if lines else ""


def update_notebook(notebook_path):
    """Add or rewrite the description cell of a notebook following the template.

    The function expects the title in cell index 1, the description in cell
    index 4 and the "## Input" section in cell index 5:

    - when cell 4 is the markdown cell "## Input", the description is missing
      and a new description cell is inserted before it;
    - when cell 4 is a markdown cell that does not start with
      "**Description:**" and cell 5 is "## Input" (or starts with
      "<div class"), cell 4 is replaced by a generated description.

    The description text comes from ``create_description(title)``. The file is
    rewritten only when one of the two cases applies. Notebooks with fewer
    than six cells are skipped instead of raising IndexError.

    Args:
        notebook_path: Path of the ``.ipynb`` file to update in place.

    Returns:
        None.
    """
    # Open notebook
    with open(notebook_path, encoding="utf-8") as f:
        nb = json.load(f)

    # Get all cells; the checks below read indexes 1, 4 and 5
    cells = nb.get("cells") or []
    if len(cells) < 6:
        print(f"Skipping {notebook_path}: fewer than 6 cells, template layout not found.")
        return

    title_source = _first_source_line(cells[1]).replace("#", "").strip()
    desc_type = cells[4].get("cell_type")
    desc_source = _first_source_line(cells[4])
    section_i_source = _first_source_line(cells[5])

    # Check description
    create_desc = desc_type == "markdown" and desc_source == '## Input'
    rewrite_desc = (
        not create_desc
        and desc_type == "markdown"
        and not desc_source.startswith("**Description:**")
        and (section_i_source == '## Input' or section_i_source.startswith("<div class"))
    )
    if not (create_desc or rewrite_desc):
        return

    if create_desc:
        print(f"Creating description: {title_source}")
    else:
        print(f"Updating description: {title_source}")
    description = create_description(title_source)
    new_description = f"**Description:** {description}"
    print(new_description)

    # Apply change
    cell_desc = {
        'cell_type': 'markdown',
        'id': 'naas-description',
        'metadata': {'papermill': {}, 'tags': ["description"]},
        'source': [new_description]
    }
    # Creating inserts before the old cell 4; rewriting replaces cell 4.
    remaining_cells = cells[4:] if create_desc else cells[5:]
    new_cells = cells[:4] + [cell_desc] + remaining_cells

    # Save notebook
    nb_new = nb.copy()
    nb_new["cells"] = new_cells
    with open(notebook_path, 'w', encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes
        json.dump(nb_new, f, indent=1, ensure_ascii=False)
    print(f"✔️ {notebook_path} saved in Naas.")
    
# update_notebook(notebooks[0])

### Run black

In [None]:
def run_black(file, black_cmd=("black", "-", "--fast")):
    """Format every code cell of a notebook with black and save it when changed.

    Each code cell's source is piped to the formatter on stdin and replaced by
    the formatter's stdout, minus one trailing newline. Cells for which the
    formatter exits with a non-zero status or prints nothing are left
    untouched. The notebook is rewritten only when at least one cell's code
    actually changed.

    Args:
        file: Path of the ``.ipynb`` file to format in place.
        black_cmd: Command (sequence of arguments) that reads code on stdin
            and writes the formatted code on stdout. Defaults to
            ``black - --fast``.

    Returns:
        None.
    """
    # Open file
    with open(file, encoding="utf-8") as f:
        nb = json.load(f)

    # Track changes explicitly: the cells are edited in place, so comparing
    # the list with a reference to itself could never detect a change.
    changed = False

    # Apply black on cells
    for cell in nb.get("cells") or []:
        if cell.get("cell_type") != "code":
            continue
        source = cell.get("source") or ""
        code = "".join(source)
        result = subprocess.run(
            list(black_cmd),
            input=code.encode(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        if result.returncode != 0 or not result.stdout:
            continue
        new_code = result.stdout.decode()
        if new_code.endswith("\n"):
            new_code = new_code[:-1]
        if new_code == code:
            continue
        # Keep the storage form (list of lines or single string) of the cell.
        if isinstance(source, list):
            cell["source"] = new_code.splitlines(keepends=True)
        else:
            cell["source"] = new_code
        changed = True

    # Save file if changes applied
    if changed:
        with open(file, "w", encoding="utf-8") as f:
            json.dump(nb, f, indent=1, ensure_ascii=False)
        print(f"✔️ Black apply to your notebook: {file}")

## Output

### Update all awesome notebooks

In [None]:
# File names of the notebooks listed from the repository
awesome_notebooks = df_notebooks["SUBDIR"].unique()

# Walk the local notebooks and format the ones that are also in that list.
# To regenerate descriptions as well, call update_notebook(notebook) before run_black.
for index, notebook in enumerate(notebooks):
    notebook_name = notebook.rsplit("/", 1)[-1]
    if notebook_name not in awesome_notebooks:
        continue
    print(index, notebook_name)
    run_black(notebook)