<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Naas - Update dates on PR merged
<a href="https://app.naas.ai/user-redirect/naas/downloader?url=https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/template.ipynb" target="_parent">
<img src="https://naasai-public.s3.eu-west-3.amazonaws.com/open_in_naas.svg"/>
</a>

**Tags:** #naas #awesome-notebooks #github #pr #merge

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook updates the dates ("Last update" and "Created") of the notebooks added, modified or renamed in a merged pull request.

## Input

### Import libraries

In [None]:
import copy
import json
import os
import urllib.parse
import uuid
from datetime import datetime

import pandas as pd
import requests
from papermill.iorw import (
    load_notebook_node,
    write_ipynb,
)

### Setup Variables

In [None]:
# Inputs
# GitHub repository, written as "owner/name"
repo_path = "jupyter-naas/awesome-notebooks"
# Personal access token, read from the environment so it is never hardcoded
token = os.getenv("GITHUB_TOKEN")
# Number of the pull request whose files are processed
pull_number = 1984

## Model

### Get files changed on pull request

In [None]:
def get_files_changed_on_pull_request(
    token,
    repo_path,
    pull_number
):
    """Return the files touched by a pull request, keyed by file name.

    Args:
        token: GitHub token sent in the ``Authorization`` header.
        repo_path: Repository written as ``owner/name``.
        pull_number: Number of the pull request to inspect.

    Returns:
        dict mapping each file name to its status as reported by the API.
        Files whose status is ``"removed"`` are left out. The dict is empty
        (or partial) when a request does not answer with HTTP 200.
    """
    # Init
    files = {}
    url = f"https://api.github.com/repos/{repo_path}/pulls/{pull_number}/files"
    headers = {"Authorization": f"token {token}"}
    per_page = 100  # largest page size accepted by the endpoint
    page = 1

    # Requests: the endpoint is paginated, so keep asking until a short page comes back
    while True:
        res = requests.get(
            url,
            headers=headers,
            params={"per_page": per_page, "page": page},
        )
        if res.status_code != 200:
            # Keep the original "return what we have" contract, but do not fail silently
            print(f"❌ Unable to fetch files changed (page {page}): HTTP {res.status_code}")
            break
        batch = res.json()
        for file in batch:
            filename = file["filename"]
            status = file["status"]
            if status != "removed":
                files[filename] = status
        if len(batch) < per_page:
            # A page that is not full is the last one
            break
        page += 1
    return files

# Fetch the files touched by the pull request, then show how many were found
files_changed = get_files_changed_on_pull_request(
    token=token,
    repo_path=repo_path,
    pull_number=pull_number,
)
print("✅ Notebooks changed fetched:", len(files_changed))
files_changed

### Get notebooks from master branch

In [None]:
def get_all_notebooks(branch, repo_path="jupyter-naas/awesome-notebooks"):
    """List the notebooks found in the git tree of a branch.

    The module-level ``token`` variable is used to authenticate the request.

    Args:
        branch: Branch whose recursive git tree is read; also used to build
            the ``html_url`` and ``raw_url`` of every notebook.
        repo_path: Repository written as ``owner/name``.

    Returns:
        pandas.DataFrame with the columns ``tool`` (first path segment),
        ``notebook_name`` (second path segment), ``notebook_path``,
        ``html_url`` and ``raw_url``. The columns are present even when no
        notebook is found or the request fails.
    """
    # Init
    columns = ["tool", "notebook_name", "notebook_path", "html_url", "raw_url"]
    html_url_base = f"https://github.com/{repo_path}/blob/{branch}"
    raw_url_base = f"https://raw.githubusercontent.com/{repo_path}/{branch}"
    notebooks = []
    tree = []

    # Get tree from json
    url = f"https://api.github.com/repos/{repo_path}/git/trees/{branch}?recursive=1"
    headers = {"Authorization": f"token {token}"}
    res = requests.get(url, headers=headers)
    if res.status_code == 200:
        payload = res.json()
        # "tree" may be missing or null: fall back to an empty list
        tree = payload.get("tree") or []
        if payload.get("truncated"):
            print("⚠️ Git tree was truncated by the API: some notebooks may be missing.")
    else:
        print(res.json())

    # Loop on files
    for r in tree:
        file_path = r.get("path", "")
        # Skip repository plumbing and files stored at the root (no folder)
        if ".github" in file_path or ".gitignore" in file_path or "/" not in file_path:
            continue
        if not file_path.endswith(".ipynb"):
            continue
        # URL-encode the path (spaces, accents) while keeping the "/" separators
        notebook_path = urllib.parse.quote(file_path)
        segments = file_path.split("/")
        notebooks.append(
            {
                "tool": segments[0],
                "notebook_name": segments[1],
                "notebook_path": file_path,
                "html_url": f"{html_url_base}/{notebook_path}",
                "raw_url": f"{raw_url_base}/{notebook_path}",
            }
        )
    # Explicit columns keep the schema stable when the list is empty
    return pd.DataFrame(notebooks, columns=columns)

# Catalogue of the notebooks currently on the master branch, with a one-row preview
df_master = get_all_notebooks(branch="master")
print("✅ Notebooks fetched from master branch:", len(df_master))
df_master.head(n=1)

## Output

### Create or Update markdown

In [None]:
# Stamp every notebook of this run with the same date
updated_at = datetime.now().strftime("%Y-%m-%d")
# Text that opens the markdown cell holding the dates
date_prefix = "**Last update:**"
# 0-based position at which the dates cell is looked for (and inserted when missing)
date_cell_index = 4
# Build the lookup once instead of converting the column to a list for every file
master_notebook_paths = (
    set(df_master["notebook_path"]) if "notebook_path" in df_master else set()
)

for notebook_path, status in files_changed.items():
    # Check if file changed in notebooks
    if notebook_path not in master_notebook_paths:
        print(f"File '{notebook_path}' is not an awesome-notebook.")
        continue

    # Load notebook
    nb = load_notebook_node(notebook_path)

    # Init
    new_cells = []
    new_source_dates = f'{date_prefix} {updated_at} (Created: {updated_at})'

    # Loop on cells
    for i, cell in enumerate(nb.cells):
        if i == date_cell_index:
            source = cell.source
            if not source.startswith(date_prefix):
                # No dates cell yet: insert one right before the current cell
                new_cells.append(
                    {
                        'cell_type': 'markdown',
                        'id': str(uuid.uuid4()),
                        'metadata': {'papermill': {}, 'tags': []},
                        'source': new_source_dates
                    }
                )
            elif status == "modified":
                updated_at_old = source.split(date_prefix)[-1].split("(")[0].strip()
                if updated_at_old:
                    # count=1: only the "Last update" date changes, even when the
                    # "Created" date holds the very same value
                    cell["source"] = source.replace(updated_at_old, updated_at, 1)
                else:
                    # No previous date found: replacing "" would corrupt the whole
                    # cell, so write the date right after the prefix instead
                    cell["source"] = source.replace(
                        date_prefix, f"{date_prefix} {updated_at}", 1
                    )
            elif status == "added":
                cell["source"] = new_source_dates
        new_cells.append(cell)

    nb.cells = new_cells
    with open(notebook_path, "w") as f:
        json.dump(nb, f)
    print(f"✅ {notebook_path} saved in Naas.")