In [123]:
import os
import logging
from slack_sdk import WebClient
from datetime import timedelta, date, datetime
import json

DEFAULT_VAL_FREQ = 6
FILEPATH = "../"


In [124]:
def convert_to_date_and_delta(val_date, val_freq):
    "Converts validation date string to datetime and validation frequency string (months) to timedelta."
    try:
        val_date_conv = datetime.strptime(val_date.rstrip('\n'), '%Y-%m-%d').date()
        val_freq_conv = timedelta(days=int(val_freq) * 30.4)
        return val_date_conv, val_freq_conv
    except ValueError:
        # handles the case where validation format is incorrect
        return None, None


In [125]:
def get_prod_cat_ref():

    product_categories = {}

    # Load the menu file
    with open(FILEPATH + 'menu/navigation.json', 'r') as file:
        data = json.load(file)  # Parse the JSON content into a Python dictionary or list

        for grouping in data:
            for category in grouping["items"]:
                category_label = category["label"]
                for product in category["items"]:
                    product_label = product["slug"]
                    product_categories[product_label] = category_label

    return(product_categories)

In [126]:
def needs_review(val_date, val_freq):
    "Returns true if doc needs to be reviewed, based on val date and frequency"
    val_date_conv, val_freq_conv = convert_to_date_and_delta(val_date, val_freq)
    if val_date_conv is None or val_freq_conv is None:
        return False
    today = date.today()
    # calculate how long since doc was reviewed, in days
    delta = today - val_date_conv
    # return true or false depending on evaluation of data
    return delta >= val_freq_conv


In [127]:
def extract_metadata(filepath):
    "Extracts validation date and validation frequency from a document."
    with open(filepath) as doc:
        meta_limiters = 0
        has_val_date = False
        val_freq = DEFAULT_VAL_FREQ

        for line in doc:
            if "validation: " in line:
                val_date = line.split(": ", 1)[1].strip()
                has_val_date = True
            if "validation_frequency:" in line:
                val_freq = line.split(": ", 1)[1].strip()
            if "---" in line:
                meta_limiters += 1
            # once two --- strings are found, it is the end of the meta section, stop checking file
            if meta_limiters >= 2:
                break
                
    return has_val_date, val_date if has_val_date else None, val_freq


In [128]:
def process_files(directory):
    "Processes files in the content directory to check for those needing review."
    print("Processing files to check for those needing review")
    docs_to_review=[]
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            filepath = os.path.join(subdir, file)      
            if filepath.endswith(".mdx"):
                has_val_date, val_date, val_freq = extract_metadata(filepath)
                if has_val_date and needs_review(val_date, val_freq):
                    docs_to_review.append(filepath)
    return docs_to_review


In [148]:
def get_doc_cat_name(filepath, prod_cat_ref):
    "Returns a document-to-review's category and tidied-up filepath"
    trimmed_filepath = filepath[2:-4]
    filepath_list = trimmed_filepath.split("/")
    print("list", filepath_list)

    if filepath_list[1] == "tutorials":
        category = filepath_list[1]
    elif filepath_list[1] == "faq":
        category = filepath_list[2]
    else:
        print("CAT HERE", prod_cat_ref[filepath_list[2]])
        category = prod_cat_ref[filepath_list[2]]
    
    return category, trimmed_filepath


In [149]:
def organize_docs_by_category(docs_to_review):
    "Organizes docs to review by category into a dictionary."
    print("Organizing docs by category")
    dict_by_cat = {}
    # one shot: make a dict of all products and their categories, based on menu file
    prod_cat_ref = get_prod_cat_ref()
    print("PROD CAT REF", prod_cat_ref)
    for filepath in docs_to_review:
        category, trimmed_filepath = get_doc_cat_name(filepath, prod_cat_ref)

        if category not in dict_by_cat:
            dict_by_cat[category] = [trimmed_filepath]
        else:
            dict_by_cat[category].append(trimmed_filepath)
    
    # sort the dictionary alphabetically by category
    dict_by_cat_sorted = {key: value for key, value in sorted(dict_by_cat.items())}

    return dict_by_cat_sorted


In [150]:
def prep_message(docs_to_review_by_cat):
    "Prepares the message to sent to the Slack channel, containing the docs to review"
    print("Preparing message")
    message = ":wave: Hi doc team, here are some docs to review: \n \n"

    for key in docs_to_review_by_cat:
        message += "*" + key.title() + "*" + "\n"
        for doc in docs_to_review_by_cat[key]:
            message += doc + "\n"
        message += "\n"
    print(message)
    return(message)

In [151]:
def send_message(message):
    "Sends the message containing docs to review to the Slack channel"
    print("Sending message")
    client = WebClient(token=os.environ['SLACK_BOT_TOKEN'])
    client.chat_postMessage(
        channel = "#review-doc",
        text = message,
        username = "DocReviewBot"
    )

In [152]:
def main():
    docs_to_review = process_files(FILEPATH)
    docs_to_review_by_cat = organize_docs_by_category(docs_to_review)
    message = prep_message(docs_to_review_by_cat)
    print(message)
    # if os.environ.get("DRY_RUN") != "true":
        # send_message(message)

if __name__ == "__main__":
    main()

Processing files to check for those needing review
Organizing docs by category
PROD CAT REF {'account': 'Dedibox Console', 'billing': 'Account & Billing', 'organizations-and-projects': 'Security & Identity', 'iam': 'Security & Identity', 'audit-trail': 'Security & Identity', 'secret-manager': 'Security & Identity', 'managed-inference': 'AI & Data', 'generative-apis': 'AI & Data', 'apple-silicon': 'Bare Metal', 'elastic-metal': 'Bare Metal', 'dedibox': 'Bare Metal', 'instances': 'Compute', 'gpu': 'Compute', 'kubernetes': 'Containers', 'container-registry': 'Containers', 'scaleway-cli': 'Developer Tools', 'scaleway-sdk': 'Developer Tools', 'terraform': 'Developer Tools', 'postgresql-and-mysql': 'Managed Databases', 'redis': 'Managed Databases', 'mongodb': 'Managed Databases', 'transactional-email': 'Managed Services', 'iot-hub': 'Managed Services', 'webhosting': 'Managed Services', 'data-lab': 'Managed Services', 'vpc': 'Network', 'ipam': 'Network', 'public-gateways': 'Network', 'load-ba

KeyError: 'object'

In [153]:
# Next; deal with the products that aren't correctly renamed