From cf7030831324dab0caf616cb48727c747354d314 Mon Sep 17 00:00:00 2001 From: Jiaxuan-Yang Date: Wed, 13 Jul 2022 09:27:21 -0400 Subject: [PATCH] Signed-off-by: Jiaxuan-Yang Address caveats katalog update script --- docs/import-assets.md | 2 +- docs/install-mlx-on-kind.md | 2 ++ .../python/regenerate_catalog_upload_json.py | 27 ++++++++++++++----- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/docs/import-assets.md b/docs/import-assets.md index 5138c244..f05a721e 100644 --- a/docs/import-assets.md +++ b/docs/import-assets.md @@ -10,7 +10,7 @@ Then expand the :gear: **Settings** at the bottom of the left-hand side navigati the **Bulk Imports** sections. Find and select the [JSON file](/bootstrapper/catalog_upload.json) that describes your catalog -and the import should begin. A progress bar will indicate the upload progress. +and the import should begin. A progress bar will indicate the upload progress. To regenerate the file from the YAML files found inside the `katalog` repository, run `python tools/python/regenerate_catalog_upload_json.py` and check that the resulting changes are the ones you want. If an asset from your Catalog did not get imported, make sure that all the metadata provided in the Catalog upload file is correct (API Key, Asset URL, etc.). If it is correct, ensure that the diff --git a/docs/install-mlx-on-kind.md b/docs/install-mlx-on-kind.md index cb8c8308..8c1cd143 100644 --- a/docs/install-mlx-on-kind.md +++ b/docs/install-mlx-on-kind.md @@ -99,6 +99,8 @@ curl -X POST \ -s "${UPLOAD_API}" | grep -iE "total_|error" ``` +To regenerate the catalog_upload.json file from the YAML files found inside the `katalog` repository, run `python tools/python/regenerate_catalog_upload_json.py` and check that the resulting changes are the ones you want. 
+ Delete the `mlx` cluster when it is no longer needed: ```Bash diff --git a/tools/python/regenerate_catalog_upload_json.py b/tools/python/regenerate_catalog_upload_json.py index 66e60b2f..49f76c4d 100755 --- a/tools/python/regenerate_catalog_upload_json.py +++ b/tools/python/regenerate_catalog_upload_json.py @@ -6,8 +6,10 @@ from __future__ import print_function +import difflib import json import yaml +import os from glob import glob from os.path import abspath, dirname, relpath @@ -22,9 +24,9 @@ ] script_path = abspath(dirname(__file__)) -project_dir = dirname(script_path) +project_dir = dirname(dirname(script_path)) -katalog_dir = f"{project_dir}/../katalog" # TODO: don't assume user cloned katalog and mlx repos into same parent folder +katalog_dir = f"{project_dir}/../katalog" katalog_url = "https://raw.githubusercontent.com/machine-learning-exchange/katalog/main/" catalog_upload_json_files = [ @@ -46,6 +48,9 @@ def get_list_of_yaml_files_in_katalog(asset_type: str): def generate_katalog_dict() -> dict: katalog_dict = dict() + if not (os.path.isdir(katalog_dir)): + os.chdir(f"{project_dir}/..") + os.system("git clone https://github.com/machine-learning-exchange/katalog.git") for asset_type in asset_types: @@ -77,20 +82,30 @@ def rewrite_catalog_upload_json_files(katalog: dict): for file_path in catalog_upload_json_files: - with open(file_path, "w") as output_file: + print(" - " + relpath(file_path, project_dir)) + + with open(file_path, "r") as target_file: + + json_dict = json.load(target_file) + for element in json_dict: + if element[0: -1] not in asset_types: + katalog[element] = json_dict[element] - print(" - " + relpath(file_path, project_dir)) + with open(file_path, "w") as output_file: output_file.write(json.dumps(katalog, sort_keys=False, indent=2)) output_file.write("\n") + print('Please evaluate the changes:') + + for line in difflib.unified_diff( + json.dumps(json_dict, sort_keys=True, indent=2).split("\n"), json.dumps(katalog, sort_keys=True, 
indent=2).split("\n"), lineterm=''): + print(line) def main(): print("Regenerating catalog_upload.json files:") - # TODO: read current catalog_upload.json file(s) to capture non-katalog assets and restore later - katalog_dict = generate_katalog_dict() rewrite_catalog_upload_json_files(katalog_dict)