From e1a484ef2022add2fd645c842fca4bd496312822 Mon Sep 17 00:00:00 2001 From: karm-patel Date: Wed, 4 May 2022 01:03:26 +0530 Subject: [PATCH 1/2] Fix import error by adding module in requirement.txt --- requirements-extra.txt | 4 +--- requirements.txt | 4 ++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/requirements-extra.txt b/requirements-extra.txt index c66f845..047ba47 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -5,6 +5,4 @@ einops torchvision umap-learn umap -pytorch_lightning -TexSoup -firebase_admin \ No newline at end of file +pytorch_lightning \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 97d6b56..08263ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,7 @@ scikit-learn scipy graphviz distrax +pandas +TexSoup +firebase_admin +regex \ No newline at end of file From 234883ed223f2a191319ab6272b1df02abc775a1 Mon Sep 17 00:00:00 2001 From: karm-patel Date: Thu, 5 May 2022 11:11:00 +0530 Subject: [PATCH 2/2] resolve float `fig_no` bug --- probml_utils/url_utils.py | 42 +++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/probml_utils/url_utils.py b/probml_utils/url_utils.py index bc7a865..1337d2e 100644 --- a/probml_utils/url_utils.py +++ b/probml_utils/url_utils.py @@ -5,6 +5,7 @@ import regex as re import os import pandas as pd +import firebase_admin from firebase_admin import credentials, firestore, initialize_app def is_dead_url(link): @@ -68,6 +69,8 @@ def colab_url_to_github_url(url): def extract_scripts_name_from_caption(caption): """ extract foo.py from ...{https//:foo.py}{foo.py}... 
+ Input: caption + Output: ['foo.py'] """ py_pattern = r"\{\S+?\.py\}" ipynb_pattern = r"\{\S+?\.ipynb\}" @@ -105,7 +108,7 @@ def make_url_from_chapter_no_and_script_name( (chapter_no = 3,script_name=iris_plot.ipynb) converted to https://github.com/probml/pyprobml/blob/master/notebooks/book1/01/iris_plot.ipynb """ base_url_ipynb = os.path.join(base_url, f"book{book_no}/{int(chapter_no):02d}") - if ".py" in script_name: + if script_name.strip().endswith(".py"): script_name = script_name[:-3] + ".ipynb" if convert_to_colab_url: return github_url_to_colab_url(os.path.join(base_url_ipynb, script_name)) @@ -119,6 +122,9 @@ def dict_to_csv(key_value_dict, csv_name): def figure_url_mapping_from_lof(lof_file_path, csv_name, convert_to_colab_url = True, base_url = "https://github.com/probml/pyprobml/blob/master/notebooks", book_no=1): + f''' + create mapping of fig_no to url by parsing lof_file and save mapping in {csv_name} + ''' with open(lof_file_path) as fp: LoF_File_Contents = fp.read() soup = TexSoup(LoF_File_Contents) @@ -140,40 +146,50 @@ def figure_url_mapping_from_lof(lof_file_path, csv_name, convert_to_colab_url = return url_mapping -def non_figure_notebook_url_mapping(notebooks_path, csv_name): +def non_figure_notebook_url_mapping(notebooks_path, csv_name, convert_to_colab_url = True, base_url = "https://github.com/probml/pyprobml/blob/master/notebooks", book_no=1): + f''' + create mapping of notebook_name to url using notebooks in given path - {notebooks_path} and save mapping in {csv_name} + ''' url_mapping = {} for notebook_path in notebooks_path: parts = notebook_path.split("/") script_name = parts[-1] chapter_no = parts[-2] - book_no = parts[-3] - url = make_url_from_chapter_no_and_script_name(chapter_no,script_name) + url = make_url_from_chapter_no_and_script_name(chapter_no,script_name,convert_to_colab_url=convert_to_colab_url, base_url=base_url, book_no=book_no) key = script_name.split(".")[0] # remove extension url_mapping[key] = url if csv_name: 
dict_to_csv(url_mapping,csv_name) - print(f"Mapping of {len(url_mapping)} urls is saved in {csv_name}") + print(f"Mapping of {len(url_mapping)} urls is saved in {csv_name}") return url_mapping -def upload_urls_to_firestore(key_path,csv_path,level1_collection = "figures", - level2_document = None ,level3_collection = None): - - assert level2_document in ["book1", "book2"], "Incorrect level2_document value: possible values of level2_document should be ['book1', 'book2']" - +def create_firestore_db(key_path): cred = credentials.Certificate(key_path) try: default_app = initialize_app(cred) #this should called only once except ValueError: - pass + firebase_admin.delete_app(firebase_admin.get_app()) # delete current firebase app + default_app = initialize_app(cred) db = firestore.client() + return db + +def upload_urls_to_firestore(key_path,csv_path,level1_collection = "figures", + level2_document = None ,level3_collection = None): + + f''' + extract key-value pairs from {csv_path} and upload them to the firestore database + ''' + assert level2_document in ["book1", "book2"], "Incorrect level2_document value: possible values of level2_document should be ['book1', 'book2']" + + db = create_firestore_db(key_path) collection = db.collection(level1_collection).document(level2_document).collection(level3_collection) - df = pd.read_csv(csv_path) + df = pd.read_csv(csv_path, dtype = str) # put dtype=str otherwise fig_no 3.30 will be converted to 3.3 assert sorted(df.columns) == ["key","url"], f"columns of {csv_path} should be only 'key' and 'url'" print("Uploading...") for (key,url) in list(zip(df["key"], df["url"])): - collection.document(str(key)).set({"link": url}) + collection.document(key).set({"link": url}) print(f"{len(df)} urls uploaded!") \ No newline at end of file