In [7]:
try:
    from firebase_admin import credentials, firestore, initialize_app
except:
    %pip install firebase_admin
    from firebase_admin import credentials, firestore, initialize_app
from IPython.display import clear_output
import json
import os
import pandas as pd
from probml_utils.url_utils import (
    check_dead_urls,
    is_dead_url,
    github_url_to_colab_url,
    make_url_from_chapter_no_and_script_name,
    dict_to_csv,
)
from glob import glob

%config Completer.use_jedi = False

In [2]:
# Path to the service-account key file used to build the Firebase credentials.
key_path = "../../key_karm_gcp.json"
cred = credentials.Certificate(key_path)
try:
    default_app = initialize_app(cred)
except ValueError:
    # initialize_app raises ValueError when the default app already exists,
    # which happens when this cell is re-run in the same kernel. The app that
    # was created earlier keeps being used.
    pass
db = firestore.client()
db

<google.cloud.firestore_v1.client.Client at 0x7f6289af3f40>

### Upload all notebook URLs to the database

In [8]:
# Glob pattern for the book1 notebooks: one sub-directory level, then the .ipynb files.
notebooks_path = "../notebooks/book1/*/*.ipynb"
# List of every path matching the pattern (glob does not guarantee an ordering).
notebooks_1 = glob(notebooks_path)

In [10]:
def non_figure_notebook_url_mapping(notebooks_path, csv_name):
    """Build a ``{notebook name: url}`` mapping and save it through ``dict_to_csv``.

    Parameters
    ----------
    notebooks_path : iterable of str
        Notebook paths ending in ``<chapter_no>/<script_name>``, with ``/`` as
        the separator.
    csv_name : str
        Forwarded to ``dict_to_csv`` together with the mapping.

    Returns
    -------
    dict
        Maps each notebook file name, without its extension, to the value
        returned by ``make_url_from_chapter_no_and_script_name(chapter_no,
        script_name)``. When several notebooks share a file name, the entry of
        the last one processed overwrites the earlier ones.

    Raises
    ------
    IndexError
        If a path contains no ``/`` and therefore has no chapter component.
    """
    url_mapping = {}
    for notebook_path in notebooks_path:
        parts = notebook_path.split("/")
        script_name = parts[-1]
        chapter_no = parts[-2]
        url = make_url_from_chapter_no_and_script_name(chapter_no, script_name)
        # Remove only the final extension, so a dotted name such as
        # "a.b.ipynb" yields the key "a.b" instead of being cut at the first dot.
        key = os.path.splitext(script_name)[0]
        url_mapping[key] = url
    dict_to_csv(url_mapping, csv_name)
    print(f"Mapping of {len(url_mapping)} urls is saved in {csv_name}")
    return url_mapping

In [11]:
# Build the mapping for the book1 notebooks and save it under the CSV name given here;
# the trailing expression displays the resulting dict as the cell output.
mapping = non_figure_notebook_url_mapping(notebooks_1, "non_figures_url_mapping_book1_backward_compatibility.csv")
mapping

Mapping of 272 urls is saved in non_figures_url_mapping_book1_backward_compatibility.csv


{'bagging_trees': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/bagging_trees.ipynb',
 'regtreeSurfaceDemo': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/regtreeSurfaceDemo.ipynb',
 'spam_tree_ensemble_interpret': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/spam_tree_ensemble_interpret.ipynb',
 'hinge_loss_plot': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/04/hinge_loss_plot.ipynb',
 'boosted_regr_trees': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/boosted_regr_trees.ipynb',
 'rf_demo_2d': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/rf_demo_2d.ipynb',
 'dtree_sensitivity': 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/dtree_sensitivity.ipynb',
 'spam_tree_ensemble_compare': 'https://c

In [4]:
# NOTE(review): make_url_from_notebook_path is not among the names imported in the
# visible import cell -- confirm where it is defined before running on a fresh kernel.
[make_url_from_notebook_path(notebook_path) for notebook_path in notebooks_1]

['https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/bagging_trees.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/regtreeSurfaceDemo.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/spam_tree_ensemble_interpret.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/hinge_loss_plot.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/boosted_regr_trees.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/rf_demo_2d.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/dtree_sensitivity.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob/master/notebooks/book1/18/spam_tree_ensemble_compare.ipynb',
 'https://colab.research.google.com/github/probml/pyprobml/blob

In [11]:
# Number of notebook paths collected in notebooks_1.
len(notebooks_1)

281

### Read & store as JSON

In [None]:
def save_urls_as_json(
    key_path,
    csv_path,
    level1_collection="figures",
    level2_document=None,
    level3_collection=None,
    json_path=None,
):
    """Read one Firestore sub-collection into a nested dict and optionally save it as JSON.

    The documents are read from
    ``level1_collection / level2_document / level3_collection``.

    Parameters
    ----------
    key_path : str
        Passed to ``credentials.Certificate`` to build the credentials.
    csv_path : str
        Not used by this function; kept so existing positional calls keep working.
    level1_collection : str
        Name of the top-level collection.
    level2_document : str
        Must be ``"book1"`` or ``"book2"``.
    level3_collection : str
        Name of the sub-collection whose documents are read.
    json_path : str, optional
        When given, the nested dict is written to this file as JSON (indent 4).

    Returns
    -------
    dict
        ``{level1_collection: {level2_document: {level3_collection: {id: data}}}}``
        where ``id`` is each document's id and ``data`` its ``to_dict()`` value.

    Raises
    ------
    AssertionError
        If ``level2_document`` is neither ``"book1"`` nor ``"book2"``.
    """
    assert level2_document in [
        "book1",
        "book2",
    ], "Incorrect level2_document value: possible values of level2_document should be ['book1', 'book2']"

    cred = credentials.Certificate(key_path)
    try:
        initialize_app(cred)  # this should be called only once
    except ValueError:
        # The default app already exists (e.g. created by an earlier cell); reuse it.
        pass
    db = firestore.client()

    collection = db.collection(level1_collection).document(level2_document).collection(level3_collection)

    # Read from the collection resolved above and fill the local result, rather
    # than relying on notebook-level globals that may not exist.
    documents = {document.id: document.to_dict() for document in collection.get()}
    data_json = {level1_collection: {level2_document: {level3_collection: documents}}}

    if json_path is not None:
        with open(json_path, "w") as fp:
            json.dump(data_json, fp, indent=4)

    return data_json

In [4]:
# Names of the collection, document and sub-collection that locate the stored urls.
level1_collection = "figures"
level2_document = "book1"
level3_collection = "figures"

# Nested dict keyed by those three names; the innermost dict receives one entry per document.
database_backup_json = {level1_collection: {level2_document: {level3_collection: {}}}}

In [5]:
# Sub-collection that holds the stored urls, one document per key.
figure_collection = db.collection(level1_collection).document(level2_document).collection(level3_collection)  # path to urls
for document in figure_collection.get():
    key = document.id
    value = document.to_dict()
    database_backup_json[level1_collection][level2_document][level3_collection][key] = value

# save as json
with open("database_backup_book1_old_urls.json", "w") as fp:
    json.dump(database_backup_json, fp, indent=4)

# Last expression of the cell, so the backup is displayed as the cell output.
database_backup_json

{'figures': {'book1': {'figures': {'1.1': {'link': 'https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks//chapter1_introduction_figures.ipynb#1.1'},
    '1.10': {'link': 'https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks//chapter1_introduction_figures.ipynb#1.10'},
    '1.11': {'link': 'https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks//chapter1_introduction_figures.ipynb#1.11'},
    '1.12': {'link': 'https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks//chapter1_introduction_figures.ipynb#1.12'},
    '1.13': {'link': 'https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks//chapter1_introduction_figures.ipynb#1.13'},
    '1.14': {'link': 'https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks//chapter1_introduction_figures.ipynb#1.14'},
    '1.15': {'link': 'https://colab

In [25]:
# save as csv
# One row per backed-up document: the document id is the "key" index and the
# "link" field of its stored dict becomes the "url" column.
df = (
    pd.DataFrame(
        database_backup_json[level1_collection][level2_document][level3_collection].items(),
        columns=["key", "url"],
    )
    .assign(url=lambda frame: frame["url"].apply(lambda stored: stored["link"]))
    .set_index(keys="key", drop=True)
)
df

Unnamed: 0_level_0,url
key,Unnamed: 1_level_1
1.1,https://colab.research.google.com/github/probm...
1.10,https://colab.research.google.com/github/probm...
1.11,https://colab.research.google.com/github/probm...
1.12,https://colab.research.google.com/github/probm...
1.13,https://colab.research.google.com/github/probm...
...,...
tfidf_demo,https://github.com/probml/pyprobml/blob/master...
transformers_torch,https://github.com/probml/pyprobml/blob/master...
transposed_conv_torch,https://github.com/probml/pyprobml/blob/master...
word_analogies_torch,https://colab.research.google.com/github/probm...


In [26]:
# to_csv writes the index by default, so the index of df becomes the first CSV column.
df.to_csv("old_database_urls_mapping.csv")

## Upload as `figures_old`

In [39]:
# Read every column as text. Without an explicit dtype pandas infers column
# types, and a key column that looks purely numeric would be parsed as floats,
# collapsing keys such as "1.10" and "1.1" into the same value.
df = pd.read_csv("old_database_urls_mapping.csv", dtype=str)
df

Unnamed: 0,key,url
0,1.1,https://colab.research.google.com/github/probm...
1,1.10,https://colab.research.google.com/github/probm...
2,1.11,https://colab.research.google.com/github/probm...
3,1.12,https://colab.research.google.com/github/probm...
4,1.13,https://colab.research.google.com/github/probm...
...,...,...
487,tfidf_demo,https://github.com/probml/pyprobml/blob/master...
488,transformers_torch,https://github.com/probml/pyprobml/blob/master...
489,transposed_conv_torch,https://github.com/probml/pyprobml/blob/master...
490,word_analogies_torch,https://colab.research.google.com/github/probm...


In [42]:
# Target location: collection "figures", document "book1", sub-collection "figures_old".
level1_collection = "figures"
level2_document = "book1"
level3_collection = "figures_old"

# Reference to that sub-collection, built from the three names above.
collection = db.collection(level1_collection).document(level2_document).collection(level3_collection)

In [43]:
# Store each row of df as a document named after its key, with the url under
# the "link" field. The previous output is cleared before each print, so only
# the key being processed is shown.
for key, url in zip(df["key"], df["url"]):
    clear_output(wait=True)
    document_ref = collection.document(key)
    document_ref.set({"link": url})
    print(key)

x.y
